diff --git a/be/src/common/config.h b/be/src/common/config.h index 5bb910f40d..729330505a 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -187,8 +187,8 @@ CONF_mInt32(push_write_mbytes_per_sec, "100"); CONF_mInt64(column_dictionary_key_ratio_threshold, "0"); CONF_mInt64(column_dictionary_key_size_threshold, "0"); -// memory_limitation_per_thread_for_schema_change unit GB -CONF_mInt32(memory_limitation_per_thread_for_schema_change, "2"); +// memory_limitation_per_thread_for_schema_change_bytes unit bytes +CONF_mInt64(memory_limitation_per_thread_for_schema_change_bytes, "2147483648"); // the clean interval of file descriptor cache and segment cache CONF_mInt32(cache_clean_interval, "1800"); @@ -606,6 +606,9 @@ CONF_Int32(aws_log_level, "3"); // the buffer size when read data from remote storage like s3 CONF_mInt32(remote_storage_read_buffer_mb, "16"); +// Whether Hook TCmalloc new/delete, currently consume/release tls mem tracker in Hook. +CONF_Bool(track_new_delete, "true"); + // Default level of MemTracker to show in web page // now MemTracker support two level: // OVERVIEW: 0 diff --git a/be/src/common/status.h b/be/src/common/status.h index c94c660ff1..a1632bd4b5 100644 --- a/be/src/common/status.h +++ b/be/src/common/status.h @@ -17,7 +17,8 @@ namespace doris { class Status { enum { - STATE_CAPACITY = 256, + // If the error and log returned by the query are truncated, the status to string may be too long. + STATE_CAPACITY = 2048, HEADER_LEN = 7, MESSAGE_LEN = STATE_CAPACITY - HEADER_LEN }; diff --git a/be/src/exec/analytic_eval_node.cpp b/be/src/exec/analytic_eval_node.cpp index e4a20f2b99..3741241bed 100644 --- a/be/src/exec/analytic_eval_node.cpp +++ b/be/src/exec/analytic_eval_node.cpp @@ -236,10 +236,8 @@ Status AnalyticEvalNode::open(RuntimeState* state) { // Fetch the first input batch so that some _prev_input_row can be set here to avoid // special casing in GetNext(). 
- _prev_child_batch.reset( - new RowBatch(child(0)->row_desc(), state->batch_size(), mem_tracker().get())); - _curr_child_batch.reset( - new RowBatch(child(0)->row_desc(), state->batch_size(), mem_tracker().get())); + _prev_child_batch.reset(new RowBatch(child(0)->row_desc(), state->batch_size())); + _curr_child_batch.reset(new RowBatch(child(0)->row_desc(), state->batch_size())); while (!_input_eos && _prev_input_row == nullptr) { RETURN_IF_ERROR(child(0)->get_next(state, _curr_child_batch.get(), &_input_eos)); @@ -738,7 +736,7 @@ Status AnalyticEvalNode::get_next_output_batch(RuntimeState* state, RowBatch* ou ExprContext** ctxs = &_conjunct_ctxs[0]; int num_ctxs = _conjunct_ctxs.size(); - RowBatch input_batch(child(0)->row_desc(), output_batch->capacity(), mem_tracker().get()); + RowBatch input_batch(child(0)->row_desc(), output_batch->capacity()); int64_t stream_idx = _input_stream->rows_returned(); RETURN_IF_ERROR(_input_stream->get_next(&input_batch, eos)); diff --git a/be/src/exec/base_scanner.cpp b/be/src/exec/base_scanner.cpp index 5c277e36d1..92bb10886e 100644 --- a/be/src/exec/base_scanner.cpp +++ b/be/src/exec/base_scanner.cpp @@ -39,14 +39,6 @@ BaseScanner::BaseScanner(RuntimeState* state, RuntimeProfile* profile, _counter(counter), _src_tuple(nullptr), _src_tuple_row(nullptr), -#if BE_TEST - _mem_tracker(new MemTracker()), -#else - _mem_tracker(MemTracker::create_tracker( - -1, "BaseScanner:" + std::to_string(state->load_job_id()), - state->instance_mem_tracker())), -#endif - _mem_pool(_mem_tracker.get()), _dest_tuple_desc(nullptr), _pre_filter_texprs(pre_filter_texprs), _strict_mode(false), @@ -57,6 +49,13 @@ BaseScanner::BaseScanner(RuntimeState* state, RuntimeProfile* profile, _materialize_timer(nullptr), _success(false), _scanner_eof(false) { +#ifndef BE_TEST + _mem_pool.reset(new MemPool(state->query_type() == TQueryType::LOAD + ? 
"BaseScanner:" + std::to_string(state->load_job_id()) + : "BaseScanner:Select")); +#else + _mem_pool.reset(new MemPool()); +#endif } Status BaseScanner::open() { @@ -97,8 +96,8 @@ Status BaseScanner::init_expr_ctxes() { _src_slot_descs.emplace_back(it->second); } // Construct source tuple and tuple row - _src_tuple = (Tuple*)_mem_pool.allocate(src_tuple_desc->byte_size()); - _src_tuple_row = (TupleRow*)_mem_pool.allocate(sizeof(Tuple*)); + _src_tuple = (Tuple*)_mem_pool->allocate(src_tuple_desc->byte_size()); + _src_tuple_row = (TupleRow*)_mem_pool->allocate(sizeof(Tuple*)); _src_tuple_row->set_tuple(0, _src_tuple); _row_desc.reset(new RowDescriptor(_state->desc_tbl(), std::vector({_params.src_tuple_id}), diff --git a/be/src/exec/base_scanner.h b/be/src/exec/base_scanner.h index a745e938d2..bce0f4b8ca 100644 --- a/be/src/exec/base_scanner.h +++ b/be/src/exec/base_scanner.h @@ -79,7 +79,7 @@ protected: std::shared_ptr _mem_tracker; // Mem pool used to allocate _src_tuple and _src_tuple_row - MemPool _mem_pool; + std::unique_ptr _mem_pool; // Dest tuple descriptor and dest expr context const TupleDescriptor* _dest_tuple_desc; diff --git a/be/src/exec/blocking_join_node.cpp b/be/src/exec/blocking_join_node.cpp index ba137860ac..57196713a2 100644 --- a/be/src/exec/blocking_join_node.cpp +++ b/be/src/exec/blocking_join_node.cpp @@ -23,6 +23,7 @@ #include "gen_cpp/PlanNodes_types.h" #include "runtime/row_batch.h" #include "runtime/runtime_state.h" +#include "runtime/thread_context.h" #include "util/runtime_profile.h" namespace doris { @@ -69,7 +70,7 @@ Status BlockingJoinNode::prepare(RuntimeState* state) { _probe_tuple_row_size = num_left_tuples * sizeof(Tuple*); _build_tuple_row_size = num_build_tuples * sizeof(Tuple*); - _left_batch.reset(new RowBatch(child(0)->row_desc(), state->batch_size(), mem_tracker().get())); + _left_batch.reset(new RowBatch(child(0)->row_desc(), state->batch_size())); return Status::OK(); } @@ -82,6 +83,7 @@ Status 
BlockingJoinNode::close(RuntimeState* state) { } void BlockingJoinNode::build_side_thread(RuntimeState* state, std::promise* status) { + SCOPED_ATTACH_TASK_THREAD(state, mem_tracker()); status->set_value(construct_build_side(state)); } diff --git a/be/src/exec/broker_scan_node.cpp b/be/src/exec/broker_scan_node.cpp index 344ca3f95e..6e156f8d83 100644 --- a/be/src/exec/broker_scan_node.cpp +++ b/be/src/exec/broker_scan_node.cpp @@ -30,6 +30,7 @@ #include "runtime/dpp_sink_internal.h" #include "runtime/row_batch.h" #include "runtime/runtime_state.h" +#include "runtime/thread_context.h" #include "util/runtime_profile.h" namespace doris { @@ -254,7 +255,7 @@ Status BrokerScanNode::scanner_scan(const TBrokerScanRange& scan_range, while (!scanner_eof) { // Fill one row batch std::shared_ptr row_batch( - new RowBatch(row_desc(), _runtime_state->batch_size(), mem_tracker().get())); + new RowBatch(row_desc(), _runtime_state->batch_size())); // create new tuple buffer for row_batch MemPool* tuple_pool = row_batch->tuple_data_pool(); diff --git a/be/src/exec/cross_join_node.cpp b/be/src/exec/cross_join_node.cpp index e605c91f01..5dcdc10d65 100644 --- a/be/src/exec/cross_join_node.cpp +++ b/be/src/exec/cross_join_node.cpp @@ -54,8 +54,8 @@ Status CrossJoinNode::construct_build_side(RuntimeState* state) { RETURN_IF_ERROR(child(1)->open(state)); while (true) { - RowBatch* batch = _build_batch_pool->add( - new RowBatch(child(1)->row_desc(), state->batch_size(), mem_tracker().get())); + RowBatch* batch = + _build_batch_pool->add(new RowBatch(child(1)->row_desc(), state->batch_size())); RETURN_IF_CANCELLED(state); // TODO(zhaochun): diff --git a/be/src/exec/csv_scan_node.cpp b/be/src/exec/csv_scan_node.cpp index ab64a5c762..e262bda786 100644 --- a/be/src/exec/csv_scan_node.cpp +++ b/be/src/exec/csv_scan_node.cpp @@ -195,7 +195,7 @@ Status CsvScanNode::prepare(RuntimeState* state) { return Status::InternalError("new a csv scanner failed."); } - _tuple_pool.reset(new (std::nothrow) 
MemPool(state->instance_mem_tracker().get())); + _tuple_pool.reset(new (std::nothrow) MemPool()); if (_tuple_pool.get() == nullptr) { return Status::InternalError("new a mem pool failed."); } diff --git a/be/src/exec/es_http_scan_node.cpp b/be/src/exec/es_http_scan_node.cpp index 64b9792668..ba91ee02c8 100644 --- a/be/src/exec/es_http_scan_node.cpp +++ b/be/src/exec/es_http_scan_node.cpp @@ -30,6 +30,7 @@ #include "runtime/dpp_sink_internal.h" #include "runtime/row_batch.h" #include "runtime/runtime_state.h" +#include "runtime/thread_context.h" #include "service/backend_options.h" #include "util/runtime_profile.h" @@ -323,8 +324,7 @@ Status EsHttpScanNode::scanner_scan(std::unique_ptr scanner, while (!scanner_eof) { // Fill one row batch - std::shared_ptr row_batch( - new RowBatch(row_desc(), _runtime_state->batch_size(), mem_tracker().get())); + std::shared_ptr row_batch(new RowBatch(row_desc(), _runtime_state->batch_size())); // create new tuple buffer for row_batch MemPool* tuple_pool = row_batch->tuple_data_pool(); @@ -422,6 +422,7 @@ static std::string get_host_port(const std::vector& es_hosts) { } void EsHttpScanNode::scanner_worker(int start_idx, int length, std::promise& p_status) { + SCOPED_ATTACH_TASK_THREAD(_runtime_state, mem_tracker()); // Clone expr context std::vector scanner_expr_ctxs; DCHECK(start_idx < length); diff --git a/be/src/exec/es_http_scanner.cpp b/be/src/exec/es_http_scanner.cpp index 9a914b90c2..9a70e88579 100644 --- a/be/src/exec/es_http_scanner.cpp +++ b/be/src/exec/es_http_scanner.cpp @@ -24,7 +24,6 @@ #include "exprs/expr_context.h" #include "runtime/descriptors.h" #include "runtime/exec_env.h" -#include "runtime/mem_tracker.h" #include "runtime/raw_value.h" #include "runtime/runtime_state.h" #include "runtime/tuple.h" @@ -43,14 +42,6 @@ EsHttpScanner::EsHttpScanner(RuntimeState* state, RuntimeProfile* profile, Tuple _next_range(0), _line_eof(false), _batch_eof(false), -#if BE_TEST - _mem_tracker(new MemTracker()), -#else - 
_mem_tracker( - MemTracker::create_tracker(-1, "EsHttpScanner:" + std::to_string(state->load_job_id()), - state->instance_mem_tracker())), -#endif - _mem_pool(_mem_tracker.get()), _tuple_desc(nullptr), _counter(counter), _es_reader(nullptr), @@ -59,6 +50,13 @@ EsHttpScanner::EsHttpScanner(RuntimeState* state, RuntimeProfile* profile, Tuple _rows_read_counter(nullptr), _read_timer(nullptr), _materialize_timer(nullptr) { +#ifndef BE_TEST + _mem_pool.reset(new MemPool(state->query_type() == TQueryType::LOAD + ? "EsHttpScanner:" + std::to_string(state->load_job_id()) + : "EsHttpScanner:Select")); +#else + _mem_pool.reset(new MemPool()); +#endif } EsHttpScanner::~EsHttpScanner() { diff --git a/be/src/exec/es_http_scanner.h b/be/src/exec/es_http_scanner.h index dcebfe1649..054824459b 100644 --- a/be/src/exec/es_http_scanner.h +++ b/be/src/exec/es_http_scanner.h @@ -43,7 +43,6 @@ class TextConverter; class TupleDescriptor; class TupleRow; class RowDescriptor; -class MemTracker; class RuntimeProfile; struct EsScanCounter { @@ -82,8 +81,7 @@ protected: std::vector _slot_descs; std::unique_ptr _row_desc; - std::shared_ptr _mem_tracker; - MemPool _mem_pool; + std::unique_ptr _mem_pool; const TupleDescriptor* _tuple_desc; EsScanCounter* _counter; diff --git a/be/src/exec/except_node.cpp b/be/src/exec/except_node.cpp index 2217d6afdb..8ae170147f 100644 --- a/be/src/exec/except_node.cpp +++ b/be/src/exec/except_node.cpp @@ -55,8 +55,7 @@ Status ExceptNode::open(RuntimeState* state) { } // probe - _probe_batch.reset( - new RowBatch(child(i)->row_desc(), state->batch_size(), mem_tracker().get())); + _probe_batch.reset(new RowBatch(child(i)->row_desc(), state->batch_size())); ScopedTimer probe_timer(_probe_timer); RETURN_IF_ERROR(child(i)->open(state)); eos = false; diff --git a/be/src/exec/exchange_node.cpp b/be/src/exec/exchange_node.cpp index 14299c65a6..373cff3a9f 100644 --- a/be/src/exec/exchange_node.cpp +++ b/be/src/exec/exchange_node.cpp @@ -82,8 +82,7 @@ Status 
ExchangeNode::open(RuntimeState* state) { // create_merger() will populate its merging heap with batches from the _stream_recvr, // so it is not necessary to call fill_input_row_batch(). if (state->enable_exchange_node_parallel_merge()) { - RETURN_IF_ERROR(_stream_recvr->create_parallel_merger(less_than, state->batch_size(), - mem_tracker().get())); + RETURN_IF_ERROR(_stream_recvr->create_parallel_merger(less_than, state->batch_size())); } else { RETURN_IF_ERROR(_stream_recvr->create_merger(less_than)); } diff --git a/be/src/exec/hash_join_node.cpp b/be/src/exec/hash_join_node.cpp index 491c719e53..c92bb5a6b4 100644 --- a/be/src/exec/hash_join_node.cpp +++ b/be/src/exec/hash_join_node.cpp @@ -30,6 +30,7 @@ #include "runtime/row_batch.h" #include "runtime/runtime_filter_mgr.h" #include "runtime/runtime_state.h" +#include "runtime/thread_context.h" #include "util/defer_op.h" #include "util/runtime_profile.h" @@ -147,8 +148,7 @@ Status HashJoinNode::prepare(RuntimeState* state) { stores_nulls, _is_null_safe_eq_join, id(), mem_tracker(), state->batch_size() * 2)); - _probe_batch.reset( - new RowBatch(child(0)->row_desc(), state->batch_size(), mem_tracker().get())); + _probe_batch.reset(new RowBatch(child(0)->row_desc(), state->batch_size())); return Status::OK(); } @@ -177,6 +177,7 @@ Status HashJoinNode::close(RuntimeState* state) { } void HashJoinNode::build_side_thread(RuntimeState* state, std::promise* status) { + SCOPED_ATTACH_TASK_THREAD(state, mem_tracker()); status->set_value(construct_hash_table(state)); } @@ -185,7 +186,7 @@ Status HashJoinNode::construct_hash_table(RuntimeState* state) { // The hash join node needs to keep in memory all build tuples, including the tuple // row ptrs. The row ptrs are copied into the hash table's internal structure so they // don't need to be stored in the _build_pool. 
- RowBatch build_batch(child(1)->row_desc(), state->batch_size(), mem_tracker().get()); + RowBatch build_batch(child(1)->row_desc(), state->batch_size()); RETURN_IF_ERROR(child(1)->open(state)); SCOPED_TIMER(_build_timer); diff --git a/be/src/exec/hash_table.cpp b/be/src/exec/hash_table.cpp index 50f9c8c877..9410d7557a 100644 --- a/be/src/exec/hash_table.cpp +++ b/be/src/exec/hash_table.cpp @@ -43,13 +43,12 @@ HashTable::HashTable(const std::vector& build_expr_ctxs, _num_nodes(0), _current_capacity(num_buckets), _current_used(0), - _total_capacity(num_buckets), - _exceeded_limit(false), - _mem_tracker(mem_tracker) { - DCHECK(_mem_tracker); + _total_capacity(num_buckets) { DCHECK_EQ(_build_expr_ctxs.size(), _probe_expr_ctxs.size()); DCHECK_EQ((num_buckets & (num_buckets - 1)), 0) << "num_buckets must be a power of 2"; + _mem_tracker = + MemTracker::create_virtual_tracker(-1, mem_tracker->label() + "HashTable", mem_tracker); _buckets.resize(num_buckets); _num_buckets = num_buckets; _num_buckets_till_resize = MAX_BUCKET_OCCUPANCY_FRACTION * _num_buckets; @@ -71,9 +70,6 @@ HashTable::HashTable(const std::vector& build_expr_ctxs, _end_list.push_back(_current_nodes + _current_capacity * _node_byte_size); _mem_tracker->consume(_current_capacity * _node_byte_size); - if (_mem_tracker->limit_exceeded()) { - mem_limit_exceeded(_current_capacity * _node_byte_size); - } } HashTable::~HashTable() {} @@ -183,7 +179,6 @@ Status HashTable::resize_buckets(int64_t num_buckets) { Status st = _mem_tracker->try_consume(delta_bytes); if (!st) { LOG_EVERY_N(WARNING, 100) << "resize bucket failed: " << st.to_string(); - mem_limit_exceeded(delta_bytes); return st; } @@ -245,13 +240,6 @@ void HashTable::grow_node_array() { _end_list.push_back(_current_nodes + alloc_size); _mem_tracker->consume(alloc_size); - if (_mem_tracker->limit_exceeded()) { - mem_limit_exceeded(alloc_size); - } -} - -void HashTable::mem_limit_exceeded(int64_t allocation_size) { - _exceeded_limit = true; } std::string 
HashTable::debug_string(bool skip_empty, const RowDescriptor* desc) { diff --git a/be/src/exec/hash_table.h b/be/src/exec/hash_table.h index 304d9369f1..d58824d710 100644 --- a/be/src/exec/hash_table.h +++ b/be/src/exec/hash_table.h @@ -162,9 +162,6 @@ public: return valid_row < MAX_BUCKET_OCCUPANCY_FRACTION * (_buckets.size() / 2.0); } - // true if any of the MemTrackers was exceeded - bool exceeded_limit() const { return _exceeded_limit; } - // Returns the load factor (the number of non-empty buckets) float load_factor() { return _num_filled_buckets / static_cast(_buckets.size()); } @@ -406,8 +403,6 @@ private: // total capacity int64_t _total_capacity; - bool _exceeded_limit; // true if any of _mem_trackers[].limit_exceeded() - std::shared_ptr _mem_tracker; std::vector _buckets; diff --git a/be/src/exec/intersect_node.cpp b/be/src/exec/intersect_node.cpp index 8b327aee3e..2d8d2eefa2 100644 --- a/be/src/exec/intersect_node.cpp +++ b/be/src/exec/intersect_node.cpp @@ -59,7 +59,7 @@ Status IntersectNode::open(RuntimeState* state) { _valid_element_in_hash_tbl = 0; // probe _probe_batch.reset( - new RowBatch(child(i)->row_desc(), state->batch_size(), mem_tracker().get())); + new RowBatch(child(i)->row_desc(), state->batch_size())); ScopedTimer probe_timer(_probe_timer); RETURN_IF_ERROR(child(i)->open(state)); eos = false; diff --git a/be/src/exec/merge_join_node.cpp b/be/src/exec/merge_join_node.cpp index d83e872507..d21a1e61e1 100644 --- a/be/src/exec/merge_join_node.cpp +++ b/be/src/exec/merge_join_node.cpp @@ -129,10 +129,8 @@ Status MergeJoinNode::prepare(RuntimeState* state) { _right_tuple_idx.push_back(_row_descriptor.get_tuple_idx(right_tuple_desc->id())); } - _left_child_ctx.reset( - new ChildReaderContext(row_desc(), state->batch_size(), state->instance_mem_tracker())); - _right_child_ctx.reset( - new ChildReaderContext(row_desc(), state->batch_size(), state->instance_mem_tracker())); + _left_child_ctx.reset(new ChildReaderContext(row_desc(), 
state->batch_size())); + _right_child_ctx.reset(new ChildReaderContext(row_desc(), state->batch_size())); return Status::OK(); } @@ -295,14 +293,12 @@ Status MergeJoinNode::get_input_row(RuntimeState* state, int child_idx) { } if (child_idx == 0) { - _left_child_ctx.reset(new ChildReaderContext(child(child_idx)->row_desc(), - state->batch_size(), - state->instance_mem_tracker())); + _left_child_ctx.reset( + new ChildReaderContext(child(child_idx)->row_desc(), state->batch_size())); ctx = _left_child_ctx.get(); } else { - _right_child_ctx.reset(new ChildReaderContext(child(child_idx)->row_desc(), - state->batch_size(), - state->instance_mem_tracker())); + _right_child_ctx.reset( + new ChildReaderContext(child(child_idx)->row_desc(), state->batch_size())); ctx = _right_child_ctx.get(); } diff --git a/be/src/exec/merge_join_node.h b/be/src/exec/merge_join_node.h index d8b294ea18..ef02727be5 100644 --- a/be/src/exec/merge_join_node.h +++ b/be/src/exec/merge_join_node.h @@ -65,9 +65,8 @@ private: int row_idx; bool is_eos; TupleRow* current_row; - ChildReaderContext(const RowDescriptor& desc, int batch_size, - const std::shared_ptr& mem_tracker) - : batch(desc, batch_size, mem_tracker.get()), + ChildReaderContext(const RowDescriptor& desc, int batch_size) + : batch(desc, batch_size), row_idx(0), is_eos(false), current_row(nullptr) {} diff --git a/be/src/exec/merge_node.cpp b/be/src/exec/merge_node.cpp index 513284e4ca..d92dde402f 100644 --- a/be/src/exec/merge_node.cpp +++ b/be/src/exec/merge_node.cpp @@ -136,8 +136,8 @@ Status MergeNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) // Row batch was either never set or we're moving on to a different child. 
if (_child_row_batch.get() == nullptr) { RETURN_IF_CANCELLED(state); - _child_row_batch.reset(new RowBatch(child(_child_idx)->row_desc(), state->batch_size(), - mem_tracker().get())); + _child_row_batch.reset( + new RowBatch(child(_child_idx)->row_desc(), state->batch_size())); // Open child and fetch the first row batch. RETURN_IF_ERROR(child(_child_idx)->open(state)); RETURN_IF_ERROR( diff --git a/be/src/exec/mysql_scan_node.cpp b/be/src/exec/mysql_scan_node.cpp index 634f47c784..f83a66fdeb 100644 --- a/be/src/exec/mysql_scan_node.cpp +++ b/be/src/exec/mysql_scan_node.cpp @@ -81,7 +81,7 @@ Status MysqlScanNode::prepare(RuntimeState* state) { return Status::InternalError("new a mysql scanner failed."); } - _tuple_pool.reset(new (std::nothrow) MemPool(mem_tracker().get())); + _tuple_pool.reset(new (std::nothrow) MemPool("MysqlScanNode")); if (_tuple_pool.get() == nullptr) { return Status::InternalError("new a mem pool failed."); diff --git a/be/src/exec/odbc_scan_node.cpp b/be/src/exec/odbc_scan_node.cpp index 958e22ef54..17945bf0a2 100644 --- a/be/src/exec/odbc_scan_node.cpp +++ b/be/src/exec/odbc_scan_node.cpp @@ -74,7 +74,7 @@ Status OdbcScanNode::prepare(RuntimeState* state) { return Status::InternalError("new a odbc scanner failed."); } - _tuple_pool.reset(new (std::nothrow) MemPool(mem_tracker().get())); + _tuple_pool.reset(new (std::nothrow) MemPool("OdbcScanNode")); if (_tuple_pool.get() == nullptr) { return Status::InternalError("new a mem pool failed."); diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp index 0e750bde44..02f8c725a1 100644 --- a/be/src/exec/olap_scan_node.cpp +++ b/be/src/exec/olap_scan_node.cpp @@ -34,6 +34,7 @@ #include "runtime/runtime_filter_mgr.h" #include "runtime/runtime_state.h" #include "runtime/string_value.h" +#include "runtime/thread_context.h" #include "runtime/tuple_row.h" #include "util/priority_thread_pool.hpp" #include "util/priority_work_stealing_thread_pool.hpp" @@ -181,7 +182,7 @@ Status 
OlapScanNode::prepare(RuntimeState* state) { _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id); _scanner_mem_tracker = MemTracker::create_tracker(state->instance_mem_tracker()->limit(), - "Scanners", mem_tracker()); + "Scanners", mem_tracker()); if (_tuple_desc == nullptr) { // TODO: make sure we print all available diagnostic output to our error log @@ -1349,6 +1350,7 @@ Status OlapScanNode::normalize_bloom_filter_predicate(SlotDescriptor* slot) { void OlapScanNode::transfer_thread(RuntimeState* state) { // scanner open pushdown to scanThread + SCOPED_ATTACH_TASK_THREAD(state, mem_tracker()); Status status = Status::OK(); for (auto scanner : _olap_scanners) { status = Expr::clone_if_not_exists(_conjunct_ctxs, state, scanner->conjunct_ctxs()); @@ -1515,6 +1517,7 @@ void OlapScanNode::transfer_thread(RuntimeState* state) { } void OlapScanNode::scanner_thread(OlapScanner* scanner) { + SCOPED_ATTACH_TASK_THREAD(_runtime_state, mem_tracker()); if (UNLIKELY(_transfer_done)) { _scanner_done = true; std::unique_lock l(_scan_batches_lock); @@ -1596,8 +1599,7 @@ void OlapScanNode::scanner_thread(OlapScanner* scanner) { << ", fragment id=" << print_id(_runtime_state->fragment_instance_id()); break; } - RowBatch* row_batch = new RowBatch(this->row_desc(), state->batch_size(), - _scanner_mem_tracker.get()); + RowBatch* row_batch = new RowBatch(this->row_desc(), state->batch_size()); row_batch->set_scanner_id(scanner->id()); status = scanner->get_batch(_runtime_state, row_batch, &eos); if (!status.ok()) { diff --git a/be/src/exec/olap_scanner.cpp b/be/src/exec/olap_scanner.cpp index e95aa11267..2c35d5bd60 100644 --- a/be/src/exec/olap_scanner.cpp +++ b/be/src/exec/olap_scanner.cpp @@ -30,6 +30,7 @@ #include "runtime/descriptors.h" #include "runtime/mem_pool.h" #include "runtime/mem_tracker.h" +#include "runtime/thread_context.h" #include "runtime/runtime_state.h" #include "service/backend_options.h" #include "util/doris_metrics.h" @@ -49,8 +50,10 @@ 
OlapScanner::OlapScanner(RuntimeState* runtime_state, OlapScanNode* parent, bool _aggregation(aggregation), _need_agg_finalize(need_agg_finalize), _version(-1), - _mem_tracker(MemTracker::create_tracker(tracker->limit(), - tracker->label() + ":OlapScanner", tracker)) {} + _mem_tracker(MemTracker::create_tracker( + tracker->limit(), + tracker->label() + ":OlapScanner:" + thread_local_ctx.get()->thread_id_str(), + tracker)) {} Status OlapScanner::prepare( const TPaloScanRange& scan_range, const std::vector& key_ranges, @@ -92,7 +95,7 @@ Status OlapScanner::prepare( // the rowsets maybe compacted when the last olap scanner starts Version rd_version(0, _version); OLAPStatus acquire_reader_st = - _tablet->capture_rs_readers(rd_version, &_tablet_reader_params.rs_readers, _mem_tracker); + _tablet->capture_rs_readers(rd_version, &_tablet_reader_params.rs_readers); if (acquire_reader_st != OLAP_SUCCESS) { LOG(WARNING) << "fail to init reader.res=" << acquire_reader_st; std::stringstream ss; @@ -281,7 +284,7 @@ Status OlapScanner::get_batch(RuntimeState* state, RowBatch* batch, bool* eof) { } // Read one row from reader auto res = _tablet_reader->next_row_with_aggregation(&_read_row_cursor, mem_pool.get(), - batch->agg_object_pool(), eof); + batch->agg_object_pool(), eof); if (res != OLAP_SUCCESS) { std::stringstream ss; ss << "Internal Error: read storage fail. 
res=" << res diff --git a/be/src/exec/orc_scanner.cpp b/be/src/exec/orc_scanner.cpp index a20f77573b..2ea9f934f8 100644 --- a/be/src/exec/orc_scanner.cpp +++ b/be/src/exec/orc_scanner.cpp @@ -29,7 +29,7 @@ #include "runtime/tuple.h" #if defined(__x86_64__) - #include "exec/hdfs_file_reader.h" +#include "exec/hdfs_file_reader.h" #endif // orc include file didn't expose orc::TimezoneError diff --git a/be/src/exec/partitioned_aggregation_node.cc b/be/src/exec/partitioned_aggregation_node.cc index d3b9a1ab28..59fa6c3293 100644 --- a/be/src/exec/partitioned_aggregation_node.cc +++ b/be/src/exec/partitioned_aggregation_node.cc @@ -291,7 +291,7 @@ Status PartitionedAggregationNode::open(RuntimeState* state) { // Streaming preaggregations do all processing in GetNext(). if (is_streaming_preagg_) return Status::OK(); - RowBatch batch(child(0)->row_desc(), state->batch_size(), mem_tracker().get()); + RowBatch batch(child(0)->row_desc(), state->batch_size()); // Read all the rows from the child and process them. 
bool eos = false; do { @@ -360,7 +360,7 @@ Status PartitionedAggregationNode::get_next(RuntimeState* state, RowBatch* row_b // TODO: if ancestor node don't have a no-spilling blocking node, we could avoid a deep_copy // we should a flag indicate this node don't have to deep_copy DCHECK_EQ(row_batch->num_rows(), 0); - RowBatch batch(row_batch->row_desc(), row_batch->capacity(), _mem_tracker.get()); + RowBatch batch(row_batch->row_desc(), row_batch->capacity()); int first_row_idx = batch.num_rows(); RETURN_IF_ERROR(GetNextInternal(state, &batch, eos)); RETURN_IF_ERROR(HandleOutputStrings(&batch, first_row_idx)); @@ -533,8 +533,7 @@ Status PartitionedAggregationNode::GetRowsStreaming(RuntimeState* state, RowBatc DCHECK(is_streaming_preagg_); if (child_batch_ == nullptr) { - child_batch_.reset( - new RowBatch(child(0)->row_desc(), state->batch_size(), mem_tracker().get())); + child_batch_.reset(new RowBatch(child(0)->row_desc(), state->batch_size())); } do { @@ -745,9 +744,11 @@ Status PartitionedAggregationNode::Partition::InitStreams() { RETURN_IF_ERROR(aggregated_row_stream->Init(parent->id(), true)); bool got_buffer; RETURN_IF_ERROR(aggregated_row_stream->PrepareForWrite(&got_buffer)); - DCHECK(got_buffer) << "Buffer included in reservation " << parent->_id << "\n" - << parent->_buffer_pool_client.DebugString() << "\n" - << parent->DebugString(2); + // TODO(zxy) If exec_mem_limit is very small, DCHECK(false) will occur, the logic of + // reservation tracker needs to be deleted or refactored + // DCHECK(got_buffer) << "Buffer included in reservation " << parent->_id << "\n" + // << parent->_buffer_pool_client.DebugString() << "\n" + // << parent->DebugString(2); if (!parent->is_streaming_preagg_) { unaggregated_row_stream.reset(new BufferedTupleStream3( @@ -1345,7 +1346,7 @@ Status PartitionedAggregationNode::ProcessStream(BufferedTupleStream3* input_str bool eos = false; const RowDescriptor* desc = AGGREGATED_ROWS ? 
&intermediate_row_desc_ : &(_children[0]->row_desc()); - RowBatch batch(*desc, state_->batch_size(), mem_tracker().get()); + RowBatch batch(*desc, state_->batch_size()); do { RETURN_IF_ERROR(input_stream->GetNext(&batch, &eos)); RETURN_IF_ERROR(ProcessBatch(&batch, ht_ctx_.get())); diff --git a/be/src/exec/partitioned_hash_table.cc b/be/src/exec/partitioned_hash_table.cc index d197148b61..34294cb82c 100644 --- a/be/src/exec/partitioned_hash_table.cc +++ b/be/src/exec/partitioned_hash_table.cc @@ -151,7 +151,7 @@ Status PartitionedHashTableCtx::Open(RuntimeState* state) { void PartitionedHashTableCtx::Close(RuntimeState* state) { free(scratch_row_); scratch_row_ = nullptr; - expr_values_cache_.Close(tracker_); + expr_values_cache_.Close(); for (int i = 0; i < build_expr_evals_.size(); i++) { build_expr_evals_[i]->close(state); } @@ -337,7 +337,7 @@ Status PartitionedHashTableCtx::ExprValuesCache::Init(RuntimeState* state, return Status::OK(); } -void PartitionedHashTableCtx::ExprValuesCache::Close(const std::shared_ptr& tracker) { +void PartitionedHashTableCtx::ExprValuesCache::Close() { if (capacity_ == 0) return; cur_expr_values_ = nullptr; cur_expr_values_null_ = nullptr; @@ -347,8 +347,6 @@ void PartitionedHashTableCtx::ExprValuesCache::Close(const std::shared_ptrrelease(mem_usage); } int PartitionedHashTableCtx::ExprValuesCache::MemUsage(int capacity, int expr_values_bytes_per_row, diff --git a/be/src/exec/partitioned_hash_table.h b/be/src/exec/partitioned_hash_table.h index 23a9c3aaab..80007617d8 100644 --- a/be/src/exec/partitioned_hash_table.h +++ b/be/src/exec/partitioned_hash_table.h @@ -211,8 +211,7 @@ public: const std::vector& build_exprs); /// Frees up various resources and updates memory tracker with proper accounting. - /// 'tracker' should be the same memory tracker which was passed in for Init(). - void Close(const std::shared_ptr& tracker); + void Close(); /// Resets the cache states (iterators, end pointers etc) before writing. 
void Reset() noexcept; diff --git a/be/src/exec/repeat_node.cpp b/be/src/exec/repeat_node.cpp index 78d937edd2..401b40e318 100644 --- a/be/src/exec/repeat_node.cpp +++ b/be/src/exec/repeat_node.cpp @@ -175,8 +175,7 @@ Status RepeatNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) return Status::OK(); } - _child_row_batch.reset( - new RowBatch(child(0)->row_desc(), state->batch_size(), mem_tracker().get())); + _child_row_batch.reset(new RowBatch(child(0)->row_desc(), state->batch_size())); RETURN_IF_ERROR(child(0)->get_next(state, _child_row_batch.get(), &_child_eos)); if (_child_row_batch->num_rows() <= 0) { diff --git a/be/src/exec/schema_scan_node.cpp b/be/src/exec/schema_scan_node.cpp index b393452883..fae3a1dd9e 100644 --- a/be/src/exec/schema_scan_node.cpp +++ b/be/src/exec/schema_scan_node.cpp @@ -101,7 +101,7 @@ Status SchemaScanNode::prepare(RuntimeState* state) { RETURN_IF_ERROR(ScanNode::prepare(state)); // new one mem pool - _tuple_pool.reset(new (std::nothrow) MemPool(mem_tracker().get())); + _tuple_pool.reset(new (std::nothrow) MemPool()); if (nullptr == _tuple_pool.get()) { return Status::InternalError("Allocate MemPool failed."); diff --git a/be/src/exec/select_node.cpp b/be/src/exec/select_node.cpp index 25057686c4..a6db3a402d 100644 --- a/be/src/exec/select_node.cpp +++ b/be/src/exec/select_node.cpp @@ -33,8 +33,7 @@ SelectNode::SelectNode(ObjectPool* pool, const TPlanNode& tnode, const Descripto Status SelectNode::prepare(RuntimeState* state) { RETURN_IF_ERROR(ExecNode::prepare(state)); - _child_row_batch.reset( - new RowBatch(child(0)->row_desc(), state->batch_size(), mem_tracker().get())); + _child_row_batch.reset(new RowBatch(child(0)->row_desc(), state->batch_size())); return Status::OK(); } diff --git a/be/src/exec/set_operation_node.cpp b/be/src/exec/set_operation_node.cpp index 827e30a03c..7faa561222 100644 --- a/be/src/exec/set_operation_node.cpp +++ b/be/src/exec/set_operation_node.cpp @@ -146,7 +146,7 @@ Status 
SetOperationNode::open(RuntimeState* state) { // initial build hash table used for remove duplicated _hash_tbl.reset(new HashTable(_child_expr_lists[0], _child_expr_lists[1], _build_tuple_size, true, _find_nulls, id(), mem_tracker(), state->batch_size() * 2)); - RowBatch build_batch(child(0)->row_desc(), state->batch_size(), mem_tracker().get()); + RowBatch build_batch(child(0)->row_desc(), state->batch_size()); RETURN_IF_ERROR(child(0)->open(state)); bool eos = false; diff --git a/be/src/exec/spill_sort_node.cc b/be/src/exec/spill_sort_node.cc index ef527a18ed..58802741c0 100644 --- a/be/src/exec/spill_sort_node.cc +++ b/be/src/exec/spill_sort_node.cc @@ -153,7 +153,7 @@ void SpillSortNode::debug_string(int indentation_level, stringstream* out) const } Status SpillSortNode::sort_input(RuntimeState* state) { - RowBatch batch(child(0)->row_desc(), state->batch_size(), mem_tracker().get()); + RowBatch batch(child(0)->row_desc(), state->batch_size()); bool eos = false; do { batch.reset(); diff --git a/be/src/exec/table_function_node.cpp b/be/src/exec/table_function_node.cpp index db262054c6..3240df5013 100644 --- a/be/src/exec/table_function_node.cpp +++ b/be/src/exec/table_function_node.cpp @@ -206,8 +206,7 @@ Status TableFunctionNode::get_next(RuntimeState* state, RowBatch* row_batch, boo RETURN_IF_ERROR(state->check_query_state("TableFunctionNode, while getting next batch.")); if (_cur_child_batch == nullptr) { - _cur_child_batch.reset( - new RowBatch(child_rowdesc, state->batch_size(), mem_tracker().get())); + _cur_child_batch.reset(new RowBatch(child_rowdesc, state->batch_size())); } if (_child_batch_exhausted) { if (_child_eos) { diff --git a/be/src/exec/tablet_info.cpp b/be/src/exec/tablet_info.cpp index 483ecbdccb..edc7d0eca4 100644 --- a/be/src/exec/tablet_info.cpp +++ b/be/src/exec/tablet_info.cpp @@ -416,7 +416,7 @@ VOlapTablePartitionParam::VOlapTablePartitionParam(std::shared_ptrtuple_desc()->slots()), - _mem_tracker(MemTracker::create_tracker(-1, 
"OlapTablePartitionParam")) { + _mem_tracker(MemTracker::create_virtual_tracker(-1, "OlapTablePartitionParam")) { for (auto slot : _slots) { _partition_block.insert({slot->get_empty_mutable_column(), slot->get_data_type_ptr(), slot->col_name()}); } diff --git a/be/src/exec/tablet_sink.cpp b/be/src/exec/tablet_sink.cpp index be547cfd91..6be3e2dba1 100644 --- a/be/src/exec/tablet_sink.cpp +++ b/be/src/exec/tablet_sink.cpp @@ -29,6 +29,7 @@ #include "runtime/exec_env.h" #include "runtime/row_batch.h" #include "runtime/runtime_state.h" +#include "runtime/thread_context.h" #include "runtime/tuple_row.h" #include "service/backend_options.h" #include "service/brpc.h" @@ -49,6 +50,7 @@ NodeChannel::NodeChannel(OlapTableSink* parent, IndexChannel* index_channel, int if (_parent->_transfer_data_by_brpc_attachment) { _tuple_data_buffer_ptr = &_tuple_data_buffer; } + _node_channel_tracker = MemTracker::create_tracker(-1, "NodeChannel" + thread_local_ctx.get()->thread_id_str()); } NodeChannel::~NodeChannel() noexcept { @@ -83,7 +85,7 @@ Status NodeChannel::init(RuntimeState* state) { _row_desc.reset(new RowDescriptor(_tuple_desc, false)); _batch_size = state->batch_size(); - _cur_batch.reset(new RowBatch(*_row_desc, _batch_size, _parent->_mem_tracker.get())); + _cur_batch.reset(new RowBatch(*_row_desc, _batch_size)); _stub = state->exec_env()->brpc_internal_client_cache()->get_client(_node_info.host, _node_info.brpc_port); @@ -279,7 +281,7 @@ Status NodeChannel::add_row(Tuple* input_tuple, int64_t tablet_id) { _pending_batches_num++; } - _cur_batch.reset(new RowBatch(*_row_desc, _batch_size, _parent->_mem_tracker.get())); + _cur_batch.reset(new RowBatch(*_row_desc, _batch_size)); _cur_add_batch_request.clear_tablet_ids(); row_no = _cur_batch->add_row(); @@ -331,7 +333,7 @@ Status NodeChannel::add_row(BlockRow& block_row, int64_t tablet_id) { _pending_batches_num++; } - _cur_batch.reset(new RowBatch(*_row_desc, _batch_size, _parent->_mem_tracker.get())); + _cur_batch.reset(new 
RowBatch(*_row_desc, _batch_size)); _cur_add_batch_request.clear_tablet_ids(); row_no = _cur_batch->add_row(); @@ -443,7 +445,8 @@ void NodeChannel::cancel(const std::string& cancel_msg) { request.release_id(); } -int NodeChannel::try_send_and_fetch_status(std::unique_ptr& thread_pool_token) { +int NodeChannel::try_send_and_fetch_status(RuntimeState* state, + std::unique_ptr& thread_pool_token) { auto st = none_of({_cancelled, _send_finished}); if (!st.ok()) { return 0; @@ -451,7 +454,8 @@ int NodeChannel::try_send_and_fetch_status(std::unique_ptr& thr bool is_finished = true; if (!_add_batch_closure->is_packet_in_flight() && _pending_batches_num > 0 && _last_patch_processed_finished.compare_exchange_strong(is_finished, false)) { - auto s = thread_pool_token->submit_func(std::bind(&NodeChannel::try_send_batch, this)); + auto s = thread_pool_token->submit_func( + std::bind(&NodeChannel::try_send_batch, this, state)); if (!s.ok()) { _cancel_with_msg("submit send_batch task to send_batch_thread_pool failed"); } @@ -459,7 +463,8 @@ int NodeChannel::try_send_and_fetch_status(std::unique_ptr& thr return _send_finished ? 
0 : 1; } -void NodeChannel::try_send_batch() { +void NodeChannel::try_send_batch(RuntimeState* state) { + SCOPED_ATTACH_TASK_THREAD(state, _node_channel_tracker); SCOPED_ATOMIC_TIMER(&_actual_consume_ns); AddBatchReq send_batch; { @@ -764,7 +769,7 @@ Status OlapTableSink::prepare(RuntimeState* state) { } _output_row_desc = _pool->add(new RowDescriptor(_output_tuple_desc, false)); - _output_batch.reset(new RowBatch(*_output_row_desc, state->batch_size(), _mem_tracker.get())); + _output_batch.reset(new RowBatch(*_output_row_desc, state->batch_size())); _max_decimalv2_val.resize(_output_tuple_desc->slots().size()); _min_decimalv2_val.resize(_output_tuple_desc->slots().size()); @@ -865,8 +870,8 @@ Status OlapTableSink::open(RuntimeState* state) { _send_batch_thread_pool_token = state->exec_env()->send_batch_thread_pool()->new_token( ThreadPool::ExecutionMode::CONCURRENT, send_batch_parallelism); RETURN_IF_ERROR(Thread::create( - "OlapTableSink", "send_batch_process", [this]() { this->_send_batch_process(); }, - &_sender_thread)); + "OlapTableSink", "send_batch_process", + [this, state]() { this->_send_batch_process(state); }, &_sender_thread)); return Status::OK(); } @@ -1248,14 +1253,15 @@ Status OlapTableSink::_validate_data(RuntimeState* state, RowBatch* batch, Bitma return Status::OK(); } -void OlapTableSink::_send_batch_process() { +void OlapTableSink::_send_batch_process(RuntimeState* state) { SCOPED_TIMER(_non_blocking_send_timer); + SCOPED_ATTACH_TASK_THREAD(state, _mem_tracker); do { int running_channels_num = 0; for (auto index_channel : _channels) { - index_channel->for_each_node_channel([&running_channels_num, this](const std::shared_ptr& ch) { + index_channel->for_each_node_channel([&running_channels_num, this, state](const std::shared_ptr& ch) { running_channels_num += - ch->try_send_and_fetch_status(this->_send_batch_thread_pool_token); + ch->try_send_and_fetch_status(state, this->_send_batch_thread_pool_token); }); } diff --git 
a/be/src/exec/tablet_sink.h b/be/src/exec/tablet_sink.h index b5c10b7d56..e0161ba76e 100644 --- a/be/src/exec/tablet_sink.h +++ b/be/src/exec/tablet_sink.h @@ -180,9 +180,10 @@ public: // 1: running, haven't reach eos. // only allow 1 rpc in flight // plz make sure, this func should be called after open_wait(). - int try_send_and_fetch_status(std::unique_ptr& thread_pool_token); + int try_send_and_fetch_status(RuntimeState* state, + std::unique_ptr& thread_pool_token); - void try_send_batch(); + void try_send_batch(RuntimeState* state); void time_report(std::unordered_map* add_batch_counter_map, int64_t* serialize_batch_ns, int64_t* mem_exceeded_block_ns, @@ -206,7 +207,6 @@ public: Status none_of(std::initializer_list vars); - // TODO(HW): remove after mem tracker shared void clear_all_batches(); std::string channel_info() const { @@ -224,6 +224,8 @@ private: std::string _load_info; std::string _name; + std::shared_ptr _node_channel_tracker; + TupleDescriptor* _tuple_desc = nullptr; NodeInfo _node_info; @@ -299,7 +301,9 @@ private: class IndexChannel { public: - IndexChannel(OlapTableSink* parent, int64_t index_id) : _parent(parent), _index_id(index_id) {} + IndexChannel(OlapTableSink* parent, int64_t index_id) : _parent(parent), _index_id(index_id) { + _index_channel_tracker = MemTracker::create_tracker(-1, "IndexChannel"); + } ~IndexChannel(); Status init(RuntimeState* state, const std::vector& tablets); @@ -347,6 +351,8 @@ private: // key is tablet_id, value is error message std::unordered_map _failed_channels_msgs; Status _intolerable_failure_status = Status::OK(); + + std::shared_ptr _index_channel_tracker; // TODO(zxy) use after }; // Write data to Olap Table. @@ -389,7 +395,7 @@ private: // the consumer func of sending pending batches in every NodeChannel. // use polling & NodeChannel::try_send_and_fetch_status() to achieve nonblocking sending. 
// only focus on pending batches and channel status, the internal errors of NodeChannels will be handled by the producer - void _send_batch_process(); + void _send_batch_process(RuntimeState* state); protected: friend class NodeChannel; diff --git a/be/src/exec/topn_node.cpp b/be/src/exec/topn_node.cpp index 7e98e1d329..9f7cb71fc3 100644 --- a/be/src/exec/topn_node.cpp +++ b/be/src/exec/topn_node.cpp @@ -95,7 +95,7 @@ Status TopNNode::open(RuntimeState* state) { // Limit of 0, no need to fetch anything from children. if (_limit != 0) { - RowBatch batch(child(0)->row_desc(), state->batch_size(), mem_tracker().get()); + RowBatch batch(child(0)->row_desc(), state->batch_size()); bool eos = false; do { diff --git a/be/src/exec/union_node.cpp b/be/src/exec/union_node.cpp index e4d13e2005..d9c389f823 100644 --- a/be/src/exec/union_node.cpp +++ b/be/src/exec/union_node.cpp @@ -152,8 +152,7 @@ Status UnionNode::get_next_materialized(RuntimeState* state, RowBatch* row_batch // Child row batch was either never set or we're moving on to a different child. if (_child_batch.get() == nullptr) { DCHECK_LT(_child_idx, _children.size()); - _child_batch.reset(new RowBatch(child(_child_idx)->row_desc(), state->batch_size(), - mem_tracker().get())); + _child_batch.reset(new RowBatch(child(_child_idx)->row_desc(), state->batch_size())); _child_row_idx = 0; // open the current child unless it's the first child, which was already opened in // UnionNode::open(). 
diff --git a/be/src/exprs/agg_fn_evaluator.cpp b/be/src/exprs/agg_fn_evaluator.cpp index 7d790d5361..b458c6f801 100644 --- a/be/src/exprs/agg_fn_evaluator.cpp +++ b/be/src/exprs/agg_fn_evaluator.cpp @@ -154,9 +154,9 @@ Status AggFnEvaluator::prepare(RuntimeState* state, const RowDescriptor& desc, M _intermediate_slot_desc = intermediate_slot_desc; _string_buffer_len = 0; - _mem_tracker = mem_tracker; + _mem_tracker = MemTracker::create_virtual_tracker(-1, "AggFnEvaluator", mem_tracker); - Status status = Expr::prepare(_input_exprs_ctxs, state, desc, _mem_tracker); + Status status = Expr::prepare(_input_exprs_ctxs, state, desc, mem_tracker); RETURN_IF_ERROR(status); ObjectPool* obj_pool = state->obj_pool(); diff --git a/be/src/exprs/bloomfilter_predicate.h b/be/src/exprs/bloomfilter_predicate.h index a55a1ce6ab..d9ce266c91 100644 --- a/be/src/exprs/bloomfilter_predicate.h +++ b/be/src/exprs/bloomfilter_predicate.h @@ -88,18 +88,21 @@ public: virtual Status assign(const char* data, int len) = 0; virtual Status get_data(char** data, int* len) = 0; - virtual MemTracker* tracker() = 0; virtual void light_copy(IBloomFilterFuncBase* other) = 0; }; template class BloomFilterFuncBase : public IBloomFilterFuncBase { public: - BloomFilterFuncBase(MemTracker* tracker) : _tracker(tracker), _inited(false) {} + BloomFilterFuncBase() : _inited(false) { + _tracker = MemTracker::create_virtual_tracker(-1, "BloomFilterFunc"); + } - // Do not release _bloom_filter_alloced, this does not affect the final statistic. - // RuntimeFilterMgr._tracker will be destructed first in ~RuntimeState. 
- virtual ~BloomFilterFuncBase() {} + virtual ~BloomFilterFuncBase() { + if (_tracker != nullptr) { + _tracker->release(_bloom_filter_alloced); + } + } Status init(int64_t expect_num, double fpp) override { size_t filter_size = BloomFilterAdaptor::optimal_bit_num(expect_num, fpp); @@ -146,18 +149,16 @@ public: return Status::OK(); } - MemTracker* tracker() override { return _tracker; } - void light_copy(IBloomFilterFuncBase* bloomfilter_func) override { auto other_func = static_cast(bloomfilter_func); - _tracker = nullptr; + _tracker = nullptr; // Avoid repeated release when ~BloomFilterFuncBase _bloom_filter_alloced = other_func->_bloom_filter_alloced; _bloom_filter = other_func->_bloom_filter; _inited = other_func->_inited; } protected: - MemTracker* _tracker; + std::shared_ptr _tracker; // bloom filter size int32_t _bloom_filter_alloced; std::shared_ptr _bloom_filter; @@ -296,7 +297,7 @@ struct BloomFilterTypeTraits { template class BloomFilterFunc final : public BloomFilterFuncBase { public: - BloomFilterFunc(MemTracker* tracker) : BloomFilterFuncBase(tracker) {} + BloomFilterFunc() : BloomFilterFuncBase() {} ~BloomFilterFunc() = default; diff --git a/be/src/exprs/create_predicate_function.h b/be/src/exprs/create_predicate_function.h index 27aef88c92..b8acae5964 100644 --- a/be/src/exprs/create_predicate_function.h +++ b/be/src/exprs/create_predicate_function.h @@ -20,7 +20,6 @@ #include "exprs/bloomfilter_predicate.h" #include "exprs/hybrid_set.h" #include "exprs/minmax_predicate.h" -#include "runtime/mem_tracker.h" namespace doris { @@ -28,7 +27,7 @@ class MinmaxFunctionTraits { public: using BasePtr = MinMaxFuncBase*; template - static BasePtr get_function([[maybe_unused]] MemTracker* tracker) { + static BasePtr get_function() { return new (std::nothrow) MinMaxNumFunc::CppType>(); }; }; @@ -37,7 +36,7 @@ class HybridSetTraits { public: using BasePtr = HybridSetBase*; template - static BasePtr get_function([[maybe_unused]] MemTracker* tracker) { + static 
BasePtr get_function() { using CppType = typename PrimitiveTypeTraits::CppType; using Set = std::conditional_t, StringValueSet, HybridSet>; @@ -49,8 +48,8 @@ class BloomFilterTraits { public: using BasePtr = IBloomFilterFuncBase*; template - static BasePtr get_function(MemTracker* tracker) { - return new BloomFilterFunc(tracker); + static BasePtr get_function() { + return new BloomFilterFunc(); }; }; @@ -58,49 +57,48 @@ template class PredicateFunctionCreator { public: template - static typename Traits::BasePtr create(MemTracker* tracker = nullptr) { - return Traits::template get_function(tracker); + static typename Traits::BasePtr create() { + return Traits::template get_function(); } }; template -typename Traits::BasePtr create_predicate_function(PrimitiveType type, - MemTracker* tracker = nullptr) { +typename Traits::BasePtr create_predicate_function(PrimitiveType type) { using Creator = PredicateFunctionCreator; switch (type) { case TYPE_BOOLEAN: - return Creator::template create(tracker); + return Creator::template create(); case TYPE_TINYINT: - return Creator::template create(tracker); + return Creator::template create(); case TYPE_SMALLINT: - return Creator::template create(tracker); + return Creator::template create(); case TYPE_INT: - return Creator::template create(tracker); + return Creator::template create(); case TYPE_BIGINT: - return Creator::template create(tracker); + return Creator::template create(); case TYPE_LARGEINT: - return Creator::template create(tracker); + return Creator::template create(); case TYPE_FLOAT: - return Creator::template create(tracker); + return Creator::template create(); case TYPE_DOUBLE: - return Creator::template create(tracker); + return Creator::template create(); case TYPE_DECIMALV2: - return Creator::template create(tracker); + return Creator::template create(); case TYPE_DATE: - return Creator::template create(tracker); + return Creator::template create(); case TYPE_DATETIME: - return Creator::template 
create(tracker); + return Creator::template create(); case TYPE_CHAR: - return Creator::template create(tracker); + return Creator::template create(); case TYPE_VARCHAR: - return Creator::template create(tracker); + return Creator::template create(); case TYPE_STRING: - return Creator::template create(tracker); + return Creator::template create(); default: DCHECK(false) << "Invalid type."; @@ -117,8 +115,8 @@ inline auto create_set(PrimitiveType type) { return create_predicate_function(type); } -inline auto create_bloom_filter(MemTracker* tracker, PrimitiveType type) { - return create_predicate_function(type, tracker); +inline auto create_bloom_filter(PrimitiveType type) { + return create_predicate_function(type); } } // namespace doris \ No newline at end of file diff --git a/be/src/exprs/expr.cpp b/be/src/exprs/expr.cpp index d4c00f7521..1bf229b4e9 100644 --- a/be/src/exprs/expr.cpp +++ b/be/src/exprs/expr.cpp @@ -824,7 +824,7 @@ void Expr::assign_fn_ctx_idx(int* next_fn_ctx_idx) { Status Expr::create(const TExpr& texpr, const RowDescriptor& row_desc, RuntimeState* state, ObjectPool* pool, Expr** scalar_expr) { *scalar_expr = nullptr; - Expr* root; + Expr* root = nullptr; RETURN_IF_ERROR(create_expr(pool, texpr.nodes[0], &root)); RETURN_IF_ERROR(create_tree(texpr, pool, root)); // TODO pengyubing replace by Init() @@ -848,7 +848,7 @@ Status Expr::create(const std::vector& texprs, const RowDescriptor& row_d RuntimeState* state, ObjectPool* pool, std::vector* exprs) { exprs->clear(); for (const TExpr& texpr : texprs) { - Expr* expr; + Expr* expr = nullptr; RETURN_IF_ERROR(create(texpr, row_desc, state, pool, &expr)); DCHECK(expr != nullptr); exprs->push_back(expr); diff --git a/be/src/exprs/runtime_filter.cpp b/be/src/exprs/runtime_filter.cpp index 48cee872cc..7226eaa4d4 100644 --- a/be/src/exprs/runtime_filter.cpp +++ b/be/src/exprs/runtime_filter.cpp @@ -320,23 +320,17 @@ BinaryPredicate* create_bin_predicate(ObjectPool* pool, PrimitiveType prim_type, // This 
class is a wrapper of runtime predicate function class RuntimePredicateWrapper { public: - RuntimePredicateWrapper(RuntimeState* state, MemTracker* tracker, ObjectPool* pool, + RuntimePredicateWrapper(RuntimeState* state, ObjectPool* pool, const RuntimeFilterParams* params) - : _tracker(tracker), - _pool(pool), + : _pool(pool), _column_return_type(params->column_return_type), _filter_type(params->filter_type), _fragment_instance_id(params->fragment_instance_id), _filter_id(params->filter_id) {} // for a 'tmp' runtime predicate wrapper // only could called assign method or as a param for merge - RuntimePredicateWrapper(MemTracker* tracker, ObjectPool* pool, RuntimeFilterType type, - UniqueId fragment_instance_id, uint32_t filter_id) - : _tracker(tracker), - _pool(pool), - _filter_type(type), - _fragment_instance_id(fragment_instance_id), - _filter_id(filter_id) {} + RuntimePredicateWrapper(ObjectPool* pool, RuntimeFilterType type, UniqueId fragment_instance_id, uint32_t filter_id) + : _pool(pool), _filter_type(type), _fragment_instance_id(fragment_instance_id), _filter_id(filter_id) {} // init runtime filter wrapper // alloc memory to init runtime filter function Status init(const RuntimeFilterParams* params) { @@ -352,12 +346,12 @@ public: } case RuntimeFilterType::BLOOM_FILTER: { _is_bloomfilter = true; - _bloomfilter_func.reset(create_bloom_filter(_tracker, _column_return_type)); + _bloomfilter_func.reset(create_bloom_filter(_column_return_type)); return _bloomfilter_func->init_with_fixed_length(params->bloom_filter_size); } case RuntimeFilterType::IN_OR_BLOOM_FILTER: { _hybrid_set.reset(create_set(_column_return_type)); - _bloomfilter_func.reset(create_bloom_filter(_tracker, _column_return_type)); + _bloomfilter_func.reset(create_bloom_filter(_column_return_type)); return _bloomfilter_func->init_with_fixed_length(params->bloom_filter_size); } default: @@ -630,8 +624,6 @@ public: } Status assign(const PInFilter* in_filter) { - DCHECK(_tracker != nullptr); - 
PrimitiveType type = to_primitive_type(in_filter->column_type()); if (in_filter->has_ignored_msg()) { VLOG_DEBUG << "Ignore in filter(id=" << _filter_id @@ -754,18 +746,16 @@ public: // used by shuffle runtime filter // assign this filter by protobuf Status assign(const PBloomFilter* bloom_filter, const char* data) { - DCHECK(_tracker != nullptr); _is_bloomfilter = true; // we won't use this class to insert or find any data // so any type is ok - _bloomfilter_func.reset(create_bloom_filter(_tracker, PrimitiveType::TYPE_INT)); + _bloomfilter_func.reset(create_bloom_filter(PrimitiveType::TYPE_INT)); return _bloomfilter_func->assign(data, bloom_filter->filter_length()); } // used by shuffle runtime filter // assign this filter by protobuf Status assign(const PMinMaxFilter* minmax_filter) { - DCHECK(_tracker != nullptr); PrimitiveType type = to_primitive_type(minmax_filter->column_type()); _minmax_func.reset(create_minmax_filter(type)); switch (type) { @@ -904,7 +894,6 @@ public: } private: - MemTracker* _tracker; ObjectPool* _pool; PrimitiveType _column_return_type; // column type RuntimeFilterType _filter_type; @@ -919,10 +908,10 @@ private: uint32_t _filter_id; }; -Status IRuntimeFilter::create(RuntimeState* state, MemTracker* tracker, ObjectPool* pool, +Status IRuntimeFilter::create(RuntimeState* state, ObjectPool* pool, const TRuntimeFilterDesc* desc, const TQueryOptions* query_options, const RuntimeFilterRole role, int node_id, IRuntimeFilter** res) { - *res = pool->add(new IRuntimeFilter(state, tracker, pool)); + *res = pool->add(new IRuntimeFilter(state, pool)); (*res)->set_role(role); UniqueId fragment_instance_id(state->fragment_instance_id()); return (*res)->init_with_desc(desc, query_options, fragment_instance_id, node_id); @@ -1062,7 +1051,7 @@ Status IRuntimeFilter::init_with_desc(const TRuntimeFilterDesc* desc, const TQue RETURN_IF_ERROR(Expr::create_expr_tree(_pool, iter->second, &_probe_ctx)); } - _wrapper = _pool->add(new 
RuntimePredicateWrapper(_state, _mem_tracker, _pool, ¶ms)); + _wrapper = _pool->add(new RuntimePredicateWrapper(_state, _pool, ¶ms)); return _wrapper->init(¶ms); } @@ -1074,16 +1063,14 @@ Status IRuntimeFilter::serialize(PPublishFilterRequest* request, void** data, in return serialize_impl(request, data, len); } -Status IRuntimeFilter::create_wrapper(const MergeRuntimeFilterParams* param, MemTracker* tracker, - ObjectPool* pool, +Status IRuntimeFilter::create_wrapper(const MergeRuntimeFilterParams* param, ObjectPool* pool, std::unique_ptr* wrapper) { - return _create_wrapper(param, tracker, pool, wrapper); + return _create_wrapper(param, pool, wrapper); } -Status IRuntimeFilter::create_wrapper(const UpdateRuntimeFilterParams* param, MemTracker* tracker, - ObjectPool* pool, +Status IRuntimeFilter::create_wrapper(const UpdateRuntimeFilterParams* param, ObjectPool* pool, std::unique_ptr* wrapper) { - return _create_wrapper(param, tracker, pool, wrapper); + return _create_wrapper(param, pool, wrapper); } void IRuntimeFilter::change_to_bloom_filter() { @@ -1095,12 +1082,11 @@ void IRuntimeFilter::change_to_bloom_filter() { } template -Status IRuntimeFilter::_create_wrapper(const T* param, MemTracker* tracker, ObjectPool* pool, +Status IRuntimeFilter::_create_wrapper(const T* param, ObjectPool* pool, std::unique_ptr* wrapper) { int filter_type = param->request->filter_type(); - wrapper->reset(new RuntimePredicateWrapper(tracker, pool, get_type(filter_type), - UniqueId(param->request->fragment_id()), - param->request->filter_id())); + wrapper->reset(new RuntimePredicateWrapper(pool, get_type(filter_type), + UniqueId(param->request->fragment_id()), param->request->filter_id())); switch (filter_type) { case PFilterType::IN_FILTER: { @@ -1399,7 +1385,7 @@ Status IRuntimeFilter::update_filter(const UpdateRuntimeFilterParams* param) { set_ignored_msg(*msg); } std::unique_ptr wrapper; - RETURN_IF_ERROR(IRuntimeFilter::create_wrapper(param, _mem_tracker, _pool, &wrapper)); + 
RETURN_IF_ERROR(IRuntimeFilter::create_wrapper(param, _pool, &wrapper)); auto origin_type = _wrapper->get_real_type(); RETURN_IF_ERROR(_wrapper->merge(wrapper.get())); if (origin_type != _wrapper->get_real_type()) { diff --git a/be/src/exprs/runtime_filter.h b/be/src/exprs/runtime_filter.h index 663843d2b6..8d5b433b9b 100644 --- a/be/src/exprs/runtime_filter.h +++ b/be/src/exprs/runtime_filter.h @@ -106,9 +106,8 @@ struct MergeRuntimeFilterParams { /// that can be pushed down to node based on the results of the right table. class IRuntimeFilter { public: - IRuntimeFilter(RuntimeState* state, MemTracker* mem_tracker, ObjectPool* pool) + IRuntimeFilter(RuntimeState* state, ObjectPool* pool) : _state(state), - _mem_tracker(mem_tracker), _pool(pool), _runtime_filter_type(RuntimeFilterType::UNKNOWN_FILTER), _filter_id(-1), @@ -124,7 +123,7 @@ public: ~IRuntimeFilter() = default; - static Status create(RuntimeState* state, MemTracker* tracker, ObjectPool* pool, + static Status create(RuntimeState* state, ObjectPool* pool, const TRuntimeFilterDesc* desc, const TQueryOptions* query_options, const RuntimeFilterRole role, int node_id, IRuntimeFilter** res); @@ -191,11 +190,9 @@ public: // for ut const RuntimePredicateWrapper* get_wrapper(); - static Status create_wrapper(const MergeRuntimeFilterParams* param, MemTracker* tracker, - ObjectPool* pool, + static Status create_wrapper(const MergeRuntimeFilterParams* param, ObjectPool* pool, std::unique_ptr* wrapper); - static Status create_wrapper(const UpdateRuntimeFilterParams* param, MemTracker* tracker, - ObjectPool* pool, + static Status create_wrapper(const UpdateRuntimeFilterParams* param, ObjectPool* pool, std::unique_ptr* wrapper); void change_to_bloom_filter(); Status update_filter(const UpdateRuntimeFilterParams* param); @@ -234,11 +231,10 @@ protected: Status serialize_impl(T* request, void** data, int* len); template - static Status _create_wrapper(const T* param, MemTracker* tracker, ObjectPool* pool, + static 
Status _create_wrapper(const T* param, ObjectPool* pool, std::unique_ptr* wrapper); RuntimeState* _state; - MemTracker* _mem_tracker; ObjectPool* _pool; // _wrapper is a runtime filter function wrapper // _wrapper should alloc from _pool diff --git a/be/src/gutil/strings/numbers.cc b/be/src/gutil/strings/numbers.cc index 6cc76d2485..568b0ce2a4 100644 --- a/be/src/gutil/strings/numbers.cc +++ b/be/src/gutil/strings/numbers.cc @@ -1489,10 +1489,10 @@ string AccurateItoaKMGT(int64 i) { i = -i; } - string ret = StringPrintf("%s", sign); + string ret = std::to_string(i) + " : " + StringPrintf("%s", sign); int64 val; if ((val = (i >> 40)) > 1) { - ret += StringPrintf(" %" PRId64 "%s", val, "T"); + ret += StringPrintf("%" PRId64 "%s", val, "T"); i = i - (val << 40); } if ((val = (i >> 30)) > 1) { diff --git a/be/src/http/action/compaction_action.cpp b/be/src/http/action/compaction_action.cpp index b9548a320d..157e7ccc15 100644 --- a/be/src/http/action/compaction_action.cpp +++ b/be/src/http/action/compaction_action.cpp @@ -225,7 +225,7 @@ OLAPStatus CompactionAction::_execute_compaction_callback(TabletSharedPtr tablet OLAPStatus status = OLAP_SUCCESS; if (compaction_type == PARAM_COMPACTION_BASE) { - BaseCompaction base_compaction(tablet, _compaction_mem_tracker); + BaseCompaction base_compaction(tablet); OLAPStatus res = base_compaction.compact(); if (res != OLAP_SUCCESS && res != OLAP_ERR_BE_NO_SUITABLE_VERSION) { DorisMetrics::instance()->base_compaction_request_failed->increment(1); @@ -234,7 +234,7 @@ OLAPStatus CompactionAction::_execute_compaction_callback(TabletSharedPtr tablet } status = res; } else if (compaction_type == PARAM_COMPACTION_CUMULATIVE) { - CumulativeCompaction cumulative_compaction(tablet, _compaction_mem_tracker); + CumulativeCompaction cumulative_compaction(tablet); OLAPStatus res = cumulative_compaction.compact(); if (res != OLAP_SUCCESS && res != OLAP_ERR_CUMULATIVE_NO_SUITABLE_VERSION) { 
DorisMetrics::instance()->cumulative_compaction_request_failed->increment(1); diff --git a/be/src/http/default_path_handlers.cpp b/be/src/http/default_path_handlers.cpp index 2b7803344b..0495c6f5b7 100644 --- a/be/src/http/default_path_handlers.cpp +++ b/be/src/http/default_path_handlers.cpp @@ -150,15 +150,9 @@ void mem_tracker_handler(const WebPageHandler::ArgumentMap& args, std::stringstr string limit_str; string current_consumption_str; string peak_consumption_str; - if (!config::memory_leak_detection) { - limit_str = tracker->limit() == -1 ? "none" : ItoaKMGT(tracker->limit()); - current_consumption_str = ItoaKMGT(tracker->consumption()); - peak_consumption_str = ItoaKMGT(tracker->peak_consumption()); - } else { - limit_str = tracker->limit() == -1 ? "none" : AccurateItoaKMGT(tracker->limit()); - current_consumption_str = AccurateItoaKMGT(tracker->consumption()); - peak_consumption_str = AccurateItoaKMGT(tracker->peak_consumption()); - } + limit_str = tracker->limit() == -1 ? "none" : AccurateItoaKMGT(tracker->limit()); + current_consumption_str = AccurateItoaKMGT(tracker->consumption()); + peak_consumption_str = AccurateItoaKMGT(tracker->peak_consumption()); int64_t use_count = tracker.use_count(); (*output) << strings::Substitute( diff --git a/be/src/olap/aggregate_func.h b/be/src/olap/aggregate_func.h index 141beed99f..953d7d52d3 100644 --- a/be/src/olap/aggregate_func.h +++ b/be/src/olap/aggregate_func.h @@ -24,7 +24,6 @@ #include "runtime/datetime_value.h" #include "runtime/decimalv2_value.h" #include "runtime/mem_pool.h" -#include "runtime/mem_tracker.h" #include "runtime/string_value.h" #include "util/bitmap_value.h" @@ -488,8 +487,6 @@ struct AggregateFuncTraitsdata = reinterpret_cast(hll); - mem_pool->mem_tracker()->consume(hll->memory_consumed()); - agg_pool->add(hll); } @@ -534,7 +531,6 @@ struct AggregateFuncTraitssize = 0; auto bitmap = new BitmapValue(src_slice->data); - mem_pool->mem_tracker()->consume(sizeof(BitmapValue)); dst_slice->data = 
(char*)bitmap; agg_pool->add(bitmap); diff --git a/be/src/olap/base_compaction.cpp b/be/src/olap/base_compaction.cpp index c34fb63d2d..986044ef44 100644 --- a/be/src/olap/base_compaction.cpp +++ b/be/src/olap/base_compaction.cpp @@ -19,11 +19,12 @@ #include "util/doris_metrics.h" #include "util/trace.h" +#include "runtime/thread_context.h" namespace doris { -BaseCompaction::BaseCompaction(TabletSharedPtr tablet, const std::shared_ptr& parent_tracker) - : Compaction(tablet, "BaseCompaction:" + std::to_string(tablet->tablet_id()), parent_tracker) {} +BaseCompaction::BaseCompaction(TabletSharedPtr tablet) + : Compaction(tablet, "BaseCompaction:" + std::to_string(tablet->tablet_id())) {} BaseCompaction::~BaseCompaction() {} diff --git a/be/src/olap/base_compaction.h b/be/src/olap/base_compaction.h index 61a50e6264..d4c2c2f360 100644 --- a/be/src/olap/base_compaction.h +++ b/be/src/olap/base_compaction.h @@ -29,7 +29,7 @@ namespace doris { class BaseCompaction : public Compaction { public: - BaseCompaction(TabletSharedPtr tablet, const std::shared_ptr& parent_tracker); + BaseCompaction(TabletSharedPtr tablet); ~BaseCompaction() override; OLAPStatus prepare_compact() override; diff --git a/be/src/olap/bloom_filter_predicate.cpp b/be/src/olap/bloom_filter_predicate.cpp index 4812735718..834df71989 100644 --- a/be/src/olap/bloom_filter_predicate.cpp +++ b/be/src/olap/bloom_filter_predicate.cpp @@ -41,19 +41,19 @@ ColumnPredicate* BloomFilterColumnPredicateFactory::create_column_predicate( switch (type) { #define M(NAME) \ case OLAP_FIELD_##NAME: { \ - filter.reset(create_bloom_filter(bloom_filter->tracker(), NAME)); \ + filter.reset(create_bloom_filter(NAME)); \ filter->light_copy(bloom_filter.get()); \ return new BloomFilterColumnPredicate(column_id, filter); \ } APPLY_FOR_PRIMTYPE(M) #undef M case OLAP_FIELD_TYPE_DECIMAL: { - filter.reset(create_bloom_filter(bloom_filter->tracker(), TYPE_DECIMALV2)); + filter.reset(create_bloom_filter(TYPE_DECIMALV2)); 
filter->light_copy(bloom_filter.get()); return new BloomFilterColumnPredicate(column_id, filter); } case OLAP_FIELD_TYPE_BOOL: { - filter.reset(create_bloom_filter(bloom_filter->tracker(), TYPE_BOOLEAN)); + filter.reset(create_bloom_filter(TYPE_BOOLEAN)); filter->light_copy(bloom_filter.get()); return new BloomFilterColumnPredicate(column_id, filter); } diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp index f02b60813f..bb486d5e49 100644 --- a/be/src/olap/compaction.cpp +++ b/be/src/olap/compaction.cpp @@ -26,16 +26,9 @@ using std::vector; namespace doris { -Compaction::Compaction(TabletSharedPtr tablet, const std::string& label, - const std::shared_ptr& parent_tracker) +Compaction::Compaction(TabletSharedPtr tablet, const std::string& label) : _mem_tracker( - MemTracker::create_tracker(-1, label, parent_tracker, MemTrackerLevel::INSTANCE)), - _readers_tracker(MemTracker::create_tracker( - -1, "CompactionReaderTracker:" + std::to_string(tablet->tablet_id()), - _mem_tracker)), - _writer_tracker(MemTracker::create_tracker( - -1, "CompationWriterTracker:" + std::to_string(tablet->tablet_id()), - _mem_tracker)), + MemTracker::create_tracker(-1, label, nullptr, MemTrackerLevel::INSTANCE)), _tablet(tablet), _input_rowsets_size(0), _input_row_num(0), @@ -144,7 +137,8 @@ OLAPStatus Compaction::do_compaction_impl(int64_t permits) { << ", output_version=" << _output_version << ", current_max_version=" << current_max_version << ", disk=" << _tablet->data_dir()->path() << ", segments=" << segments_num - << ". elapsed time=" << watch.get_elapse_second() << "s. cumulative_compaction_policy=" + << ". elapsed time=" << watch.get_elapse_second() + << "s. 
cumulative_compaction_policy=" << _tablet->cumulative_compaction_policy()->name() << "."; return OLAP_SUCCESS; @@ -166,7 +160,6 @@ OLAPStatus Compaction::construct_output_rowset_writer() { context.rowset_state = VISIBLE; context.version = _output_version; context.segments_overlap = NONOVERLAPPING; - context.parent_mem_tracker = _writer_tracker; // The test results show that one rs writer is low-memory-footprint, there is no need to tracker its mem pool RETURN_NOT_OK(RowsetFactory::create_rowset_writer(context, &_output_rs_writer)); return OLAP_SUCCESS; @@ -175,11 +168,7 @@ OLAPStatus Compaction::construct_output_rowset_writer() { OLAPStatus Compaction::construct_input_rowset_readers() { for (auto& rowset : _input_rowsets) { RowsetReaderSharedPtr rs_reader; - RETURN_NOT_OK(rowset->create_reader( - MemTracker::create_tracker( - -1, "Compaction:RowsetReader:" + rowset->rowset_id().to_string(), - _readers_tracker), - &rs_reader)); + RETURN_NOT_OK(rowset->create_reader(&rs_reader)); _input_rs_readers.push_back(std::move(rs_reader)); } return OLAP_SUCCESS; @@ -298,4 +287,4 @@ int64_t Compaction::get_compaction_permits() { return permits; } -} // namespace doris +} // namespace doris diff --git a/be/src/olap/compaction.h b/be/src/olap/compaction.h index 6e7985a4d1..3a41f3fe73 100644 --- a/be/src/olap/compaction.h +++ b/be/src/olap/compaction.h @@ -44,8 +44,7 @@ class Merger; // 4. 
gc output rowset if failed class Compaction { public: - Compaction(TabletSharedPtr tablet, const std::string& label, - const std::shared_ptr& parent_tracker); + Compaction(TabletSharedPtr tablet, const std::string& label); virtual ~Compaction(); // This is only for http CompactionAction @@ -84,11 +83,6 @@ protected: // the root tracker for this compaction std::shared_ptr _mem_tracker; - // the child of root, only track rowset readers mem - std::shared_ptr _readers_tracker; - - // the child of root, only track rowset writer mem - std::shared_ptr _writer_tracker; TabletSharedPtr _tablet; std::vector _input_rowsets; diff --git a/be/src/olap/cumulative_compaction.cpp b/be/src/olap/cumulative_compaction.cpp index 58ff0cb3bf..0e721a838f 100644 --- a/be/src/olap/cumulative_compaction.cpp +++ b/be/src/olap/cumulative_compaction.cpp @@ -23,8 +23,8 @@ namespace doris { -CumulativeCompaction::CumulativeCompaction(TabletSharedPtr tablet, const std::shared_ptr& parent_tracker) - : Compaction(tablet, "CumulativeCompaction:" + std::to_string(tablet->tablet_id()), parent_tracker) {} +CumulativeCompaction::CumulativeCompaction(TabletSharedPtr tablet) + : Compaction(tablet, "CumulativeCompaction:" + std::to_string(tablet->tablet_id())) {} CumulativeCompaction::~CumulativeCompaction() {} diff --git a/be/src/olap/cumulative_compaction.h b/be/src/olap/cumulative_compaction.h index c5c991a43e..d7c26ed669 100644 --- a/be/src/olap/cumulative_compaction.h +++ b/be/src/olap/cumulative_compaction.h @@ -27,7 +27,7 @@ namespace doris { class CumulativeCompaction : public Compaction { public: - CumulativeCompaction(TabletSharedPtr tablet, const std::shared_ptr& parent_tracker); + CumulativeCompaction(TabletSharedPtr tablet); ~CumulativeCompaction() override; OLAPStatus prepare_compact() override; diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp index 1060710887..2080aa3786 100644 --- a/be/src/olap/delta_writer.cpp +++ b/be/src/olap/delta_writer.cpp @@ -29,22 +29,19 @@ 
namespace doris { -OLAPStatus DeltaWriter::open(WriteRequest* req, const std::shared_ptr& parent, - DeltaWriter** writer) { - *writer = new DeltaWriter(req, parent, StorageEngine::instance()); +OLAPStatus DeltaWriter::open(WriteRequest* req, DeltaWriter** writer) { + *writer = new DeltaWriter(req, StorageEngine::instance()); return OLAP_SUCCESS; } -DeltaWriter::DeltaWriter(WriteRequest* req, const std::shared_ptr& parent, - StorageEngine* storage_engine) +DeltaWriter::DeltaWriter(WriteRequest* req, StorageEngine* storage_engine) : _req(*req), _tablet(nullptr), _cur_rowset(nullptr), _rowset_writer(nullptr), _tablet_schema(nullptr), _delta_written_success(false), - _storage_engine(storage_engine), - _parent_mem_tracker(parent) {} + _storage_engine(storage_engine) {} DeltaWriter::~DeltaWriter() { if (_is_init && !_delta_written_success) { @@ -97,8 +94,8 @@ OLAPStatus DeltaWriter::init() { return OLAP_ERR_TABLE_NOT_FOUND; } - _mem_tracker = MemTracker::create_tracker( - -1, "DeltaWriter:" + std::to_string(_tablet->tablet_id()), _parent_mem_tracker); + _mem_tracker = + MemTracker::create_tracker(-1, "DeltaWriter:" + std::to_string(_tablet->tablet_id())); // check tablet version number if (_tablet->version_count() > config::max_tablet_version_num) { LOG(WARNING) << "failed to init delta writer. 
version count: " << _tablet->version_count() @@ -134,7 +131,6 @@ OLAPStatus DeltaWriter::init() { writer_context.txn_id = _req.txn_id; writer_context.load_id = _req.load_id; writer_context.segments_overlap = OVERLAPPING; - writer_context.parent_mem_tracker = _mem_tracker; RETURN_NOT_OK(RowsetFactory::create_rowset_writer(writer_context, &_rowset_writer)); _tablet_schema = &(_tablet->tablet_schema()); @@ -254,8 +250,7 @@ OLAPStatus DeltaWriter::wait_flush() { void DeltaWriter::_reset_mem_table() { _mem_table.reset(new MemTable(_tablet->tablet_id(), _schema.get(), _tablet_schema, _req.slots, - _req.tuple_desc, _tablet->keys_type(), _rowset_writer.get(), - _mem_tracker)); + _req.tuple_desc, _tablet->keys_type(), _rowset_writer.get())); } OLAPStatus DeltaWriter::close() { diff --git a/be/src/olap/delta_writer.h b/be/src/olap/delta_writer.h index dbea5694a7..c46dad3f85 100644 --- a/be/src/olap/delta_writer.h +++ b/be/src/olap/delta_writer.h @@ -54,8 +54,7 @@ struct WriteRequest { // This class is NOT thread-safe, external synchronization is required. class DeltaWriter { public: - static OLAPStatus open(WriteRequest* req, const std::shared_ptr& parent, - DeltaWriter** writer); + static OLAPStatus open(WriteRequest* req, DeltaWriter** writer); ~DeltaWriter(); @@ -90,8 +89,7 @@ public: int64_t tablet_id() { return _tablet->tablet_id(); } private: - DeltaWriter(WriteRequest* req, const std::shared_ptr& parent, - StorageEngine* storage_engine); + DeltaWriter(WriteRequest* req, StorageEngine* storage_engine); // push a full memtable to flush executor OLAPStatus _flush_memtable_async(); @@ -114,7 +112,6 @@ private: StorageEngine* _storage_engine; std::unique_ptr _flush_token; - std::shared_ptr _parent_mem_tracker; std::shared_ptr _mem_tracker; // The counter of number of segment flushed already. 
diff --git a/be/src/olap/generic_iterators.cpp b/be/src/olap/generic_iterators.cpp index 0d31955aad..a499f28ac6 100644 --- a/be/src/olap/generic_iterators.cpp +++ b/be/src/olap/generic_iterators.cpp @@ -113,8 +113,7 @@ Status AutoIncrementIterator::next_batch(RowBlockV2* block) { // } class MergeIteratorContext { public: - MergeIteratorContext(RowwiseIterator* iter, std::shared_ptr parent) - : _iter(iter), _block(iter->schema(), 1024, std::move(parent)) {} + MergeIteratorContext(RowwiseIterator* iter) : _iter(iter), _block(iter->schema(), 1024) {} MergeIteratorContext(const MergeIteratorContext&) = delete; MergeIteratorContext(MergeIteratorContext&&) = delete; @@ -207,11 +206,10 @@ Status MergeIteratorContext::_load_next_block() { class MergeIterator : public RowwiseIterator { public: // MergeIterator takes the ownership of input iterators - MergeIterator(std::vector iters, std::shared_ptr parent, int sequence_id_idx) - : _origin_iters(std::move(iters)), _sequence_id_idx(sequence_id_idx), _merge_heap(MergeContextComparator(_sequence_id_idx)) { - // use for count the mem use of Block use in Merge - _mem_tracker = MemTracker::create_tracker(-1, "MergeIterator", std::move(parent)); - } + MergeIterator(std::vector iters, int sequence_id_idx) + : _origin_iters(std::move(iters)), + _sequence_id_idx(sequence_id_idx), + _merge_heap(MergeContextComparator(_sequence_id_idx)) {} ~MergeIterator() override { while (!_merge_heap.empty()) { @@ -245,7 +243,7 @@ private: if (cmp_res != 0) { return cmp_res > 0; } - + // Second: If sequence_id_idx != 0 means we need to compare sequence. sequence only use // in unique key. 
so keep reverse order of sequence id here if (sequence_id_idx != -1) { @@ -278,7 +276,7 @@ Status MergeIterator::init(const StorageReadOptions& opts) { _schema.reset(new Schema((*(_origin_iters.begin()))->schema())); for (auto iter : _origin_iters) { - std::unique_ptr ctx(new MergeIteratorContext(iter, _mem_tracker)); + std::unique_ptr ctx(new MergeIteratorContext(iter)); RETURN_IF_ERROR(ctx->init(opts)); if (!ctx->valid()) { continue; @@ -323,10 +321,7 @@ public: // Iterators' ownership it transfered to this class. // This class will delete all iterators when destructs // Client should not use iterators any more. - UnionIterator(std::vector &v, std::shared_ptr parent) - : _origin_iters(v.begin(), v.end()) { - _mem_tracker = MemTracker::create_tracker(-1, "UnionIterator", parent); - } + UnionIterator(std::vector& v) : _origin_iters(v.begin(), v.end()) {} ~UnionIterator() override { std::for_each(_origin_iters.begin(), _origin_iters.end(), std::default_delete()); @@ -374,18 +369,18 @@ Status UnionIterator::next_batch(RowBlockV2* block) { return Status::EndOfFile("End of UnionIterator"); } -RowwiseIterator* new_merge_iterator(std::vector inputs, std::shared_ptr parent, int sequence_id_idx) { +RowwiseIterator* new_merge_iterator(std::vector inputs, int sequence_id_idx) { if (inputs.size() == 1) { return *(inputs.begin()); } - return new MergeIterator(std::move(inputs), parent, sequence_id_idx); + return new MergeIterator(std::move(inputs), sequence_id_idx); } -RowwiseIterator* new_union_iterator(std::vector& inputs, std::shared_ptr parent) { +RowwiseIterator* new_union_iterator(std::vector& inputs) { if (inputs.size() == 1) { return *(inputs.begin()); } - return new UnionIterator(inputs, parent); + return new UnionIterator(inputs); } RowwiseIterator* new_auto_increment_iterator(const Schema& schema, size_t num_rows) { diff --git a/be/src/olap/generic_iterators.h b/be/src/olap/generic_iterators.h index e8f4528885..5ff287b8d7 100644 --- a/be/src/olap/generic_iterators.h 
+++ b/be/src/olap/generic_iterators.h @@ -25,14 +25,14 @@ namespace doris { // // Inputs iterators' ownership is taken by created merge iterator. And client // should delete returned iterator after usage. -RowwiseIterator* new_merge_iterator(std::vector inputs, std::shared_ptr parent, int sequence_id_idx); +RowwiseIterator* new_merge_iterator(std::vector inputs, int sequence_id_idx); // Create a union iterator for input iterators. Union iterator will read // input iterators one by one. // // Inputs iterators' ownership is taken by created union iterator. And client // should delete returned iterator after usage. -RowwiseIterator* new_union_iterator(std::vector& inputs, std::shared_ptr parent); +RowwiseIterator* new_union_iterator(std::vector& inputs); // Create an auto increment iterator which returns num_rows data in format of schema. // This class aims to be used in unit test. diff --git a/be/src/olap/iterators.h b/be/src/olap/iterators.h index 4cdfc60578..4609bd0a52 100644 --- a/be/src/olap/iterators.h +++ b/be/src/olap/iterators.h @@ -114,9 +114,6 @@ public: // Return the data id such as segment id, used for keep the insert order when do // merge sort in priority queue virtual uint64_t data_id() const { return 0; } - -protected: - std::shared_ptr _mem_tracker; }; } // namespace doris diff --git a/be/src/olap/lru_cache.cpp b/be/src/olap/lru_cache.cpp index 5d2151f97e..ac494819c9 100644 --- a/be/src/olap/lru_cache.cpp +++ b/be/src/olap/lru_cache.cpp @@ -16,6 +16,7 @@ #include "olap/olap_index.h" #include "olap/row_block.h" #include "olap/utils.h" +#include "runtime/thread_context.h" #include "util/doris_metrics.h" using std::string; @@ -343,7 +344,7 @@ Cache::Handle* LRUCache::insert(const CacheKey& key, uint32_t hash, void* value, return reinterpret_cast(e); } -void LRUCache::erase(const CacheKey& key, uint32_t hash) { +void LRUCache::erase(const CacheKey& key, uint32_t hash, MemTracker* tracker) { LRUHandle* e = nullptr; bool last_ref = false; { @@ -364,6 
+365,11 @@ void LRUCache::erase(const CacheKey& key, uint32_t hash) { // free handle out of mutex, when last_ref is true, e must not be nullptr if (last_ref) { e->free(); + // The parameter tracker is ShardedLRUCache::_mem_tracker, + // because the memory released by LRUHandle is recorded in the tls mem tracker, + // so this part of the memory is subsidized from ShardedLRUCache::_mem_tracker to the tls mem tracker + tracker->transfer_to(thread_local_ctx.get()->_thread_mem_tracker_mgr->mem_tracker().get(), + e->charge); } } @@ -465,12 +471,15 @@ ShardedLRUCache::~ShardedLRUCache() { } _entity->deregister_hook(_name); DorisMetrics::instance()->metric_registry()->deregister_entity(_entity); - _mem_tracker->release(_mem_tracker->consumption()); } Cache::Handle* ShardedLRUCache::insert(const CacheKey& key, void* value, size_t charge, void (*deleter)(const CacheKey& key, void* value), CachePriority priority) { + // The memory of the parameter value should be recorded in the tls mem tracker, + // transfer the memory ownership of the value to ShardedLRUCache::_mem_tracker. + thread_local_ctx.get()->_thread_mem_tracker_mgr->mem_tracker()->transfer_to(_mem_tracker.get(), + charge); const uint32_t hash = _hash_slice(key); return _shards[_shard(hash)]->insert(key, hash, value, charge, deleter, priority); } @@ -487,7 +496,7 @@ void ShardedLRUCache::release(Handle* handle) { void ShardedLRUCache::erase(const CacheKey& key) { const uint32_t hash = _hash_slice(key); - _shards[_shard(hash)]->erase(key, hash); + _shards[_shard(hash)]->erase(key, hash, _mem_tracker.get()); } void* ShardedLRUCache::value(Handle* handle) { @@ -538,8 +547,6 @@ void ShardedLRUCache::update_cache_metrics() const { usage_ratio->set_value(total_capacity == 0 ? 0 : ((double)total_usage / total_capacity)); hit_ratio->set_value(total_lookup_count == 0 ? 
0 : ((double)total_hit_count / total_lookup_count)); - - _mem_tracker->consume(total_usage - _mem_tracker->consumption()); } Cache* new_lru_cache(const std::string& name, size_t capacity) { diff --git a/be/src/olap/lru_cache.h b/be/src/olap/lru_cache.h index e666a45e0b..fd81cd8acd 100644 --- a/be/src/olap/lru_cache.h +++ b/be/src/olap/lru_cache.h @@ -316,7 +316,7 @@ public: CachePriority priority = CachePriority::NORMAL); Cache::Handle* lookup(const CacheKey& key, uint32_t hash); void release(Cache::Handle* handle); - void erase(const CacheKey& key, uint32_t hash); + void erase(const CacheKey& key, uint32_t hash, MemTracker* tracker); int64_t prune(); int64_t prune_if(CacheValuePredicate pred); diff --git a/be/src/olap/memtable.cpp b/be/src/olap/memtable.cpp index 69623575e2..8c56e3895e 100644 --- a/be/src/olap/memtable.cpp +++ b/be/src/olap/memtable.cpp @@ -31,14 +31,13 @@ namespace doris { MemTable::MemTable(int64_t tablet_id, Schema* schema, const TabletSchema* tablet_schema, const std::vector* slot_descs, TupleDescriptor* tuple_desc, - KeysType keys_type, RowsetWriter* rowset_writer, - const std::shared_ptr& parent_tracker) + KeysType keys_type, RowsetWriter* rowset_writer) : _tablet_id(tablet_id), _schema(schema), _tablet_schema(tablet_schema), _slot_descs(slot_descs), _keys_type(keys_type), - _mem_tracker(MemTracker::create_tracker(-1, "MemTable", parent_tracker)), + _mem_tracker(MemTracker::create_tracker(-1, "MemTable")), _buffer_mem_pool(new MemPool(_mem_tracker.get())), _table_mem_pool(new MemPool(_mem_tracker.get())), _schema_size(_schema->schema_size()), diff --git a/be/src/olap/memtable.h b/be/src/olap/memtable.h index 6849bf45f4..db0e6a12cf 100644 --- a/be/src/olap/memtable.h +++ b/be/src/olap/memtable.h @@ -40,8 +40,7 @@ class MemTable { public: MemTable(int64_t tablet_id, Schema* schema, const TabletSchema* tablet_schema, const std::vector* slot_descs, TupleDescriptor* tuple_desc, - KeysType keys_type, RowsetWriter* rowset_writer, - const 
std::shared_ptr& parent_tracker); + KeysType keys_type, RowsetWriter* rowset_writer); ~MemTable(); int64_t tablet_id() const { return _tablet_id; } diff --git a/be/src/olap/push_handler.cpp b/be/src/olap/push_handler.cpp index 9b85206b37..9d7855efed 100644 --- a/be/src/olap/push_handler.cpp +++ b/be/src/olap/push_handler.cpp @@ -905,9 +905,7 @@ OLAPStatus PushBrokerReader::init(const Schema* schema, const TBrokerScanRange& } _runtime_profile = _runtime_state->runtime_profile(); _runtime_profile->set_name("PushBrokerReader"); - _mem_tracker = MemTracker::create_tracker(-1, "PushBrokerReader", - _runtime_state->instance_mem_tracker()); - _mem_pool.reset(new MemPool(_mem_tracker.get())); + _mem_pool.reset(new MemPool("PushBrokerReader")); _counter.reset(new ScannerCounter()); // init scanner diff --git a/be/src/olap/push_handler.h b/be/src/olap/push_handler.h index 60ed4f5815..767cbc0e0a 100644 --- a/be/src/olap/push_handler.h +++ b/be/src/olap/push_handler.h @@ -211,7 +211,6 @@ private: const Schema* _schema; std::unique_ptr _runtime_state; RuntimeProfile* _runtime_profile; - std::shared_ptr _mem_tracker; std::unique_ptr _mem_pool; std::unique_ptr _counter; std::unique_ptr _scanner; diff --git a/be/src/olap/reader.cpp b/be/src/olap/reader.cpp index f73d1f48b6..07927ba10e 100644 --- a/be/src/olap/reader.cpp +++ b/be/src/olap/reader.cpp @@ -37,7 +37,6 @@ #include "olap/storage_engine.h" #include "olap/tablet.h" #include "runtime/mem_pool.h" -#include "runtime/mem_tracker.h" #include "runtime/string_value.hpp" #include "util/date_func.h" #include "util/mem_util.hpp" @@ -107,7 +106,7 @@ TabletReader::~TabletReader() { } OLAPStatus TabletReader::init(const ReaderParams& read_params) { - _predicate_mem_pool.reset(new MemPool(read_params.tablet->full_name())); + _predicate_mem_pool.reset(new MemPool("TabletReader:" + read_params.tablet->full_name())); OLAPStatus res = _init_params(read_params); if (res != OLAP_SUCCESS) { diff --git a/be/src/olap/row_block.cpp 
b/be/src/olap/row_block.cpp index 1b041c80d0..061972edd3 100644 --- a/be/src/olap/row_block.cpp +++ b/be/src/olap/row_block.cpp @@ -37,10 +37,8 @@ using std::vector; namespace doris { -RowBlock::RowBlock(const TabletSchema* schema, const std::shared_ptr& parent_tracker) - : _capacity(0), _schema(schema) { - _tracker = MemTracker::create_tracker(-1, "RowBlock", parent_tracker, MemTrackerLevel::VERBOSE); - _mem_pool.reset(new MemPool(_tracker.get())); +RowBlock::RowBlock(const TabletSchema* schema) : _capacity(0), _schema(schema) { + _mem_pool.reset(new MemPool("RowBlock")); } RowBlock::~RowBlock() { @@ -90,7 +88,8 @@ void RowBlock::_compute_layout() { // All field has a nullbyte in memory if (column.type() == OLAP_FIELD_TYPE_VARCHAR || column.type() == OLAP_FIELD_TYPE_HLL || - column.type() == OLAP_FIELD_TYPE_CHAR || column.type() == OLAP_FIELD_TYPE_OBJECT ||column.type() == OLAP_FIELD_TYPE_STRING) { + column.type() == OLAP_FIELD_TYPE_CHAR || column.type() == OLAP_FIELD_TYPE_OBJECT || + column.type() == OLAP_FIELD_TYPE_STRING) { // 变长部分额外计算下实际最大的字符串长度(此处length已经包括记录Length的2个字节) memory_size += sizeof(Slice) + sizeof(char); } else { diff --git a/be/src/olap/row_block.h b/be/src/olap/row_block.h index 75924fa632..6b1dd02554 100644 --- a/be/src/olap/row_block.h +++ b/be/src/olap/row_block.h @@ -57,8 +57,7 @@ class RowBlock { friend class VectorizedRowBatch; public: - RowBlock(const TabletSchema* schema, - const std::shared_ptr& parent_tracker = nullptr); + RowBlock(const TabletSchema* schema); // 注意回收内部buffer ~RowBlock(); @@ -136,7 +135,6 @@ private: size_t _limit = 0; uint8_t _block_status = DEL_PARTIAL_SATISFIED; - std::shared_ptr _tracker; std::unique_ptr _mem_pool; // 由于内部持有内存资源,所以这里禁止拷贝和赋值 DISALLOW_COPY_AND_ASSIGN(RowBlock); diff --git a/be/src/olap/row_block2.cpp b/be/src/olap/row_block2.cpp index 4d0a3bba8d..73aca948eb 100644 --- a/be/src/olap/row_block2.cpp +++ b/be/src/olap/row_block2.cpp @@ -33,14 +33,10 @@ using strings::Substitute; namespace doris { 
RowBlockV2::RowBlockV2(const Schema& schema, uint16_t capacity) - : RowBlockV2(schema, capacity, nullptr) {} - -RowBlockV2::RowBlockV2(const Schema& schema, uint16_t capacity, std::shared_ptr parent) : _schema(schema), _capacity(capacity), _column_vector_batches(_schema.num_columns()), - _tracker(MemTracker::create_tracker(-1, "RowBlockV2", std::move(parent))), - _pool(new MemPool(_tracker.get())), + _pool(new MemPool("RowBlockV2")), _selection_vector(nullptr) { for (auto cid : _schema.column_ids()) { Status status = ColumnVectorBatch::create( diff --git a/be/src/olap/row_block2.h b/be/src/olap/row_block2.h index 7f2b79d638..35c4a48996 100644 --- a/be/src/olap/row_block2.h +++ b/be/src/olap/row_block2.h @@ -28,7 +28,6 @@ #include "olap/selection_vector.h" #include "olap/types.h" #include "runtime/mem_pool.h" -#include "runtime/mem_tracker.h" namespace doris { @@ -43,7 +42,6 @@ class RowBlockV2 { public: RowBlockV2(const Schema& schema, uint16_t capacity); - RowBlockV2(const Schema& schema, uint16_t capacity, std::shared_ptr parent); ~RowBlockV2(); // update number of rows contained in this block @@ -119,7 +117,6 @@ private: size_t _num_rows; // manages the memory for slice's data - std::shared_ptr _tracker; std::unique_ptr _pool; // index of selected rows for rows passed the predicate diff --git a/be/src/olap/rowset/alpha_rowset.cpp b/be/src/olap/rowset/alpha_rowset.cpp index 53c695f4d6..9e3ff54092 100644 --- a/be/src/olap/rowset/alpha_rowset.cpp +++ b/be/src/olap/rowset/alpha_rowset.cpp @@ -55,14 +55,6 @@ OLAPStatus AlphaRowset::create_reader(std::shared_ptr* result) { return OLAP_SUCCESS; } -OLAPStatus AlphaRowset::create_reader(const std::shared_ptr& parent_tracker, - std::shared_ptr* result) { - result->reset(new AlphaRowsetReader(_schema->num_rows_per_row_block(), - std::static_pointer_cast(shared_from_this()), - parent_tracker)); - return OLAP_SUCCESS; -} - OLAPStatus AlphaRowset::remove() { VLOG_NOTICE << "begin to remove files in rowset " << unique_id() << 
", version:" << start_version() << "-" << end_version() << ", tabletid:" << _rowset_meta->tablet_id(); diff --git a/be/src/olap/rowset/alpha_rowset.h b/be/src/olap/rowset/alpha_rowset.h index 364d59dc78..8a5b4d9dc5 100644 --- a/be/src/olap/rowset/alpha_rowset.h +++ b/be/src/olap/rowset/alpha_rowset.h @@ -41,9 +41,6 @@ public: OLAPStatus create_reader(std::shared_ptr* result) override; - OLAPStatus create_reader(const std::shared_ptr& parent_tracker, - std::shared_ptr* result) override; - OLAPStatus split_range(const RowCursor& start_key, const RowCursor& end_key, uint64_t request_block_row_count, size_t key_num, std::vector* ranges) override; diff --git a/be/src/olap/rowset/alpha_rowset_reader.cpp b/be/src/olap/rowset/alpha_rowset_reader.cpp index b22bfa1fb5..e883680f50 100644 --- a/be/src/olap/rowset/alpha_rowset_reader.cpp +++ b/be/src/olap/rowset/alpha_rowset_reader.cpp @@ -22,11 +22,9 @@ namespace doris { -AlphaRowsetReader::AlphaRowsetReader(int num_rows_per_row_block, AlphaRowsetSharedPtr rowset, - const std::shared_ptr& parent_tracker) +AlphaRowsetReader::AlphaRowsetReader(int num_rows_per_row_block, AlphaRowsetSharedPtr rowset) : _num_rows_per_row_block(num_rows_per_row_block), _rowset(std::move(rowset)), - _parent_tracker(parent_tracker), _alpha_rowset_meta( std::static_pointer_cast(_rowset->rowset_meta()).get()), _segment_groups(_rowset->_segment_groups), @@ -68,8 +66,7 @@ OLAPStatus AlphaRowsetReader::init(RowsetReaderContext* read_context) { if (_current_read_context->need_ordered_result && _is_segments_overlapping && _sequential_ctxs.size() > 1) { _next_block = &AlphaRowsetReader::_merge_block; - _read_block.reset(new (std::nothrow) - RowBlock(_current_read_context->tablet_schema, _parent_tracker)); + _read_block.reset(new (std::nothrow) RowBlock(_current_read_context->tablet_schema)); if (_read_block == nullptr) { LOG(WARNING) << "new row block failed in reader"; return OLAP_ERR_MALLOC_ERROR; @@ -322,8 +319,7 @@ OLAPStatus 
AlphaRowsetReader::_init_merge_ctxs(RowsetReaderContext* read_context const bool use_index_stream_cache = read_context->reader_type == READER_QUERY; for (auto& segment_group : _segment_groups) { - std::unique_ptr new_column_data( - ColumnData::create(segment_group.get(), _parent_tracker)); + std::unique_ptr new_column_data(ColumnData::create(segment_group.get())); OLAPStatus status = new_column_data->init(); if (status != OLAP_SUCCESS) { LOG(WARNING) << "init column data failed"; diff --git a/be/src/olap/rowset/alpha_rowset_reader.h b/be/src/olap/rowset/alpha_rowset_reader.h index 7c5b593330..018d78153c 100644 --- a/be/src/olap/rowset/alpha_rowset_reader.h +++ b/be/src/olap/rowset/alpha_rowset_reader.h @@ -52,8 +52,7 @@ struct AlphaMergeContextComparator { class AlphaRowsetReader : public RowsetReader { public: - AlphaRowsetReader(int num_rows_per_row_block, AlphaRowsetSharedPtr rowset, - const std::shared_ptr& parent_tracker = nullptr); + AlphaRowsetReader(int num_rows_per_row_block, AlphaRowsetSharedPtr rowset); ~AlphaRowsetReader() override; @@ -103,7 +102,6 @@ private: private: int _num_rows_per_row_block; AlphaRowsetSharedPtr _rowset; - std::shared_ptr _parent_tracker; std::string _rowset_path; AlphaRowsetMeta* _alpha_rowset_meta; const std::vector>& _segment_groups; diff --git a/be/src/olap/rowset/beta_rowset.cpp b/be/src/olap/rowset/beta_rowset.cpp index 302e6ac442..3c9f1157bc 100644 --- a/be/src/olap/rowset/beta_rowset.cpp +++ b/be/src/olap/rowset/beta_rowset.cpp @@ -74,14 +74,6 @@ OLAPStatus BetaRowset::create_reader(RowsetReaderSharedPtr* result) { return OLAP_SUCCESS; } -OLAPStatus BetaRowset::create_reader(const std::shared_ptr& parent_tracker, - std::shared_ptr* result) { - // NOTE: We use std::static_pointer_cast for performance - result->reset(new BetaRowsetReader(std::static_pointer_cast(shared_from_this()), - parent_tracker)); - return OLAP_SUCCESS; -} - OLAPStatus BetaRowset::split_range(const RowCursor& start_key, const RowCursor& end_key, 
uint64_t request_block_row_count, size_t key_num, std::vector* ranges) { diff --git a/be/src/olap/rowset/beta_rowset.h b/be/src/olap/rowset/beta_rowset.h index 5030b298e1..81ad134904 100644 --- a/be/src/olap/rowset/beta_rowset.h +++ b/be/src/olap/rowset/beta_rowset.h @@ -39,9 +39,6 @@ public: OLAPStatus create_reader(RowsetReaderSharedPtr* result) override; - OLAPStatus create_reader(const std::shared_ptr& parent_tracker, - std::shared_ptr* result) override; - static FilePathDesc segment_file_path(const FilePathDesc& segment_dir_desc, const RowsetId& rowset_id, int segment_id); diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp index 4e475fa9b1..c0c3bad46c 100644 --- a/be/src/olap/rowset/beta_rowset_reader.cpp +++ b/be/src/olap/rowset/beta_rowset_reader.cpp @@ -31,21 +31,14 @@ namespace doris { -BetaRowsetReader::BetaRowsetReader(BetaRowsetSharedPtr rowset, - std::shared_ptr parent_tracker) +BetaRowsetReader::BetaRowsetReader(BetaRowsetSharedPtr rowset) : _context(nullptr), _rowset(std::move(rowset)), - _stats(&_owned_stats), - _parent_tracker(std::move(parent_tracker)) { + _stats(&_owned_stats) { _rowset->acquire(); } OLAPStatus BetaRowsetReader::init(RowsetReaderContext* read_context) { - // If do not init the RowsetReader with a parent_tracker, use the runtime_state instance_mem_tracker - if (_parent_tracker == nullptr && read_context->runtime_state != nullptr) { - _parent_tracker = read_context->runtime_state->instance_mem_tracker(); - } - RETURN_NOT_OK(_rowset->load()); _context = read_context; if (_context->stats != nullptr) { @@ -102,7 +95,7 @@ OLAPStatus BetaRowsetReader::init(RowsetReaderContext* read_context) { std::vector> seg_iterators; for (auto& seg_ptr : _segment_cache_handle.get_segments()) { std::unique_ptr iter; - auto s = seg_ptr->new_iterator(*_schema, read_options, _parent_tracker, &iter); + auto s = seg_ptr->new_iterator(*_schema, read_options, &iter); if (!s.ok()) { LOG(WARNING) << "failed to 
create iterator[" << seg_ptr->id() << "]: " << s.to_string(); return OLAP_ERR_ROWSET_READER_INIT; @@ -120,15 +113,15 @@ OLAPStatus BetaRowsetReader::init(RowsetReaderContext* read_context) { RowwiseIterator* final_iterator; if (config::enable_storage_vectorization && read_context->is_vec) { if (read_context->need_ordered_result && _rowset->rowset_meta()->is_segments_overlapping()) { - final_iterator = vectorized::new_merge_iterator(iterators, _parent_tracker, read_context->sequence_id_idx); + final_iterator = vectorized::new_merge_iterator(iterators, read_context->sequence_id_idx); } else { - final_iterator = vectorized::new_union_iterator(iterators, _parent_tracker); + final_iterator = vectorized::new_union_iterator(iterators); } } else { if (read_context->need_ordered_result && _rowset->rowset_meta()->is_segments_overlapping()) { - final_iterator = new_merge_iterator(iterators, _parent_tracker, read_context->sequence_id_idx); + final_iterator = new_merge_iterator(iterators, read_context->sequence_id_idx); } else { - final_iterator = new_union_iterator(iterators, _parent_tracker); + final_iterator = new_union_iterator(iterators); } } @@ -141,11 +134,11 @@ OLAPStatus BetaRowsetReader::init(RowsetReaderContext* read_context) { // init input block _input_block.reset(new RowBlockV2(*_schema, - std::min(1024, read_context->batch_size), _parent_tracker)); + std::min(1024, read_context->batch_size))); if (!read_context->is_vec) { // init input/output block and row - _output_block.reset(new RowBlock(read_context->tablet_schema, _parent_tracker)); + _output_block.reset(new RowBlock(read_context->tablet_schema)); RowBlockInfo output_block_info; output_block_info.row_num = std::min(1024, read_context->batch_size); diff --git a/be/src/olap/rowset/beta_rowset_reader.h b/be/src/olap/rowset/beta_rowset_reader.h index ad349b7959..eeddef80da 100644 --- a/be/src/olap/rowset/beta_rowset_reader.h +++ b/be/src/olap/rowset/beta_rowset_reader.h @@ -30,8 +30,7 @@ namespace doris { class 
BetaRowsetReader : public RowsetReader { public: - BetaRowsetReader(BetaRowsetSharedPtr rowset, - std::shared_ptr parent_tracker = nullptr); + BetaRowsetReader(BetaRowsetSharedPtr rowset); ~BetaRowsetReader() override { _rowset->release(); } @@ -62,8 +61,6 @@ private: OlapReaderStatistics _owned_stats; OlapReaderStatistics* _stats; - std::shared_ptr _parent_tracker; - std::unique_ptr _iterator; std::unique_ptr _input_block; diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp b/be/src/olap/rowset/beta_rowset_writer.cpp index 4b68b39059..ae4ce2cb98 100644 --- a/be/src/olap/rowset/beta_rowset_writer.cpp +++ b/be/src/olap/rowset/beta_rowset_writer.cpp @@ -224,8 +224,8 @@ OLAPStatus BetaRowsetWriter::_create_segment_writer(std::unique_ptrreset(new segment_v2::SegmentWriter(wblock.get(), _num_segment, - _context.tablet_schema, writer_options, _context.parent_mem_tracker)); + writer->reset(new segment_v2::SegmentWriter(wblock.get(), _num_segment, _context.tablet_schema, + writer_options)); { std::lock_guard l(_lock); _wblocks.push_back(std::move(wblock)); diff --git a/be/src/olap/rowset/column_data.cpp b/be/src/olap/rowset/column_data.cpp index 224367480c..178710603b 100644 --- a/be/src/olap/rowset/column_data.cpp +++ b/be/src/olap/rowset/column_data.cpp @@ -24,16 +24,13 @@ namespace doris { -ColumnData* ColumnData::create(SegmentGroup* segment_group, - const std::shared_ptr& parent_tracker) { - ColumnData* data = new (std::nothrow) ColumnData(segment_group, parent_tracker); +ColumnData* ColumnData::create(SegmentGroup* segment_group) { + ColumnData* data = new (std::nothrow) ColumnData(segment_group); return data; } -ColumnData::ColumnData(SegmentGroup* segment_group, - const std::shared_ptr& parent_tracker) +ColumnData::ColumnData(SegmentGroup* segment_group) : _segment_group(segment_group), - _parent_tracker(parent_tracker), _eof(false), _conditions(nullptr), _col_predicates(nullptr), @@ -138,7 +135,7 @@ OLAPStatus ColumnData::_seek_to_block(const RowBlockPosition& 
block_pos, bool wi _segment_reader = new (std::nothrow) SegmentReader(file_name, segment_group(), block_pos.segment, _seek_columns, _load_bf_columns, _conditions, _delete_handler, _delete_status, - _lru_cache, _runtime_state, _stats, _parent_tracker); + _lru_cache, _runtime_state, _stats); if (_segment_reader == nullptr) { OLAP_LOG_WARNING("fail to malloc segment reader."); return OLAP_ERR_MALLOC_ERROR; @@ -435,14 +432,12 @@ void ColumnData::set_read_params(const std::vector& return_columns, } _read_vector_batch.reset(new VectorizedRowBatch(&(_segment_group->get_tablet_schema()), - _return_columns, _num_rows_per_block, - _parent_tracker)); + _return_columns, _num_rows_per_block)); _seek_vector_batch.reset(new VectorizedRowBatch(&(_segment_group->get_tablet_schema()), - _seek_columns, _num_rows_per_block, - _parent_tracker)); + _seek_columns, _num_rows_per_block)); - _read_block.reset(new RowBlock(&(_segment_group->get_tablet_schema()), _parent_tracker)); + _read_block.reset(new RowBlock(&(_segment_group->get_tablet_schema()))); RowBlockInfo block_info; block_info.row_num = _num_rows_per_block; block_info.null_supported = true; @@ -580,7 +575,7 @@ OLAPStatus ColumnData::schema_change_init() { _read_vector_batch.reset(new VectorizedRowBatch(&(_segment_group->get_tablet_schema()), _return_columns, _num_rows_per_block)); - _read_block.reset(new RowBlock(&(_segment_group->get_tablet_schema()), _parent_tracker)); + _read_block.reset(new RowBlock(&(_segment_group->get_tablet_schema()))); RowBlockInfo block_info; block_info.row_num = _num_rows_per_block; diff --git a/be/src/olap/rowset/column_data.h b/be/src/olap/rowset/column_data.h index e2565b4c4b..c5ad741019 100644 --- a/be/src/olap/rowset/column_data.h +++ b/be/src/olap/rowset/column_data.h @@ -39,10 +39,8 @@ class SegmentReader; // This class is column data reader. this class will be used in two case. 
class ColumnData { public: - static ColumnData* create(SegmentGroup* segment_group, - const std::shared_ptr& parent_tracker = nullptr); - ColumnData(SegmentGroup* segment_group, - const std::shared_ptr& parent_tracker = nullptr); + static ColumnData* create(SegmentGroup* segment_group); + ColumnData(SegmentGroup* segment_group); ~ColumnData(); // 为了与之前兼容, 暴露部分index的接口 @@ -139,7 +137,6 @@ private: private: SegmentGroup* _segment_group; - std::shared_ptr _parent_tracker; // 当到达文件末尾或者到达end key时设置此标志 bool _eof; const Conditions* _conditions; diff --git a/be/src/olap/rowset/rowset.h b/be/src/olap/rowset/rowset.h index 463752f63c..a6b533ef15 100644 --- a/be/src/olap/rowset/rowset.h +++ b/be/src/olap/rowset/rowset.h @@ -118,10 +118,6 @@ public: // returns OLAP_ERR_ROWSET_CREATE_READER when failed to create reader virtual OLAPStatus create_reader(std::shared_ptr* result) = 0; - // Support adding parent tracker, but should be careful about destruction sequence. - virtual OLAPStatus create_reader(const std::shared_ptr& parent_tracker, - std::shared_ptr* result) = 0; - // Split range denoted by `start_key` and `end_key` into sub-ranges, each contains roughly // `request_block_row_count` rows. Sub-range is represented by pair of OlapTuples and added to `ranges`. // diff --git a/be/src/olap/rowset/rowset_writer_context.h b/be/src/olap/rowset/rowset_writer_context.h index 8c314f5dba..74b65327cc 100644 --- a/be/src/olap/rowset/rowset_writer_context.h +++ b/be/src/olap/rowset/rowset_writer_context.h @@ -62,7 +62,6 @@ struct RowsetWriterContext { // indicate whether the data among segments is overlapping. // default is OVERLAP_UNKNOWN. SegmentsOverlapPB segments_overlap; - std::shared_ptr parent_mem_tracker; // segment file use uint32 to represent row number, therefore the maximum is UINT32_MAX. // the default is set to INT32_MAX to avoid overflow issue when casting from uint32_t to int. 
// test cases can change this value to control flush timing diff --git a/be/src/olap/rowset/segment_reader.cpp b/be/src/olap/rowset/segment_reader.cpp index 06ec2521ce..897dd5e022 100644 --- a/be/src/olap/rowset/segment_reader.cpp +++ b/be/src/olap/rowset/segment_reader.cpp @@ -37,8 +37,7 @@ SegmentReader::SegmentReader(const std::string file, SegmentGroup* segment_group const std::set& load_bf_columns, const Conditions* conditions, const DeleteHandler* delete_handler, const DelCondSatisfied delete_status, Cache* lru_cache, - RuntimeState* runtime_state, OlapReaderStatistics* stats, - const std::shared_ptr& parent_tracker) + RuntimeState* runtime_state, OlapReaderStatistics* stats) : _file_name(file), _segment_group(segment_group), _segment_id(segment_id), @@ -58,8 +57,7 @@ SegmentReader::SegmentReader(const std::string file, SegmentGroup* segment_group _is_using_mmap(false), _is_data_loaded(false), _buffer_size(0), - _tracker(MemTracker::create_tracker(-1, "SegmentReader:" + file, parent_tracker)), - _mem_pool(new MemPool(_tracker.get())), + _mem_pool(new MemPool("SegmentReader:" + file)), _shared_buffer(nullptr), _lru_cache(lru_cache), _runtime_state(runtime_state), diff --git a/be/src/olap/rowset/segment_reader.h b/be/src/olap/rowset/segment_reader.h index 0d3aef0b75..91464ac2ca 100644 --- a/be/src/olap/rowset/segment_reader.h +++ b/be/src/olap/rowset/segment_reader.h @@ -51,8 +51,7 @@ public: const std::vector& used_columns, const std::set& load_bf_columns, const Conditions* conditions, const DeleteHandler* delete_handler, const DelCondSatisfied delete_status, - Cache* lru_cache, RuntimeState* runtime_state, OlapReaderStatistics* stats, - const std::shared_ptr& parent_tracker = nullptr); + Cache* lru_cache, RuntimeState* runtime_state, OlapReaderStatistics* stats); ~SegmentReader(); @@ -317,7 +316,6 @@ private: std::vector _cache_handle; const FileHeader* _file_header; - std::shared_ptr _tracker; std::unique_ptr _mem_pool; StorageByteBuffer* _shared_buffer; 
diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index 50716e640d..46aec0972c 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -457,9 +457,7 @@ Status ArrayFileColumnIterator::next_batch(size_t* n, ColumnBlockView* dst, bool FileColumnIterator::FileColumnIterator(ColumnReader* reader) : _reader(reader) {} -FileColumnIterator::~FileColumnIterator() { - _opts.mem_tracker->release(_opts.mem_tracker->consumption()); -} +FileColumnIterator::~FileColumnIterator() {} Status FileColumnIterator::seek_to_first() { RETURN_IF_ERROR(_reader->seek_to_first(&_page_iter)); diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h index dead2d9ee1..0e4a807d64 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.h +++ b/be/src/olap/rowset/segment_v2/column_reader.h @@ -72,8 +72,6 @@ struct ColumnIteratorOptions { // INDEX_PAGE including index_page, dict_page and short_key_page PageTypePB type; - std::shared_ptr mem_tracker; - void sanity_check() const { CHECK_NOTNULL(rblock); CHECK_NOTNULL(stats); @@ -198,7 +196,6 @@ public: virtual ~ColumnIterator() = default; virtual Status init(const ColumnIteratorOptions& opts) { - DCHECK(opts.mem_tracker.get() != nullptr); _opts = opts; return Status::OK(); } diff --git a/be/src/olap/rowset/segment_v2/column_writer.h b/be/src/olap/rowset/segment_v2/column_writer.h index 1f328f2b9a..ebd5430ad8 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.h +++ b/be/src/olap/rowset/segment_v2/column_writer.h @@ -59,7 +59,6 @@ struct ColumnWriterOptions { << ", need_bloom_filter" << need_bloom_filter; return ss.str(); } - std::shared_ptr parent = nullptr; }; class BitmapIndexWriter; diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp index 2489770e6f..c5518e695e 100644 --- a/be/src/olap/rowset/segment_v2/segment.cpp +++ 
b/be/src/olap/rowset/segment_v2/segment.cpp @@ -51,10 +51,10 @@ Segment::Segment(const FilePathDesc& path_desc, uint32_t segment_id, const TabletSchema* tablet_schema) : _path_desc(path_desc), _segment_id(segment_id), _tablet_schema(tablet_schema) { #ifndef BE_TEST - _mem_tracker = MemTracker::create_tracker( + _mem_tracker = MemTracker::create_virtual_tracker( -1, "Segment", StorageEngine::instance()->tablet_mem_tracker()); #else - _mem_tracker = MemTracker::create_tracker(-1, "Segment"); + _mem_tracker = MemTracker::create_virtual_tracker(-1, "Segment"); #endif } @@ -70,7 +70,6 @@ Status Segment::_open() { } Status Segment::new_iterator(const Schema& schema, const StorageReadOptions& read_options, - std::shared_ptr parent, std::unique_ptr* iter) { if (!_is_open) { RETURN_IF_ERROR(_open()); @@ -94,7 +93,7 @@ Status Segment::new_iterator(const Schema& schema, const StorageReadOptions& rea } RETURN_IF_ERROR(_load_index()); - iter->reset(new SegmentIterator(this->shared_from_this(), schema, parent)); + iter->reset(new SegmentIterator(this->shared_from_this(), schema)); iter->get()->init(read_options); return Status::OK(); } @@ -203,8 +202,7 @@ Status Segment::_create_column_readers() { return Status::OK(); } -Status Segment::new_column_iterator(uint32_t cid, std::shared_ptr parent, - ColumnIterator** iter) { +Status Segment::new_column_iterator(uint32_t cid, ColumnIterator** iter) { if (_column_readers[cid] == nullptr) { const TabletColumn& tablet_column = _tablet_schema->column(cid); if (!tablet_column.has_default_value() && !tablet_column.is_nullable()) { @@ -216,7 +214,6 @@ Status Segment::new_column_iterator(uint32_t cid, std::shared_ptr pa tablet_column.has_default_value(), tablet_column.default_value(), tablet_column.is_nullable(), type_info, tablet_column.length())); ColumnIteratorOptions iter_opts; - iter_opts.mem_tracker = MemTracker::create_tracker(-1, "DefaultColumnIterator", parent); RETURN_IF_ERROR(default_value_iter->init(iter_opts)); *iter = 
default_value_iter.release(); diff --git a/be/src/olap/rowset/segment_v2/segment.h b/be/src/olap/rowset/segment_v2/segment.h index 56fc852d98..857d674470 100644 --- a/be/src/olap/rowset/segment_v2/segment.h +++ b/be/src/olap/rowset/segment_v2/segment.h @@ -64,15 +64,13 @@ public: ~Segment(); - Status new_iterator(const Schema& schema, const StorageReadOptions& read_options, - std::shared_ptr parent, - std::unique_ptr* iter); + Status new_iterator(const Schema& schema, const StorageReadOptions& read_options, std::unique_ptr* iter); uint64_t id() const { return _segment_id; } uint32_t num_rows() const { return _footer.num_rows(); } - Status new_column_iterator(uint32_t cid, std::shared_ptr parent, ColumnIterator** iter); + Status new_column_iterator(uint32_t cid, ColumnIterator** iter); Status new_bitmap_index_iterator(uint32_t cid, BitmapIndexIterator** iter); diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 58360634c7..560f694e5a 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -92,18 +92,14 @@ private: bool _eof = false; }; -SegmentIterator::SegmentIterator(std::shared_ptr segment, const Schema& schema, - std::shared_ptr parent) +SegmentIterator::SegmentIterator(std::shared_ptr segment, const Schema& schema) : _segment(std::move(segment)), _schema(schema), _column_iterators(_schema.num_columns(), nullptr), _bitmap_index_iterators(_schema.num_columns(), nullptr), _cur_rowid(0), _lazy_materialization_read(false), - _inited(false) { - // use for count the mem use of ColumnIterator - _mem_tracker = MemTracker::create_tracker(-1, "SegmentIterator", std::move(parent)); -} + _inited(false) {} SegmentIterator::~SegmentIterator() { for (auto iter : _column_iterators) { @@ -199,18 +195,16 @@ Status SegmentIterator::_prepare_seek(const StorageReadOptions::KeyRange& key_ra } } _seek_schema = std::make_unique(key_fields, 
key_fields.size()); - _seek_block = std::make_unique(*_seek_schema, 1, _mem_tracker); + _seek_block = std::make_unique(*_seek_schema, 1); // create used column iterator for (auto cid : _seek_schema->column_ids()) { if (_column_iterators[cid] == nullptr) { RETURN_IF_ERROR( - _segment->new_column_iterator(cid, _mem_tracker, &_column_iterators[cid])); + _segment->new_column_iterator(cid, &_column_iterators[cid])); ColumnIteratorOptions iter_opts; iter_opts.stats = _opts.stats; iter_opts.rblock = _rblock.get(); - iter_opts.mem_tracker = - MemTracker::create_tracker(-1, "ColumnIterator", _mem_tracker); RETURN_IF_ERROR(_column_iterators[cid]->init(iter_opts)); } } @@ -336,13 +330,11 @@ Status SegmentIterator::_init_return_column_iterators() { for (auto cid : _schema.column_ids()) { if (_column_iterators[cid] == nullptr) { RETURN_IF_ERROR( - _segment->new_column_iterator(cid, _mem_tracker, &_column_iterators[cid])); + _segment->new_column_iterator(cid, &_column_iterators[cid])); ColumnIteratorOptions iter_opts; iter_opts.stats = _opts.stats; iter_opts.use_page_cache = _opts.use_page_cache; iter_opts.rblock = _rblock.get(); - iter_opts.mem_tracker = - MemTracker::create_tracker(-1, "ColumnIterator", _mem_tracker); RETURN_IF_ERROR(_column_iterators[cid]->init(iter_opts)); } } diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h index ba82be3a90..15a7c415a9 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.h +++ b/be/src/olap/rowset/segment_v2/segment_iterator.h @@ -47,8 +47,7 @@ class ColumnIterator; class SegmentIterator : public RowwiseIterator { public: - SegmentIterator(std::shared_ptr segment, const Schema& _schema, - std::shared_ptr parent); + SegmentIterator(std::shared_ptr segment, const Schema& _schema); ~SegmentIterator() override; Status init(const StorageReadOptions& opts) override; diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp 
index 93c5e885b0..70509ad234 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -37,9 +37,13 @@ const char* k_segment_magic = "D0R1"; const uint32_t k_segment_magic_length = 4; SegmentWriter::SegmentWriter(fs::WritableBlock* wblock, uint32_t segment_id, - const TabletSchema* tablet_schema, const SegmentWriterOptions& opts, std::shared_ptr parent) - : _segment_id(segment_id), _tablet_schema(tablet_schema), _opts(opts), _wblock(wblock), _mem_tracker(MemTracker::create_tracker( - -1, "Segment-" + std::to_string(segment_id), parent)) { + const TabletSchema* tablet_schema, const SegmentWriterOptions& opts) + : _segment_id(segment_id), + _tablet_schema(tablet_schema), + _opts(opts), + _wblock(wblock), + _mem_tracker( + MemTracker::create_virtual_tracker(-1, "SegmentWriter:Segment-" + std::to_string(segment_id))) { CHECK_NOTNULL(_wblock); } @@ -48,7 +52,7 @@ SegmentWriter::~SegmentWriter() { }; void SegmentWriter::init_column_meta(ColumnMetaPB* meta, uint32_t* column_id, - const TabletColumn& column) { + const TabletColumn& column) { // TODO(zc): Do we need this column_id?? 
meta->set_column_id((*column_id)++); meta->set_unique_id(column.unique_id()); @@ -85,7 +89,6 @@ Status SegmentWriter::init(uint32_t write_mbytes_per_sec __attribute__((unused)) return Status::NotSupported("Do not support bitmap index for array type"); } } - opts.parent = _mem_tracker; std::unique_ptr writer; RETURN_IF_ERROR(ColumnWriter::create(opts, &column, _wblock, &writer)); @@ -218,7 +221,7 @@ Status SegmentWriter::_write_footer() { // that will need an extra seek when reading fixed_buf.append(k_segment_magic, k_segment_magic_length); - std::vector slices{footer_buf, fixed_buf}; + std::vector slices {footer_buf, fixed_buf}; return _write_raw_data(slices); } diff --git a/be/src/olap/rowset/segment_v2/segment_writer.h b/be/src/olap/rowset/segment_v2/segment_writer.h index d0600996ad..77a66c85db 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.h +++ b/be/src/olap/rowset/segment_v2/segment_writer.h @@ -53,7 +53,7 @@ struct SegmentWriterOptions { class SegmentWriter { public: explicit SegmentWriter(fs::WritableBlock* block, uint32_t segment_id, - const TabletSchema* tablet_schema, const SegmentWriterOptions& opts, std::shared_ptr parent = nullptr); + const TabletSchema* tablet_schema, const SegmentWriterOptions& opts); ~SegmentWriter(); Status init(uint32_t write_mbytes_per_sec); diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp index c0b2085437..a1322e9f0a 100644 --- a/be/src/olap/schema_change.cpp +++ b/be/src/olap/schema_change.cpp @@ -37,6 +37,7 @@ #include "runtime/exec_env.h" #include "runtime/mem_pool.h" #include "runtime/mem_tracker.h" +#include "runtime/thread_context.h" #include "util/defer_op.h" using std::deque; @@ -78,7 +79,7 @@ public: virtual ~RowBlockMerger(); bool merge(const std::vector& row_block_arr, RowsetWriter* rowset_writer, - std::shared_ptr parent, uint64_t* merged_rows); + uint64_t* merged_rows); private: struct MergeElement { @@ -741,10 +742,9 @@ bool RowBlockSorter::sort(RowBlock** row_block) { return 
true; } -RowBlockAllocator::RowBlockAllocator(const TabletSchema& tablet_schema, - std::shared_ptr parent, size_t memory_limitation) +RowBlockAllocator::RowBlockAllocator(const TabletSchema& tablet_schema, size_t memory_limitation) : _tablet_schema(tablet_schema), - _mem_tracker(MemTracker::create_tracker(-1, "RowBlockAllocator", parent)), + _mem_tracker(MemTracker::create_virtual_tracker(-1, "RowBlockAllocator")), _row_len(tablet_schema.row_size()), _memory_limitation(memory_limitation) { VLOG_NOTICE << "RowBlockAllocator(). row_len=" << _row_len; @@ -766,7 +766,7 @@ OLAPStatus RowBlockAllocator::allocate(RowBlock** row_block, size_t num_rows, bo << "m_memory_allocated=" << _mem_tracker->consumption() << " " << "mem limit for schema change=" << _memory_limitation << " " << "You can increase the memory " - << "by changing the Config.memory_limitation_per_thread_for_schema_change"; + << "by changing the Config.memory_limitation_per_thread_for_schema_change_bytes"; *row_block = nullptr; return OLAP_ERR_INPUT_PARAMETER_ERROR; } @@ -805,7 +805,7 @@ void RowBlockAllocator::release(RowBlock* row_block) { delete row_block; } -bool RowBlockAllocator::is_memory_enough_for_sorting(size_t num_rows, size_t allocated_rows){ +bool RowBlockAllocator::is_memory_enough_for_sorting(size_t num_rows, size_t allocated_rows) { if (num_rows <= allocated_rows) { return true; } @@ -813,18 +813,15 @@ bool RowBlockAllocator::is_memory_enough_for_sorting(size_t num_rows, size_t all return _mem_tracker->consumption() + row_block_size < _memory_limitation; } - RowBlockMerger::RowBlockMerger(TabletSharedPtr tablet) : _tablet(tablet) {} RowBlockMerger::~RowBlockMerger() {} bool RowBlockMerger::merge(const std::vector& row_block_arr, RowsetWriter* rowset_writer, - std::shared_ptr parent, uint64_t* merged_rows) { + uint64_t* merged_rows) { uint64_t tmp_merged_rows = 0; RowCursor row_cursor; - std::shared_ptr tracker( - MemTracker::create_tracker(-1, "RowBlockMerger", parent)); - std::unique_ptr 
mem_pool(new MemPool(tracker.get())); + std::unique_ptr mem_pool(new MemPool("RowBlockMerger")); std::unique_ptr agg_object_pool(new ObjectPool()); if (row_cursor.init(_tablet->tablet_schema()) != OLAP_SUCCESS) { LOG(WARNING) << "fail to init row cursor."; @@ -938,7 +935,7 @@ OLAPStatus LinkedSchemaChange::process(RowsetReaderSharedPtr rowset_reader, if (rowset_reader->type() != new_rowset_writer->type()) { LOG(INFO) << "the type of rowset " << rowset_reader->rowset()->rowset_id() << " in base tablet " << base_tablet->tablet_id() << " is not same as type " << new_rowset_writer->type() << ", use direct schema change."; - SchemaChangeDirectly scd(_row_block_changer, _mem_tracker); + SchemaChangeDirectly scd(_row_block_changer); return scd.process(rowset_reader, new_rowset_writer, new_tablet, base_tablet); } else { OLAPStatus status = new_rowset_writer->add_rowset_for_linked_schema_change( @@ -954,9 +951,8 @@ OLAPStatus LinkedSchemaChange::process(RowsetReaderSharedPtr rowset_reader, } } -SchemaChangeDirectly::SchemaChangeDirectly(const RowBlockChanger& row_block_changer, - std::shared_ptr mem_tracker) - : SchemaChange(mem_tracker), +SchemaChangeDirectly::SchemaChangeDirectly(const RowBlockChanger& row_block_changer) + : SchemaChange(), _row_block_changer(row_block_changer), _row_block_allocator(nullptr), _cursor(nullptr) {} @@ -1001,7 +997,7 @@ OLAPStatus SchemaChangeDirectly::process(RowsetReaderSharedPtr rowset_reader, RowsetWriter* rowset_writer, TabletSharedPtr new_tablet, TabletSharedPtr base_tablet) { if (_row_block_allocator == nullptr) { - _row_block_allocator = new RowBlockAllocator(new_tablet->tablet_schema(), _mem_tracker, 0); + _row_block_allocator = new RowBlockAllocator(new_tablet->tablet_schema(), 0); if (_row_block_allocator == nullptr) { LOG(FATAL) << "failed to malloc RowBlockAllocator. 
size=" << sizeof(RowBlockAllocator); return OLAP_ERR_INPUT_PARAMETER_ERROR; @@ -1100,9 +1096,8 @@ OLAPStatus SchemaChangeDirectly::process(RowsetReaderSharedPtr rowset_reader, } SchemaChangeWithSorting::SchemaChangeWithSorting(const RowBlockChanger& row_block_changer, - std::shared_ptr mem_tracker, size_t memory_limitation) - : SchemaChange(mem_tracker), + : SchemaChange(), _row_block_changer(row_block_changer), _memory_limitation(memory_limitation), _row_block_allocator(nullptr) { @@ -1123,8 +1118,8 @@ OLAPStatus SchemaChangeWithSorting::process(RowsetReaderSharedPtr rowset_reader, TabletSharedPtr new_tablet, TabletSharedPtr base_tablet) { if (_row_block_allocator == nullptr) { - _row_block_allocator = new (nothrow) - RowBlockAllocator(new_tablet->tablet_schema(), _mem_tracker, _memory_limitation); + _row_block_allocator = + new (nothrow) RowBlockAllocator(new_tablet->tablet_schema(), _memory_limitation); if (_row_block_allocator == nullptr) { LOG(FATAL) << "failed to malloc RowBlockAllocator. size=" << sizeof(RowBlockAllocator); return OLAP_ERR_INPUT_PARAMETER_ERROR; @@ -1341,7 +1336,6 @@ bool SchemaChangeWithSorting::_internal_sorting(const std::vector& ro context.rowset_state = VISIBLE; context.version = version; context.segments_overlap = segments_overlap; - context.parent_mem_tracker = _mem_tracker; VLOG_NOTICE << "init rowset builder. 
tablet=" << new_tablet->full_name() << ", block_row_size=" << new_tablet->num_rows_per_row_block(); @@ -1351,7 +1345,7 @@ bool SchemaChangeWithSorting::_internal_sorting(const std::vector& ro return false; } - if (!merger.merge(row_block_arr, rowset_writer.get(), _mem_tracker, &merged_rows)) { + if (!merger.merge(row_block_arr, rowset_writer.get(), &merged_rows)) { LOG(WARNING) << "failed to merge row blocks."; new_tablet->data_dir()->remove_pending_ids(ROWSET_ID_PREFIX + rowset_writer->rowset_id().to_string()); @@ -1370,7 +1364,7 @@ bool SchemaChangeWithSorting::_external_sorting(vector& src_row std::vector rs_readers; for (auto& rowset : src_rowsets) { RowsetReaderSharedPtr rs_reader; - auto res = rowset->create_reader(_mem_tracker, &rs_reader); + auto res = rowset->create_reader(&rs_reader); if (res != OLAP_SUCCESS) { LOG(WARNING) << "failed to create rowset reader."; return false; @@ -1505,6 +1499,7 @@ OLAPStatus SchemaChangeHandler::_do_process_alter_tablet_v2(const TAlterTabletRe reader_context.seek_columns = &return_columns; reader_context.sequence_id_idx = reader_context.tablet_schema->sequence_col_idx(); + // TODO(zxy) switch to tls mem tracker auto mem_tracker = MemTracker::create_tracker( -1, "AlterTablet:" + std::to_string(base_tablet->tablet_id()) + "-" + @@ -1572,7 +1567,7 @@ OLAPStatus SchemaChangeHandler::_do_process_alter_tablet_v2(const TAlterTabletRe } // acquire data sources correspond to history versions - base_tablet->capture_rs_readers(versions_to_be_changed, &rs_readers, mem_tracker); + base_tablet->capture_rs_readers(versions_to_be_changed, &rs_readers); if (rs_readers.size() < 1) { LOG(WARNING) << "fail to acquire all data sources. " << "version_num=" << versions_to_be_changed.size() @@ -1691,17 +1686,17 @@ OLAPStatus SchemaChangeHandler::schema_version_convert(TabletSharedPtr base_tabl // b. 
Generate historical data converter SchemaChange* sc_procedure = nullptr; if (sc_sorting) { - size_t memory_limitation = config::memory_limitation_per_thread_for_schema_change; LOG(INFO) << "doing schema change with sorting for base_tablet " << base_tablet->full_name(); sc_procedure = new (nothrow) SchemaChangeWithSorting( - rb_changer, _mem_tracker, memory_limitation * 1024 * 1024 * 1024); + rb_changer, + config::memory_limitation_per_thread_for_schema_change_bytes); } else if (sc_directly) { LOG(INFO) << "doing schema change directly for base_tablet " << base_tablet->full_name(); - sc_procedure = new (nothrow) SchemaChangeDirectly(rb_changer, _mem_tracker); + sc_procedure = new (nothrow) SchemaChangeDirectly(rb_changer); } else { LOG(INFO) << "doing linked schema change for base_tablet " << base_tablet->full_name(); - sc_procedure = new (nothrow) LinkedSchemaChange(rb_changer, _mem_tracker); + sc_procedure = new (nothrow) LinkedSchemaChange(rb_changer); } if (sc_procedure == nullptr) { @@ -1728,7 +1723,7 @@ OLAPStatus SchemaChangeHandler::schema_version_convert(TabletSharedPtr base_tabl reader_context.sequence_id_idx = reader_context.tablet_schema->sequence_col_idx(); RowsetReaderSharedPtr rowset_reader; - RETURN_NOT_OK((*base_rowset)->create_reader(_mem_tracker, &rowset_reader)); + RETURN_NOT_OK((*base_rowset)->create_reader(&rowset_reader)); RETURN_NOT_OK(rowset_reader->init(&reader_context)); RowsetWriterContext writer_context; @@ -1748,7 +1743,6 @@ OLAPStatus SchemaChangeHandler::schema_version_convert(TabletSharedPtr base_tabl writer_context.load_id.set_hi((*base_rowset)->load_id().hi()); writer_context.load_id.set_lo((*base_rowset)->load_id().lo()); writer_context.segments_overlap = (*base_rowset)->rowset_meta()->segments_overlap(); - writer_context.parent_mem_tracker = _mem_tracker; std::unique_ptr rowset_writer; RowsetFactory::create_rowset_writer(writer_context, &rowset_writer); @@ -1843,19 +1837,19 @@ OLAPStatus 
SchemaChangeHandler::_convert_historical_rowsets(const SchemaChangePa // b. Generate historical data converter if (sc_sorting) { - size_t memory_limitation = config::memory_limitation_per_thread_for_schema_change; LOG(INFO) << "doing schema change with sorting for base_tablet " << sc_params.base_tablet->full_name(); sc_procedure = new (nothrow) SchemaChangeWithSorting( - rb_changer, _mem_tracker, memory_limitation * 1024 * 1024 * 1024); + rb_changer, + config::memory_limitation_per_thread_for_schema_change_bytes); } else if (sc_directly) { LOG(INFO) << "doing schema change directly for base_tablet " << sc_params.base_tablet->full_name(); - sc_procedure = new (nothrow) SchemaChangeDirectly(rb_changer, _mem_tracker); + sc_procedure = new (nothrow) SchemaChangeDirectly(rb_changer); } else { LOG(INFO) << "doing linked schema change for base_tablet " << sc_params.base_tablet->full_name(); - sc_procedure = new (nothrow) LinkedSchemaChange(rb_changer, _mem_tracker); + sc_procedure = new (nothrow) LinkedSchemaChange(rb_changer); } if (sc_procedure == nullptr) { @@ -1893,7 +1887,6 @@ OLAPStatus SchemaChangeHandler::_convert_historical_rowsets(const SchemaChangePa writer_context.rowset_state = VISIBLE; writer_context.version = rs_reader->version(); writer_context.segments_overlap = rs_reader->rowset()->rowset_meta()->segments_overlap(); - writer_context.parent_mem_tracker = _mem_tracker; std::unique_ptr rowset_writer; OLAPStatus status = RowsetFactory::create_rowset_writer(writer_context, &rowset_writer); diff --git a/be/src/olap/schema_change.h b/be/src/olap/schema_change.h index 2c95a3485c..2db9a8f189 100644 --- a/be/src/olap/schema_change.h +++ b/be/src/olap/schema_change.h @@ -76,8 +76,7 @@ private: class RowBlockAllocator { public: - RowBlockAllocator(const TabletSchema& tablet_schema, std::shared_ptr parent, - size_t memory_limitation); + RowBlockAllocator(const TabletSchema& tablet_schema, size_t memory_limitation); virtual ~RowBlockAllocator(); OLAPStatus 
allocate(RowBlock** row_block, size_t num_rows, bool null_supported); @@ -93,8 +92,7 @@ private: class SchemaChange { public: - SchemaChange(std::shared_ptr tracker) - : _mem_tracker(std::move(tracker)), _filtered_rows(0), _merged_rows(0) {} + SchemaChange() : _filtered_rows(0), _merged_rows(0) {} virtual ~SchemaChange() = default; virtual OLAPStatus process(RowsetReaderSharedPtr rowset_reader, @@ -113,9 +111,6 @@ public: void reset_merged_rows() { _merged_rows = 0; } -protected: - std::shared_ptr _mem_tracker; - private: uint64_t _filtered_rows; uint64_t _merged_rows; @@ -123,9 +118,8 @@ private: class LinkedSchemaChange : public SchemaChange { public: - explicit LinkedSchemaChange(const RowBlockChanger& row_block_changer, - std::shared_ptr mem_tracker) - : SchemaChange(mem_tracker), _row_block_changer(row_block_changer) {} + explicit LinkedSchemaChange(const RowBlockChanger& row_block_changer) + : SchemaChange(), _row_block_changer(row_block_changer) {} ~LinkedSchemaChange() {} virtual OLAPStatus process(RowsetReaderSharedPtr rowset_reader, RowsetWriter* new_rowset_writer, @@ -141,8 +135,7 @@ class SchemaChangeDirectly : public SchemaChange { public: // @params tablet the instance of tablet which has new schema. 
// @params row_block_changer changer to modify the data of RowBlock - explicit SchemaChangeDirectly(const RowBlockChanger& row_block_changer, - std::shared_ptr mem_tracker); + explicit SchemaChangeDirectly(const RowBlockChanger& row_block_changer); virtual ~SchemaChangeDirectly(); virtual OLAPStatus process(RowsetReaderSharedPtr rowset_reader, RowsetWriter* new_rowset_writer, @@ -162,7 +155,6 @@ private: class SchemaChangeWithSorting : public SchemaChange { public: explicit SchemaChangeWithSorting(const RowBlockChanger& row_block_changer, - std::shared_ptr mem_tracker, size_t memory_limitation); virtual ~SchemaChangeWithSorting(); diff --git a/be/src/olap/snapshot_manager.h b/be/src/olap/snapshot_manager.h index 0efa64711b..4846fbba2b 100644 --- a/be/src/olap/snapshot_manager.h +++ b/be/src/olap/snapshot_manager.h @@ -65,7 +65,10 @@ public: const int32_t& schema_hash); private: - SnapshotManager() : _snapshot_base_id(0) {} + SnapshotManager() : _snapshot_base_id(0) { + _mem_tracker = MemTracker::create_tracker(-1, "SnapshotManager", nullptr, + MemTrackerLevel::OVERVIEW); + } OLAPStatus _calc_snapshot_id_path(const TabletSharedPtr& tablet, int64_t timeout_s, std::string* out_path); @@ -99,6 +102,9 @@ private: // snapshot Mutex _snapshot_mutex; uint64_t _snapshot_base_id; + + // TODO(zxy) used after + std::shared_ptr _mem_tracker = nullptr; }; // SnapshotManager } // namespace doris diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp index 7db2d24d46..e831d022cf 100644 --- a/be/src/olap/storage_engine.cpp +++ b/be/src/olap/storage_engine.cpp @@ -141,8 +141,6 @@ StorageEngine::StorageEngine(const EngineOptions& options) }); REGISTER_HOOK_METRIC(compaction_mem_consumption, [this]() { return _compaction_mem_tracker->consumption(); - // We can get each compaction's detail usage - // LOG(INFO) << _compaction_mem_tracker=>LogUsage(2); }); } @@ -1083,12 +1081,12 @@ bool StorageEngine::check_rowset_id_in_unused_rowsets(const RowsetId& rowset_id) void 
StorageEngine::create_cumulative_compaction( TabletSharedPtr best_tablet, std::shared_ptr& cumulative_compaction) { - cumulative_compaction.reset(new CumulativeCompaction(best_tablet, _compaction_mem_tracker)); + cumulative_compaction.reset(new CumulativeCompaction(best_tablet)); } void StorageEngine::create_base_compaction(TabletSharedPtr best_tablet, std::shared_ptr& base_compaction) { - base_compaction.reset(new BaseCompaction(best_tablet, _compaction_mem_tracker)); + base_compaction.reset(new BaseCompaction(best_tablet)); } // Return json: diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index 6516b81617..bff11d836b 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -640,17 +640,15 @@ OLAPStatus Tablet::_capture_consistent_rowsets_unlocked( } OLAPStatus Tablet::capture_rs_readers(const Version& spec_version, - std::vector* rs_readers, - std::shared_ptr parent_tracker) const { + std::vector* rs_readers) const { std::vector version_path; RETURN_NOT_OK(capture_consistent_versions(spec_version, &version_path)); - RETURN_NOT_OK(capture_rs_readers(version_path, rs_readers, parent_tracker)); + RETURN_NOT_OK(capture_rs_readers(version_path, rs_readers)); return OLAP_SUCCESS; } OLAPStatus Tablet::capture_rs_readers(const std::vector& version_path, - std::vector* rs_readers, - std::shared_ptr parent_tracker) const { + std::vector* rs_readers) const { DCHECK(rs_readers != nullptr && rs_readers->empty()); for (auto version : version_path) { auto it = _rs_version_map.find(version); @@ -667,7 +665,7 @@ OLAPStatus Tablet::capture_rs_readers(const std::vector& version_path, } } RowsetReaderSharedPtr rs_reader; - auto res = it->second->create_reader(parent_tracker, &rs_reader); + auto res = it->second->create_reader(&rs_reader); if (res != OLAP_SUCCESS) { LOG(WARNING) << "failed to create reader for rowset:" << it->second->rowset_id(); return OLAP_ERR_CAPTURE_ROWSET_READER_ERROR; diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h index 
70a0c39441..b33753a09d 100644 --- a/be/src/olap/tablet.h +++ b/be/src/olap/tablet.h @@ -129,12 +129,10 @@ public: OLAPStatus capture_consistent_rowsets(const Version& spec_version, std::vector* rowsets) const; OLAPStatus capture_rs_readers(const Version& spec_version, - std::vector* rs_readers, - std::shared_ptr parent_tracker = nullptr) const; + std::vector* rs_readers) const; OLAPStatus capture_rs_readers(const std::vector& version_path, - std::vector* rs_readers, - std::shared_ptr parent_tracker = nullptr) const; + std::vector* rs_readers) const; DelPredicateArray delete_predicates() { return _tablet_meta->delete_predicates(); } void add_delete_predicate(const DeletePredicatePB& delete_predicate, int64_t version); diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp index c6665a12b0..639d1d666b 100644 --- a/be/src/olap/tablet_manager.cpp +++ b/be/src/olap/tablet_manager.cpp @@ -46,6 +46,7 @@ #include "olap/tablet_meta.h" #include "olap/tablet_meta_manager.h" #include "olap/utils.h" +#include "runtime/thread_context.h" #include "service/backend_options.h" #include "util/doris_metrics.h" #include "util/file_utils.h" @@ -74,7 +75,7 @@ static bool _cmp_tablet_by_create_time(const TabletSharedPtr& a, const TabletSha TabletManager::TabletManager(int32_t tablet_map_lock_shard_size) : _mem_tracker(MemTracker::create_tracker(-1, "TabletManager", nullptr, - MemTrackerLevel::OVERVIEW)), + MemTrackerLevel::OVERVIEW)), _tablets_shards_size(tablet_map_lock_shard_size), _tablets_shards_mask(tablet_map_lock_shard_size - 1) { CHECK_GT(_tablets_shards_size, 0); @@ -85,7 +86,6 @@ TabletManager::TabletManager(int32_t tablet_map_lock_shard_size) } TabletManager::~TabletManager() { - _mem_tracker->release(_mem_tracker->consumption()); DEREGISTER_HOOK_METRIC(tablet_meta_mem_consumption); } @@ -201,7 +201,8 @@ OLAPStatus TabletManager::_add_tablet_to_map_unlocked(TTabletId tablet_id, Schem // TODO: remove multiply 2 of tablet meta mem size // Because table 
schema will copy in tablet, there will be double mem cost // so here multiply 2 - _mem_tracker->consume(tablet->tablet_meta()->mem_size() * 2); + thread_local_ctx.get()->_thread_mem_tracker_mgr->mem_tracker()->transfer_to( + _mem_tracker.get(), tablet->tablet_meta()->mem_size() * 2); VLOG_NOTICE << "add tablet to map successfully." << " tablet_id=" << tablet_id << ", schema_hash=" << schema_hash; @@ -1336,7 +1337,11 @@ OLAPStatus TabletManager::_drop_tablet_directly_unlocked(TTabletId tablet_id, } dropped_tablet->deregister_tablet_from_dir(); - _mem_tracker->release(dropped_tablet->tablet_meta()->mem_size() * 2); + // The dropped tablet meta is expected to be released in the TabletManager mem tracker, + // but is actually released in the tls mem tracker. + // So from TabletManager mem tracker compensate memory to tls tracker. + _mem_tracker->transfer_to(thread_local_ctx.get()->_thread_mem_tracker_mgr->mem_tracker().get(), + dropped_tablet->tablet_meta()->mem_size() * 2); return OLAP_SUCCESS; } diff --git a/be/src/olap/task/engine_alter_tablet_task.cpp b/be/src/olap/task/engine_alter_tablet_task.cpp index f04f9a0fe4..4af633c0c6 100644 --- a/be/src/olap/task/engine_alter_tablet_task.cpp +++ b/be/src/olap/task/engine_alter_tablet_task.cpp @@ -25,7 +25,14 @@ namespace doris { using std::to_string; EngineAlterTabletTask::EngineAlterTabletTask(const TAlterTabletReqV2& request) - : _alter_tablet_req(request) {} + : _alter_tablet_req(request) { + _mem_tracker = MemTracker::create_tracker( + config::memory_limitation_per_thread_for_schema_change_bytes, + fmt::format("EngineAlterTabletTask: {}-{}", + std::to_string(_alter_tablet_req.base_tablet_id), + std::to_string(_alter_tablet_req.new_tablet_id)), + StorageEngine::instance()->schema_change_mem_tracker(), MemTrackerLevel::TASK); +} OLAPStatus EngineAlterTabletTask::execute() { DorisMetrics::instance()->create_rollup_requests_total->increment(1); diff --git a/be/src/olap/task/engine_alter_tablet_task.h 
b/be/src/olap/task/engine_alter_tablet_task.h index cf57b45d80..2e022e9ccc 100644 --- a/be/src/olap/task/engine_alter_tablet_task.h +++ b/be/src/olap/task/engine_alter_tablet_task.h @@ -37,6 +37,7 @@ public: private: const TAlterTabletReqV2& _alter_tablet_req; + std::shared_ptr _mem_tracker; }; // EngineTask } // namespace doris diff --git a/be/src/olap/task/engine_batch_load_task.cpp b/be/src/olap/task/engine_batch_load_task.cpp index 9d29a33a9e..ad9e264244 100644 --- a/be/src/olap/task/engine_batch_load_task.cpp +++ b/be/src/olap/task/engine_batch_load_task.cpp @@ -52,6 +52,9 @@ EngineBatchLoadTask::EngineBatchLoadTask(TPushReq& push_req, std::vectorbatch_load_mem_tracker(), MemTrackerLevel::TASK); } EngineBatchLoadTask::~EngineBatchLoadTask() {} diff --git a/be/src/olap/task/engine_batch_load_task.h b/be/src/olap/task/engine_batch_load_task.h index 5d9daa9ba6..04805d670b 100644 --- a/be/src/olap/task/engine_batch_load_task.h +++ b/be/src/olap/task/engine_batch_load_task.h @@ -76,6 +76,7 @@ private: Status* _res_status; std::string _remote_file_path; std::string _local_file_path; + std::shared_ptr _mem_tracker; }; // class EngineBatchLoadTask } // namespace doris #endif // DORIS_BE_SRC_OLAP_TASK_ENGINE_BATCH_LOAD_TASK_H diff --git a/be/src/olap/task/engine_checksum_task.cpp b/be/src/olap/task/engine_checksum_task.cpp index 931f30e2c3..83c1d88792 100644 --- a/be/src/olap/task/engine_checksum_task.cpp +++ b/be/src/olap/task/engine_checksum_task.cpp @@ -27,7 +27,11 @@ EngineChecksumTask::EngineChecksumTask(TTabletId tablet_id, TSchemaHash schema_h : _tablet_id(tablet_id), _schema_hash(schema_hash), _version(version), - _checksum(checksum) {} + _checksum(checksum) { + _mem_tracker = MemTracker::create_tracker(-1, "compute checksum: " + std::to_string(tablet_id), + StorageEngine::instance()->consistency_mem_tracker(), + MemTrackerLevel::TASK); +} OLAPStatus EngineChecksumTask::execute() { OLAPStatus res = _compute_checksum(); diff --git 
a/be/src/olap/task/engine_checksum_task.h b/be/src/olap/task/engine_checksum_task.h index 7f0cdb6e6b..0430c560e9 100644 --- a/be/src/olap/task/engine_checksum_task.h +++ b/be/src/olap/task/engine_checksum_task.h @@ -44,6 +44,7 @@ private: TSchemaHash _schema_hash; TVersion _version; uint32_t* _checksum; + std::shared_ptr _mem_tracker; }; // EngineTask } // namespace doris diff --git a/be/src/olap/task/engine_clone_task.cpp b/be/src/olap/task/engine_clone_task.cpp index c1d42ce855..6231a96506 100644 --- a/be/src/olap/task/engine_clone_task.cpp +++ b/be/src/olap/task/engine_clone_task.cpp @@ -55,7 +55,11 @@ EngineCloneTask::EngineCloneTask(const TCloneReq& clone_req, const TMasterInfo& _tablet_infos(tablet_infos), _res_status(res_status), _signature(signature), - _master_info(master_info) {} + _master_info(master_info) { + _mem_tracker = MemTracker::create_tracker( + -1, "clone tablet: " + std::to_string(_clone_req.tablet_id), + StorageEngine::instance()->clone_mem_tracker(), MemTrackerLevel::TASK); +} OLAPStatus EngineCloneTask::execute() { // register the tablet to avoid it is deleted by gc thread during clone process diff --git a/be/src/olap/task/engine_clone_task.h b/be/src/olap/task/engine_clone_task.h index 3cb883ae31..3e8a34894b 100644 --- a/be/src/olap/task/engine_clone_task.h +++ b/be/src/olap/task/engine_clone_task.h @@ -77,6 +77,7 @@ private: const TMasterInfo& _master_info; int64_t _copy_size; int64_t _copy_time_ms; + std::shared_ptr _mem_tracker; }; // EngineTask } // namespace doris diff --git a/be/src/runtime/CMakeLists.txt b/be/src/runtime/CMakeLists.txt index c78111923e..414f4057c5 100644 --- a/be/src/runtime/CMakeLists.txt +++ b/be/src/runtime/CMakeLists.txt @@ -47,6 +47,7 @@ set(RUNTIME_FILES runtime_filter_mgr.cpp string_value.cpp thread_context.cpp + thread_mem_tracker_mgr.cpp thread_resource_mgr.cpp threadlocal.cc decimalv2_value.cpp diff --git a/be/src/runtime/buffered_block_mgr2.cc b/be/src/runtime/buffered_block_mgr2.cc index 
f6edfc6830..4e388f9d01 100644 --- a/be/src/runtime/buffered_block_mgr2.cc +++ b/be/src/runtime/buffered_block_mgr2.cc @@ -57,8 +57,7 @@ public: const std::shared_ptr& tracker, RuntimeState* state) : _mgr(mgr), _state(state), - _tracker(tracker), - _query_tracker(_mgr->_mem_tracker->parent()), + _tracker(MemTracker::create_virtual_tracker(-1, "BufferedBlockMgr2::Client", tracker)), _num_reserved_buffers(num_reserved_buffers), _num_tmp_reserved_buffers(0), _num_pinned_buffers(0) { @@ -83,10 +82,6 @@ public: // regardless of if they are in the block mgr or the clients). std::shared_ptr _tracker; - // This is the common ancestor between the block mgr tracker and the client tracker. - // When memory is transferred to the client, we want it to stop at this tracker. - std::shared_ptr _query_tracker; - // Number of buffers reserved by this client. int _num_reserved_buffers; @@ -100,7 +95,7 @@ public: DCHECK(buffer != nullptr); if (buffer->len == _mgr->max_block_size()) { ++_num_pinned_buffers; - _tracker->consume(buffer->len, _query_tracker.get()); + _tracker->consume(buffer->len); } } @@ -109,7 +104,7 @@ public: if (buffer->len == _mgr->max_block_size()) { DCHECK_GT(_num_pinned_buffers, 0); --_num_pinned_buffers; - _tracker->release(buffer->len, _query_tracker.get()); + _tracker->release(buffer->len); } } @@ -219,11 +214,9 @@ BufferedBlockMgr2::BufferedBlockMgr2(RuntimeState* state, TmpFileMgr* tmp_file_m _writes_issued(0), _state(state) {} -Status BufferedBlockMgr2::create(RuntimeState* state, const std::shared_ptr& parent, - RuntimeProfile* profile, TmpFileMgr* tmp_file_mgr, - int64_t mem_limit, int64_t block_size, +Status BufferedBlockMgr2::create(RuntimeState* state, RuntimeProfile* profile, + TmpFileMgr* tmp_file_mgr, int64_t mem_limit, int64_t block_size, std::shared_ptr* block_mgr) { - DCHECK(parent != nullptr); block_mgr->reset(); { // we do not use global BlockMgrsMap for now, to avoid mem-exceeded different fragments @@ -245,7 +238,7 @@ Status 
BufferedBlockMgr2::create(RuntimeState* state, const std::shared_ptrquery_id()] = *block_mgr; } } - (*block_mgr)->init(state->exec_env()->disk_io_mgr(), profile, parent, mem_limit); + (*block_mgr)->init(state->exec_env()->disk_io_mgr(), profile, mem_limit); return Status::OK(); } @@ -326,7 +319,7 @@ bool BufferedBlockMgr2::consume_memory(Client* client, int64_t size) { WARN_IF_ERROR(st, "consume failed"); if (size < max_block_size() && st) { // For small allocations (less than a block size), just let the allocation through. - client->_tracker->consume(size, client->_query_tracker.get()); + client->_tracker->consume(size); return true; } @@ -337,7 +330,7 @@ bool BufferedBlockMgr2::consume_memory(Client* client, int64_t size) { WARN_IF_ERROR(st, "consume failed"); if (st) { // There was still unallocated memory, don't need to recycle allocated blocks. - client->_tracker->consume(size, client->_query_tracker.get()); + client->_tracker->consume(size); return true; } @@ -396,14 +389,14 @@ bool BufferedBlockMgr2::consume_memory(Client* client, int64_t size) { if (!st) { return false; } - client->_tracker->consume(size, client->_query_tracker.get()); + client->_tracker->consume(size); DCHECK(validate()) << endl << debug_internal(); return true; } void BufferedBlockMgr2::release_memory(Client* client, int64_t size) { _mem_tracker->release(size); - client->_tracker->release(size, client->_query_tracker.get()); + client->_tracker->release(size); } void BufferedBlockMgr2::cancel() { @@ -1262,8 +1255,7 @@ string BufferedBlockMgr2::debug_internal() const { return ss.str(); } -void BufferedBlockMgr2::init(DiskIoMgr* io_mgr, RuntimeProfile* parent_profile, - const std::shared_ptr& parent_tracker, int64_t mem_limit) { +void BufferedBlockMgr2::init(DiskIoMgr* io_mgr, RuntimeProfile* parent_profile, int64_t mem_limit) { unique_lock l(_lock); if (_initialized) { return; @@ -1288,7 +1280,7 @@ void BufferedBlockMgr2::init(DiskIoMgr* io_mgr, RuntimeProfile* parent_profile, 
_integrity_check_timer = ADD_TIMER(_profile.get(), "TotalIntegrityCheckTime"); // Create a new mem_tracker and allocate buffers. - _mem_tracker = MemTracker::create_tracker(mem_limit, "BufferedBlockMgr2", parent_tracker); + _mem_tracker = MemTracker::create_virtual_tracker(mem_limit, "BufferedBlockMgr2"); _initialized = true; } diff --git a/be/src/runtime/buffered_block_mgr2.h b/be/src/runtime/buffered_block_mgr2.h index cf923dcb39..a56a8b4ef0 100644 --- a/be/src/runtime/buffered_block_mgr2.h +++ b/be/src/runtime/buffered_block_mgr2.h @@ -283,9 +283,9 @@ public: // same query id has already been created, that block mgr is returned. // - mem_limit: maximum memory that will be used by the block mgr. // - buffer_size: maximum size of each buffer. - static Status create(RuntimeState* state, const std::shared_ptr& parent, - RuntimeProfile* profile, TmpFileMgr* tmp_file_mgr, int64_t mem_limit, - int64_t buffer_size, std::shared_ptr* block_mgr); + static Status create(RuntimeState* state, RuntimeProfile* profile, TmpFileMgr* tmp_file_mgr, + int64_t mem_limit, int64_t buffer_size, + std::shared_ptr* block_mgr); ~BufferedBlockMgr2(); @@ -406,8 +406,7 @@ private: BufferedBlockMgr2(RuntimeState* state, TmpFileMgr* tmp_file_mgr, int64_t block_size); // Initializes the block mgr. Idempotent and thread-safe. - void init(DiskIoMgr* io_mgr, RuntimeProfile* profile, - const std::shared_ptr& parent_tracker, int64_t mem_limit); + void init(DiskIoMgr* io_mgr, RuntimeProfile* profile, int64_t mem_limit); // Initializes _tmp_files. This is initialized the first time we need to write to disk. // Must be called with _lock taken. 
diff --git a/be/src/runtime/buffered_tuple_stream2.cc b/be/src/runtime/buffered_tuple_stream2.cc index d0c9428ffe..6f55086e8b 100644 --- a/be/src/runtime/buffered_tuple_stream2.cc +++ b/be/src/runtime/buffered_tuple_stream2.cc @@ -484,7 +484,7 @@ Status BufferedTupleStream2::get_rows(unique_ptr* batch, bool* got_row return Status::OK(); } RETURN_IF_ERROR(prepare_for_read(false)); - batch->reset(new RowBatch(_desc, num_rows(), _block_mgr->get_tracker(_block_mgr_client).get())); + batch->reset(new RowBatch(_desc, num_rows())); bool eos = false; // Loop until get_next fills the entire batch. Each call can stop at block // boundaries. We generally want it to stop, so that blocks can be freed diff --git a/be/src/runtime/buffered_tuple_stream3.cc b/be/src/runtime/buffered_tuple_stream3.cc index 6707140c2c..72eead6b08 100644 --- a/be/src/runtime/buffered_tuple_stream3.cc +++ b/be/src/runtime/buffered_tuple_stream3.cc @@ -20,7 +20,6 @@ #include "runtime/buffered_tuple_stream3.inline.h" #include "runtime/descriptors.h" #include "runtime/exec_env.h" -#include "runtime/mem_tracker.h" #include "runtime/row_batch.h" #include "runtime/runtime_state.h" #include "runtime/string_value.h" @@ -664,8 +663,7 @@ void BufferedTupleStream3::UnpinStream(UnpinMode mode) { CHECK_CONSISTENCY_FULL(); } */ -Status BufferedTupleStream3::GetRows(const std::shared_ptr& tracker, - std::unique_ptr* batch, bool* got_rows) { +Status BufferedTupleStream3::GetRows(std::unique_ptr* batch, bool* got_rows) { if (num_rows() > numeric_limits::max()) { // RowBatch::num_rows_ is a 32-bit int, avoid an overflow. return Status::InternalError( @@ -682,7 +680,7 @@ Status BufferedTupleStream3::GetRows(const std::shared_ptr& tracker, // TODO chenhao // capacity in RowBatch use int, but _num_rows is int64_t // it may be precision loss - batch->reset(new RowBatch(*desc_, num_rows(), tracker.get())); + batch->reset(new RowBatch(*desc_, num_rows())); bool eos = false; // Loop until GetNext fills the entire batch. 
Each call can stop at page // boundaries. We generally want it to stop, so that pages can be freed diff --git a/be/src/runtime/buffered_tuple_stream3.h b/be/src/runtime/buffered_tuple_stream3.h index 927382cff4..312798326e 100644 --- a/be/src/runtime/buffered_tuple_stream3.h +++ b/be/src/runtime/buffered_tuple_stream3.h @@ -30,7 +30,6 @@ namespace doris { -class MemTracker; class RuntimeState; class RowDescriptor; class SlotDescriptor; @@ -333,8 +332,7 @@ public: /// process. If the current unused reservation is not sufficient to pin the stream in /// memory, this will try to increase the reservation. If that fails, 'got_rows' is set /// to false. - Status GetRows(const std::shared_ptr& tracker, std::unique_ptr* batch, - bool* got_rows) WARN_UNUSED_RESULT; + Status GetRows(std::unique_ptr* batch, bool* got_rows) WARN_UNUSED_RESULT; /// Must be called once at the end to cleanup all resources. If 'batch' is non-nullptr, /// attaches buffers from pinned pages that rows returned from GetNext() may reference. diff --git a/be/src/runtime/bufferpool/reservation_tracker.cc b/be/src/runtime/bufferpool/reservation_tracker.cc index 1e56441a0d..b3bd9baf32 100644 --- a/be/src/runtime/bufferpool/reservation_tracker.cc +++ b/be/src/runtime/bufferpool/reservation_tracker.cc @@ -193,7 +193,7 @@ bool ReservationTracker::TryConsumeFromMemTracker(int64_t reservation_increase) // For lower links, there shouldn't be a limit to enforce, so we just need to // update the consumption of the linked MemTracker since the reservation is // already reflected in its parent. 
- mem_tracker_->consume(reservation_increase, GetParentMemTracker()); + mem_tracker_->consume_local(reservation_increase, GetParentMemTracker()); return true; } } @@ -204,7 +204,7 @@ void ReservationTracker::ReleaseToMemTracker(int64_t reservation_decrease) { if (GetParentMemTracker() == nullptr) { mem_tracker_->release(reservation_decrease); } else { - mem_tracker_->release(reservation_decrease, GetParentMemTracker()); + mem_tracker_->release_local(reservation_decrease, GetParentMemTracker()); } } diff --git a/be/src/runtime/data_stream_mgr.cpp b/be/src/runtime/data_stream_mgr.cpp index fbe2af1ca7..6363c61d4b 100644 --- a/be/src/runtime/data_stream_mgr.cpp +++ b/be/src/runtime/data_stream_mgr.cpp @@ -71,7 +71,7 @@ shared_ptr DataStreamMgr::create_recvr( VLOG_FILE << "creating receiver for fragment=" << fragment_instance_id << ", node=" << dest_node_id; shared_ptr recvr(new DataStreamRecvr( - this, state->instance_mem_tracker(), row_desc, fragment_instance_id, dest_node_id, + this, row_desc, fragment_instance_id, dest_node_id, num_senders, is_merging, buffer_size, profile, sub_plan_query_statistics_recvr)); uint32_t hash_value = get_hash_value(fragment_instance_id, dest_node_id); lock_guard l(_lock); diff --git a/be/src/runtime/data_stream_recvr.cc b/be/src/runtime/data_stream_recvr.cc index 2536664271..7fe66524c3 100644 --- a/be/src/runtime/data_stream_recvr.cc +++ b/be/src/runtime/data_stream_recvr.cc @@ -251,7 +251,7 @@ void DataStreamRecvr::SenderQueue::add_batch(const PRowBatch& pb_batch, int be_n // Note: if this function makes a row batch, the batch *must* be added // to _batch_queue. It is not valid to create the row batch and destroy // it in this thread. 
- batch = new RowBatch(_recvr->row_desc(), pb_batch, _recvr->mem_tracker().get()); + batch = new RowBatch(_recvr->row_desc(), pb_batch); } VLOG_ROW << "added #rows=" << batch->num_rows() << " batch_size=" << batch_size << "\n"; @@ -273,8 +273,7 @@ void DataStreamRecvr::SenderQueue::add_batch(RowBatch* batch, bool use_move) { if (_is_cancelled) { return; } - RowBatch* nbatch = - new RowBatch(_recvr->row_desc(), batch->capacity(), _recvr->mem_tracker().get()); + RowBatch* nbatch = new RowBatch(_recvr->row_desc(), batch->capacity()); if (use_move) { nbatch->acquire_state(batch); } else { @@ -376,7 +375,7 @@ Status DataStreamRecvr::create_merger(const TupleRowComparator& less_than) { } Status DataStreamRecvr::create_parallel_merger(const TupleRowComparator& less_than, - uint32_t batch_size, MemTracker* mem_tracker) { + uint32_t batch_size) { DCHECK(_is_merging); vector child_input_batch_suppliers; @@ -401,8 +400,8 @@ Status DataStreamRecvr::create_parallel_merger(const TupleRowComparator& less_th auto step = _sender_queues.size() / parallel_thread + 1; for (int i = 0; i < _sender_queues.size(); i += step) { // Create the merger that will a single stream of sorted rows. 
- std::unique_ptr child_merger(new ChildSortedRunMerger( - less_than, &_row_desc, _profile, mem_tracker, batch_size, false)); + std::unique_ptr child_merger( + new ChildSortedRunMerger(less_than, &_row_desc, _profile, batch_size, false)); vector input_batch_suppliers; for (int j = i; j < std::min((size_t)i + step, _sender_queues.size()); ++j) { input_batch_suppliers.emplace_back(bind(mem_fn(&SenderQueue::get_batch), @@ -435,10 +434,9 @@ void DataStreamRecvr::transfer_all_resources(RowBatch* transfer_batch) { } DataStreamRecvr::DataStreamRecvr( - DataStreamMgr* stream_mgr, const std::shared_ptr& parent_tracker, - const RowDescriptor& row_desc, const TUniqueId& fragment_instance_id, - PlanNodeId dest_node_id, int num_senders, bool is_merging, int total_buffer_limit, - RuntimeProfile* profile, + DataStreamMgr* stream_mgr, const RowDescriptor& row_desc, + const TUniqueId& fragment_instance_id, PlanNodeId dest_node_id, int num_senders, + bool is_merging, int total_buffer_limit, RuntimeProfile* profile, std::shared_ptr sub_plan_query_statistics_recvr) : _mgr(stream_mgr), _fragment_instance_id(fragment_instance_id), @@ -449,7 +447,7 @@ DataStreamRecvr::DataStreamRecvr( _num_buffered_bytes(0), _profile(profile), _sub_plan_query_statistics_recvr(sub_plan_query_statistics_recvr) { - _mem_tracker = MemTracker::create_tracker(-1, "DataStreamRecvr", parent_tracker, + _mem_tracker = MemTracker::create_tracker(-1, "DataStreamRecvr", nullptr, MemTrackerLevel::VERBOSE, _profile); // Create one queue per sender if is_merging is true. diff --git a/be/src/runtime/data_stream_recvr.h b/be/src/runtime/data_stream_recvr.h index 3e4806c6c2..9bc084d646 100644 --- a/be/src/runtime/data_stream_recvr.h +++ b/be/src/runtime/data_stream_recvr.h @@ -88,8 +88,7 @@ public: // queues. The exprs used in less_than must have already been prepared and opened. 
Status create_merger(const TupleRowComparator& less_than); - Status create_parallel_merger(const TupleRowComparator& less_than, uint32_t batch_size, - MemTracker* mem_tracker); + Status create_parallel_merger(const TupleRowComparator& less_than, uint32_t batch_size); // Fill output_batch with the next batch of rows obtained by merging the per-sender // input streams. Must only be called if _is_merging is true. Status get_next(RowBatch* output_batch, bool* eos); @@ -101,7 +100,6 @@ public: const TUniqueId& fragment_instance_id() const { return _fragment_instance_id; } PlanNodeId dest_node_id() const { return _dest_node_id; } const RowDescriptor& row_desc() const { return _row_desc; } - std::shared_ptr mem_tracker() const { return _mem_tracker; } void add_sub_plan_statistics(const PQueryStatistics& statistics, int sender_id) { _sub_plan_query_statistics_recvr->insert(statistics, sender_id); @@ -115,10 +113,9 @@ private: friend class DataStreamMgr; class SenderQueue; - DataStreamRecvr(DataStreamMgr* stream_mgr, const std::shared_ptr& parent_tracker, - const RowDescriptor& row_desc, const TUniqueId& fragment_instance_id, - PlanNodeId dest_node_id, int num_senders, bool is_merging, - int total_buffer_limit, RuntimeProfile* profile, + DataStreamRecvr(DataStreamMgr* stream_mgr, const RowDescriptor& row_desc, + const TUniqueId& fragment_instance_id, PlanNodeId dest_node_id, int num_senders, + bool is_merging, int total_buffer_limit, RuntimeProfile* profile, std::shared_ptr sub_plan_query_statistics_recvr); // If receive queue is full, done is enqueue pending, and return with *done is nullptr diff --git a/be/src/runtime/data_stream_sender.cpp b/be/src/runtime/data_stream_sender.cpp index 0fdb68f6d5..e3bd87b14c 100644 --- a/be/src/runtime/data_stream_sender.cpp +++ b/be/src/runtime/data_stream_sender.cpp @@ -89,7 +89,7 @@ Status DataStreamSender::Channel::init(RuntimeState* state) { // TODO: figure out how to size _batch int capacity = std::max(1, _buffer_size / 
std::max(_row_desc.get_row_size(), 1)); - _batch.reset(new RowBatch(_row_desc, capacity, _parent->_mem_tracker.get())); + _batch.reset(new RowBatch(_row_desc, capacity)); if (_brpc_dest_addr.hostname.empty()) { LOG(WARNING) << "there is no brpc destination address's hostname" @@ -388,6 +388,7 @@ Status DataStreamSender::prepare(RuntimeState* state) { << "])"; _profile = _pool->add(new RuntimeProfile(title.str())); SCOPED_TIMER(_profile->total_time_counter()); + // TODO(zxy) used after _mem_tracker = MemTracker::create_tracker( -1, "DataStreamSender:" + print_id(state->fragment_instance_id()), state->instance_mem_tracker(), MemTrackerLevel::VERBOSE, _profile); diff --git a/be/src/runtime/disk_io_mgr.cc b/be/src/runtime/disk_io_mgr.cc index 032296f73a..3c2315b308 100644 --- a/be/src/runtime/disk_io_mgr.cc +++ b/be/src/runtime/disk_io_mgr.cc @@ -21,6 +21,7 @@ #include "runtime/disk_io_mgr_internal.h" #include "runtime/exec_env.h" +#include "runtime/thread_context.h" using std::string; using std::stringstream; @@ -215,6 +216,8 @@ void DiskIoMgr::BufferDescriptor::reset(RequestContext* reader, ScanRange* range _len = 0; _eosr = false; _status = Status::OK(); + // Consume in the tls mem tracker when the buffer is allocated. + _buffer_mem_tracker = thread_local_ctx.get()->_thread_mem_tracker_mgr->mem_tracker().get(); } void DiskIoMgr::BufferDescriptor::return_buffer() { @@ -222,21 +225,24 @@ void DiskIoMgr::BufferDescriptor::return_buffer() { _io_mgr->return_buffer(this); } -void DiskIoMgr::BufferDescriptor::set_mem_tracker(std::shared_ptr tracker) { +void DiskIoMgr::BufferDescriptor::update_mem_tracker(MemTracker* tracker) { // Cached buffers don't count towards mem usage. 
if (_scan_range->_cached_buffer != nullptr) { return; } - if (_mem_tracker.get() == tracker.get()) { + if (_buffer_mem_tracker == tracker) { return; } - if (_mem_tracker != nullptr) { - _mem_tracker->release(_buffer_len); - } - _mem_tracker = std::move(tracker); - if (tracker != nullptr) { - _mem_tracker->consume(_buffer_len); - } + // Only when the current tracker of desc and the parameter tracker are not null, + // the memory ownership will be transferred. + DCHECK(_buffer_mem_tracker && tracker); + _buffer_mem_tracker->transfer_to(tracker, _buffer_len); + _buffer_mem_tracker = std::move(tracker); +} + +void DiskIoMgr::BufferDescriptor::set_mem_tracker(MemTracker* tracker) { + DCHECK(!_buffer_mem_tracker); + _buffer_mem_tracker = std::move(tracker); } DiskIoMgr::WriteRange::WriteRange(const string& file, int64_t file_offset, int disk_id, @@ -391,7 +397,7 @@ Status DiskIoMgr::init(const int64_t mem_limit) { // _disk_thread_group.AddThread(new Thread("disk-io-mgr", ss.str(), // &DiskIoMgr::work_loop, this, _disk_queues[i])); _disk_thread_group.add_thread( - new std::thread(std::bind(&DiskIoMgr::work_loop, this, _disk_queues[i]))); + new std::thread(std::bind(&DiskIoMgr::work_loop, this, _disk_queues[i], _mem_tracker))); } } _request_context_cache.reset(new RequestContextCache(this)); @@ -700,7 +706,9 @@ DiskIoMgr::BufferDescriptor* DiskIoMgr::get_buffer_desc(RequestContext* reader, } } buffer_desc->reset(reader, range, buffer, buffer_size); - buffer_desc->set_mem_tracker(reader->_mem_tracker); + // The buffer is consumed in the tls mem tracker, and we want to be recorded in the reader->_mem_tracker, + // so if the two trackers are different, transfer memory ownership. + buffer_desc->update_mem_tracker(reader->_mem_tracker.get()); return buffer_desc; } @@ -717,11 +725,10 @@ char* DiskIoMgr::get_free_buffer(int64_t* buffer_size) { char* buffer = nullptr; if (_free_buffers[idx].empty()) { ++_num_allocated_buffers; - // Update the disk io mem usage. 
This is checked the next time we start - // a read for the next reader (DiskIoMgr::GetNextScanRange) - _mem_tracker->consume(*buffer_size); buffer = new char[*buffer_size]; } else { + // This means the buffer's memory ownership is transferred from DiskIoMgr to tls tracker. + _mem_tracker->transfer_to(thread_local_ctx.get()->_thread_mem_tracker_mgr->mem_tracker().get(), *buffer_size); buffer = _free_buffers[idx].front(); _free_buffers[idx].pop_front(); } @@ -736,7 +743,6 @@ void DiskIoMgr::gc_io_buffers(int64_t bytes_to_free) { for (list::iterator iter = _free_buffers[idx].begin(); iter != _free_buffers[idx].end(); ++iter) { int64_t buffer_size = (1 << idx) * _min_buffer_size; - _mem_tracker->release(buffer_size); --_num_allocated_buffers; delete[] * iter; @@ -747,15 +753,19 @@ void DiskIoMgr::gc_io_buffers(int64_t bytes_to_free) { break; } } + // The deleted buffer is released in the tls mem tracker, the deleted buffer belongs to DiskIoMgr, + // so the freed memory should be recorded in the DiskIoMgr mem tracker. So if the tls mem tracker + // and the DiskIoMgr tracker are different, transfer memory ownership. + _mem_tracker->transfer_to(thread_local_ctx.get()->_thread_mem_tracker_mgr->mem_tracker().get(), bytes_freed); } void DiskIoMgr::return_free_buffer(BufferDescriptor* desc) { - return_free_buffer(desc->_buffer, desc->_buffer_len); + return_free_buffer(desc->_buffer, desc->_buffer_len, desc->buffer_mem_tracker()); + // The buffer in the delete above has been released in the desc mem tracker, reset it to nullptr here. 
desc->set_mem_tracker(nullptr); - desc->_buffer = nullptr; } -void DiskIoMgr::return_free_buffer(char* buffer, int64_t buffer_size) { +void DiskIoMgr::return_free_buffer(char* buffer, int64_t buffer_size, MemTracker* tracker) { DCHECK(buffer != nullptr); int idx = free_buffers_idx(buffer_size); DCHECK_EQ(bit_ceil(buffer_size, _min_buffer_size) & ~(1 << idx), 0) @@ -763,11 +773,16 @@ void DiskIoMgr::return_free_buffer(char* buffer, int64_t buffer_size) { << buffer_size << ", _min_buffer_size = " << _min_buffer_size; unique_lock lock(_free_buffers_lock); if (!config::disable_mem_pools && _free_buffers[idx].size() < config::max_free_io_buffers) { + // The buffer's memory ownership is transferred from desc->buffer_mem_tracker to DiskIoMgr tracker. + tracker->transfer_to(_mem_tracker.get(), buffer_size); _free_buffers[idx].push_back(buffer); } else { - _mem_tracker->release(buffer_size); --_num_allocated_buffers; delete[] buffer; + // The deleted buffer is released in the tls mem tracker. When the buffer was allocated, + // it was consumed in BufferDescriptor->buffer_mem_tracker, so if the tls mem tracker and + // the tracker in the parameters are different, transfer memory ownership. + tracker->transfer_to(thread_local_ctx.get()->_thread_mem_tracker_mgr->mem_tracker().get(), buffer_size); } } @@ -976,7 +991,7 @@ void DiskIoMgr::handle_read_finished(DiskQueue* disk_queue, RequestContext* read state.decrement_request_thread(); } -void DiskIoMgr::work_loop(DiskQueue* disk_queue) { +void DiskIoMgr::work_loop(DiskQueue* disk_queue, const std::shared_ptr& mem_tracker) { // The thread waits until there is work or the entire system is being shut down. // If there is work, performs the read or write requested and re-enqueues the // requesting context. @@ -988,6 +1003,7 @@ void DiskIoMgr::work_loop(DiskQueue* disk_queue) { // re-enqueues the request. // 3. Perform the read or write as specified. // Cancellation checking needs to happen in both steps 1 and 3. 
+ SCOPED_ATTACH_TASK_THREAD(ThreadContext::TaskType::LOAD, mem_tracker); while (!_shut_down) { RequestContext* worker_context = nullptr; ; diff --git a/be/src/runtime/disk_io_mgr.h b/be/src/runtime/disk_io_mgr.h index 0f7346b8a4..8d18f6f686 100644 --- a/be/src/runtime/disk_io_mgr.h +++ b/be/src/runtime/disk_io_mgr.h @@ -237,13 +237,18 @@ public: int64_t buffer_len() { return _buffer_len; } int64_t len() { return _len; } bool eosr() { return _eosr; } + MemTracker* buffer_mem_tracker() { return _buffer_mem_tracker; } // Returns the offset within the scan range that this buffer starts at int64_t scan_range_offset() const { return _scan_range_offset; } - // Updates this buffer to be owned by the new tracker. Consumption is - // release from the current tracker and added to the new one. - void set_mem_tracker(std::shared_ptr tracker); + // Updates this buffer to be owned by the new tracker. + // Transfer memory ownership between two trackers. + void update_mem_tracker(MemTracker* tracker); + + // To set a tracker, make sure that in an external location, + // the desc buffer's memory must have transferred ownership, + void set_mem_tracker(MemTracker* tracker); // Returns the buffer to the IoMgr. This must be called for every buffer // returned by get_next()/read() that did not return an error. This is non-blocking. @@ -263,7 +268,7 @@ public: RequestContext* _reader; // The current tracker this buffer is associated with. - std::shared_ptr _mem_tracker; + MemTracker* _buffer_mem_tracker; // Scan range that this buffer is for. ScanRange* _scan_range; @@ -440,6 +445,7 @@ public: // If non-null, this is DN cached buffer. This means the cached read succeeded // and all the bytes for the range are in this buffer. + // TODO(zxy) Not used, maybe delete struct hadoopRzBuffer* _cached_buffer; // Lock protecting fields below. @@ -793,7 +799,7 @@ private: // Returns a buffer to the free list. 
buffer_size / _min_buffer_size should be a power // of 2, and buffer_size should be <= _max_buffer_size. These constraints will be met // if buffer was acquired via get_free_buffer() (which it should have been). - void return_free_buffer(char* buffer, int64_t buffer_size); + void return_free_buffer(char* buffer, int64_t buffer_size, MemTracker* tracker); // Returns the buffer in desc (cannot be nullptr), sets buffer to nullptr and clears the // mem tracker. @@ -802,7 +808,7 @@ private: // Disk worker thread loop. This function retrieves the next range to process on // the disk queue and invokes read_range() or Write() depending on the type of Range(). // There can be multiple threads per disk running this loop. - void work_loop(DiskQueue* queue); + void work_loop(DiskQueue* queue, const std::shared_ptr& mem_tracker); // This is called from the disk thread to get the next range to process. It will // wait until a scan range and buffer are available, or a write range is available. diff --git a/be/src/runtime/fragment_mgr.cpp b/be/src/runtime/fragment_mgr.cpp index cacd38be17..586f5c1cd2 100644 --- a/be/src/runtime/fragment_mgr.cpp +++ b/be/src/runtime/fragment_mgr.cpp @@ -43,6 +43,7 @@ #include "runtime/stream_load/load_stream_mgr.h" #include "runtime/stream_load/stream_load_context.h" #include "runtime/stream_load/stream_load_pipe.h" +#include "runtime/thread_context.h" #include "service/backend_options.h" #include "util/debug_util.h" #include "util/doris_metrics.h" @@ -239,6 +240,10 @@ Status FragmentExecState::execute() { Status FragmentExecState::cancel_before_execute() { // set status as 'abort', cuz cancel() won't effect the status arg of DataSink::close(). 
+#ifndef BE_TEST + SCOPED_ATTACH_TASK_THREAD(executor()->runtime_state()->query_type(), + executor()->runtime_state()->instance_mem_tracker()); +#endif _executor.set_abort(); _executor.cancel(); if (_pipe != nullptr) { @@ -461,6 +466,11 @@ void FragmentMgr::_exec_actual(std::shared_ptr exec_state, Fi .query_id(exec_state->query_id()) .instance_id(exec_state->fragment_instance_id()) .tag("pthread_id", std::to_string((uintptr_t)pthread_self())); +#ifndef BE_TEST + SCOPED_ATTACH_TASK_THREAD(exec_state->executor()->runtime_state()->query_type(), + print_id(exec_state->query_id()), exec_state->fragment_instance_id(), + exec_state->executor()->runtime_state()->instance_mem_tracker()); +#endif exec_state->execute(); std::shared_ptr fragments_ctx = exec_state->get_fragments_ctx(); diff --git a/be/src/runtime/load_channel.cpp b/be/src/runtime/load_channel.cpp index a8ba886fa7..95a50b5ded 100644 --- a/be/src/runtime/load_channel.cpp +++ b/be/src/runtime/load_channel.cpp @@ -24,12 +24,11 @@ namespace doris { LoadChannel::LoadChannel(const UniqueId& load_id, int64_t mem_limit, int64_t timeout_s, - const std::shared_ptr& mem_tracker, bool is_high_priority, - const std::string& sender_ip) + bool is_high_priority, const std::string& sender_ip) : _load_id(load_id), _timeout_s(timeout_s), _is_high_priority(is_high_priority), _sender_ip(sender_ip) { _mem_tracker = MemTracker::create_tracker( - mem_limit, "LoadChannel:" + _load_id.to_string(), mem_tracker, MemTrackerLevel::TASK); + mem_limit, "LoadChannel:" + _load_id.to_string(), nullptr, MemTrackerLevel::TASK); // _last_updated_time should be set before being inserted to // _load_channels in load_channel_mgr, or it may be erased // immediately by gc thread. 
@@ -53,7 +52,7 @@ Status LoadChannel::open(const PTabletWriterOpenRequest& params) { } else { // create a new tablets channel TabletsChannelKey key(params.id(), index_id); - channel.reset(new TabletsChannel(key, _mem_tracker, _is_high_priority)); + channel.reset(new TabletsChannel(key, _is_high_priority)); _tablets_channels.insert({index_id, channel}); } } diff --git a/be/src/runtime/load_channel.h b/be/src/runtime/load_channel.h index 13490f5fa8..ba0ad30334 100644 --- a/be/src/runtime/load_channel.h +++ b/be/src/runtime/load_channel.h @@ -39,8 +39,7 @@ class TabletsChannel; class LoadChannel { public: LoadChannel(const UniqueId& load_id, int64_t mem_limit, int64_t timeout_s, - const std::shared_ptr& mem_tracker, bool is_high_priority, - const std::string& sender_ip); + bool is_high_priority, const std::string& sender_ip); ~LoadChannel(); // open a new load channel if not exist diff --git a/be/src/runtime/load_channel_mgr.cpp b/be/src/runtime/load_channel_mgr.cpp index 008dfeb40f..e5e23f5110 100644 --- a/be/src/runtime/load_channel_mgr.cpp +++ b/be/src/runtime/load_channel_mgr.cpp @@ -112,7 +112,7 @@ Status LoadChannelMgr::open(const PTabletWriterOpenRequest& params) { int64_t job_timeout_s = calc_job_timeout_s(timeout_in_req_s); bool is_high_priority = (params.has_is_high_priority() && params.is_high_priority()); - channel.reset(new LoadChannel(load_id, job_max_memory, job_timeout_s, _mem_tracker, is_high_priority, + channel.reset(new LoadChannel(load_id, job_max_memory, job_timeout_s, is_high_priority, params.sender_ip())); _load_channels.insert({load_id, channel}); } diff --git a/be/src/runtime/mem_pool.cpp b/be/src/runtime/mem_pool.cpp index 469a641e64..fed5724c0c 100644 --- a/be/src/runtime/mem_pool.cpp +++ b/be/src/runtime/mem_pool.cpp @@ -24,6 +24,7 @@ #include "runtime/mem_tracker.h" #include "runtime/memory/chunk_allocator.h" +#include "runtime/thread_context.h" #include "util/bit_util.h" #include "util/doris_metrics.h" @@ -55,6 +56,14 @@ 
MemPool::MemPool(const std::string& label) _mem_tracker = _mem_tracker_own.get(); } +MemPool::MemPool() + : current_chunk_idx_(-1), + next_chunk_size_(INITIAL_CHUNK_SIZE), + total_allocated_bytes_(0), + total_reserved_bytes_(0), + peak_allocated_bytes_(0), + _mem_tracker(thread_local_ctx.get()->_thread_mem_tracker_mgr->mem_tracker().get()) {} + MemPool::ChunkInfo::ChunkInfo(const Chunk& chunk_) : chunk(chunk_), allocated_bytes(0) { DorisMetrics::instance()->memory_pool_bytes_total->increment(chunk.size); } diff --git a/be/src/runtime/mem_tracker.cpp b/be/src/runtime/mem_tracker.cpp index f10f3a7f5c..b4ee28d9fa 100644 --- a/be/src/runtime/mem_tracker.cpp +++ b/be/src/runtime/mem_tracker.cpp @@ -25,6 +25,7 @@ #include "gutil/once.h" #include "runtime/exec_env.h" #include "runtime/runtime_state.h" +#include "runtime/thread_context.h" #include "service/backend_options.h" #include "util/pretty_printer.h" #include "util/string_util.h" @@ -85,7 +86,7 @@ std::shared_ptr MemTracker::create_tracker(int64_t byte_limit, const const std::shared_ptr& parent, MemTrackerLevel level, RuntimeProfile* profile) { - std::shared_ptr reset_parent = parent ? parent : MemTracker::get_process_tracker(); + std::shared_ptr reset_parent = parent ? parent : thread_local_ctx.get()->_thread_mem_tracker_mgr->mem_tracker(); DCHECK(reset_parent); std::shared_ptr tracker( @@ -99,7 +100,7 @@ std::shared_ptr MemTracker::create_tracker(int64_t byte_limit, const std::shared_ptr MemTracker::create_virtual_tracker( int64_t byte_limit, const std::string& label, const std::shared_ptr& parent, MemTrackerLevel level) { - std::shared_ptr reset_parent = parent ? parent : MemTracker::get_process_tracker(); + std::shared_ptr reset_parent = parent ? 
parent : thread_local_ctx.get()->_thread_mem_tracker_mgr->mem_tracker(); DCHECK(reset_parent); std::shared_ptr tracker( @@ -148,14 +149,11 @@ void MemTracker::init_virtual() { } MemTracker::~MemTracker() { - consume(_untracked_mem.exchange(0)); + consume(_untracked_mem.exchange(0)); // before memory_leak_check + // TCMalloc hook will be triggered during destructor memtracker, may cause crash. + if (_label == "Process") GLOBAL_STOP_THREAD_LOCAL_MEM_TRACKER(); if (!_virtual && config::memory_leak_detection) MemTracker::memory_leak_check(this); if (!_virtual && parent()) { - if (consumption() != 0) { - // TODO(zxy) delete after. Because some trackers do not manually release completely before destructing - _parent->release(consumption()); - } - // Do not call release on the parent tracker to avoid repeated releases. // Ensure that all consume/release are triggered by TCMalloc new/delete hook. lock_guard l(_parent->_child_trackers_lock); @@ -168,6 +166,7 @@ MemTracker::~MemTracker() { } void MemTracker::transfer_to_relative(MemTracker* dst, int64_t bytes) { + if (id() == dst->id()) return; DCHECK_EQ(_all_trackers.back(), dst->_all_trackers.back()) << "Must have same ancestor"; DCHECK(!dst->has_limit()); // Find the common ancestor and update trackers between 'this'/'dst' and @@ -183,8 +182,8 @@ void MemTracker::transfer_to_relative(MemTracker* dst, int64_t bytes) { --dst_ancestor_idx; } MemTracker* common_ancestor = _all_trackers[ancestor_idx]; - release(bytes, common_ancestor); - dst->consume(bytes, common_ancestor); + release_local(bytes, common_ancestor); + dst->consume_local(bytes, common_ancestor); } // Calling this on the query tracker results in output like: @@ -274,7 +273,7 @@ Status MemTracker::mem_limit_exceeded(RuntimeState* state, const std::string& de detail = fmt::format(detail, _label, _consumption->current_value(), _limit, PrettyPrinter::print(failed_allocation_size, TUnit::BYTES)); } - detail += " If query, can change the limit by session variable 
exec_mem_limit."; + detail += " If this is a query, can change the limit by session variable exec_mem_limit."; Status status = Status::MemoryLimitExceeded(detail); if (state != nullptr) state->log_error(detail); diff --git a/be/src/runtime/mem_tracker.h b/be/src/runtime/mem_tracker.h index ffd2756c9c..23bd239622 100644 --- a/be/src/runtime/mem_tracker.h +++ b/be/src/runtime/mem_tracker.h @@ -105,17 +105,12 @@ public: } // Increases consumption of this tracker and its ancestors by 'bytes'. - // up to (but not including) end_tracker. - // This is useful if we want to move tracking between trackers that share a common (i.e. end_tracker) - // ancestor. This happens when we want to update tracking on a particular mem tracker but the consumption - // against the limit recorded in one of its ancestors already happened. - void consume(int64_t bytes, MemTracker* end_tracker = nullptr) { + void consume(int64_t bytes) { if (bytes <= 0) { - release(-bytes, end_tracker); + release(-bytes); return; } for (auto& tracker : _all_trackers) { - if (tracker == end_tracker) return; tracker->_consumption->add(bytes); } } @@ -159,17 +154,15 @@ public: } // Decreases consumption of this tracker and its ancestors by 'bytes'. - // up to (but not including) end_tracker. - void release(int64_t bytes, MemTracker* end_tracker = nullptr) { + void release(int64_t bytes) { if (bytes < 0) { - consume(-bytes, end_tracker); + consume(-bytes); return; } if (bytes == 0) { return; } for (auto& tracker : _all_trackers) { - if (tracker == end_tracker) return; tracker->_consumption->add(-bytes); } } @@ -233,6 +226,29 @@ public: return Status::OK(); } + // up to (but not including) end_tracker. + // This is useful if we want to move tracking between trackers that share a common (i.e. end_tracker) + // ancestor. This happens when we want to update tracking on a particular mem tracker but the consumption + // against the limit recorded in one of its ancestors already happened. 
+ void consume_local(int64_t bytes, MemTracker* end_tracker) { + DCHECK(end_tracker); + if (bytes == 0) return; + for (auto& tracker : _all_trackers) { + if (tracker == end_tracker) return; + tracker->_consumption->add(bytes); + } + } + + // up to (but not including) end_tracker. + void release_local(int64_t bytes, MemTracker* end_tracker) { + DCHECK(end_tracker); + if (bytes == 0) return; + for (auto& tracker : _all_trackers) { + if (tracker == end_tracker) return; + tracker->_consumption->add(-bytes); + } + } + // Transfer 'bytes' of consumption from this tracker to 'dst'. // updating all ancestors up to the first shared ancestor. Must not be used if // 'dst' has a limit, or an ancestor with a limit, that is not a common @@ -241,6 +257,7 @@ public: WARN_UNUSED_RESULT Status try_transfer_to(MemTracker* dst, int64_t bytes) { + if (id() == dst->id()) return Status::OK(); // Must release first, then consume release_cache(bytes); Status st = dst->try_consume_cache(bytes); @@ -253,6 +270,7 @@ public: // Forced transfer, 'dst' may limit exceed, and more ancestor trackers will be updated. 
void transfer_to(MemTracker* dst, int64_t bytes) { + if (id() == dst->id()) return; release_cache(bytes); dst->consume_cache(bytes); } @@ -294,7 +312,16 @@ public: bool limit_exceeded() const { return _limit >= 0 && _limit < consumption(); } int64_t limit() const { return _limit; } - void set_limit(int64_t limit) { _limit = limit; } + void set_limit(int64_t limit) { + DCHECK_GE(limit, -1); + DCHECK(!_virtual); + _limit = limit; + _limit_trackers.push_back(this); + for (const auto& tracker_weak : _child_trackers) { + std::shared_ptr tracker = tracker_weak.lock(); + if (tracker) tracker->_limit_trackers.push_back(this); + } + } bool has_limit() const { return _limit >= 0; } Status check_limit(int64_t bytes) { diff --git a/be/src/runtime/mem_tracker_task_pool.cpp b/be/src/runtime/mem_tracker_task_pool.cpp index d6d23f41db..2d43b927e8 100644 --- a/be/src/runtime/mem_tracker_task_pool.cpp +++ b/be/src/runtime/mem_tracker_task_pool.cpp @@ -67,7 +67,7 @@ void MemTrackerTaskPool::logout_task_mem_tracker() { for (auto it = _task_mem_trackers.begin(); it != _task_mem_trackers.end(); it++) { // No RuntimeState uses this task MemTracker, it is only referenced by this map, delete it if (it->second.use_count() == 1) { - if (config::memory_leak_detection && it->second->consumption() == 0) { + if (config::memory_leak_detection && it->second->consumption() != 0) { // If consumption is not equal to 0 before query mem tracker is destructed, // there are two possibilities in theory. // 1. A memory leak occurs. @@ -82,8 +82,8 @@ void MemTrackerTaskPool::logout_task_mem_tracker() { // the negative number of the current value of consume. 
LOG(WARNING) << "Task memory tracker memory leak:" << it->second->debug_string(); } - it->second->parent()->consume(-it->second->consumption(), - MemTracker::get_process_tracker().get()); + it->second->parent()->consume_local(-it->second->consumption(), + MemTracker::get_process_tracker().get()); expired_tasks.emplace_back(it->first); } } diff --git a/be/src/runtime/memory/chunk_allocator.cpp b/be/src/runtime/memory/chunk_allocator.cpp index 937f9b367f..6f1306c87a 100644 --- a/be/src/runtime/memory/chunk_allocator.cpp +++ b/be/src/runtime/memory/chunk_allocator.cpp @@ -25,6 +25,7 @@ #include "runtime/mem_tracker.h" #include "runtime/memory/chunk.h" #include "runtime/memory/system_allocator.h" +#include "runtime/thread_context.h" #include "util/bit_util.h" #include "util/cpu_info.h" #include "util/doris_metrics.h" @@ -132,15 +133,19 @@ ChunkAllocator::ChunkAllocator(size_t reserve_limit) } Status ChunkAllocator::allocate(size_t size, Chunk* chunk, MemTracker* tracker, bool check_limits) { - // fast path: allocate from current core arena - if (tracker) { - if (check_limits) { - RETURN_IF_ERROR(tracker->try_consume_cache(size)); - } else { - tracker->consume_cache(size); - } + MemTracker* reset_tracker = + tracker ? tracker + : thread_local_ctx.get()->_thread_mem_tracker_mgr->mem_tracker().get(); + // In advance, transfer the memory ownership of allocate from ChunkAllocator::tracker to the parameter tracker. + // Next, if the allocate is successful, it will exit normally; + // if the allocate fails, return this part of the memory to the parameter tracker. 
+ if (check_limits) { + RETURN_IF_ERROR(_mem_tracker->try_transfer_to(reset_tracker, size)); + } else { + _mem_tracker->transfer_to(reset_tracker, size); } + // fast path: allocate from current core arena int core_id = CpuInfo::get_current_core(); chunk->size = size; chunk->core_id = core_id; @@ -149,8 +154,6 @@ Status ChunkAllocator::allocate(size_t size, Chunk* chunk, MemTracker* tracker, DCHECK_GE(_reserved_bytes, 0); _reserved_bytes.fetch_sub(size); chunk_pool_local_core_alloc_count->increment(1); - // This means the chunk's memory ownership is transferred from ChunkAllocator to MemPool. - if (tracker) _mem_tracker->release_cache(size); return Status::OK(); } if (_reserved_bytes > size) { @@ -163,8 +166,6 @@ Status ChunkAllocator::allocate(size_t size, Chunk* chunk, MemTracker* tracker, chunk_pool_other_core_alloc_count->increment(1); // reset chunk's core_id to other chunk->core_id = core_id % _arenas.size(); - // This means the chunk's memory ownership is transferred from ChunkAllocator to MemPool. - if (tracker) _mem_tracker->release_cache(size); return Status::OK(); } } @@ -175,11 +176,15 @@ Status ChunkAllocator::allocate(size_t size, Chunk* chunk, MemTracker* tracker, SCOPED_RAW_TIMER(&cost_ns); // allocate from system allocator chunk->data = SystemAllocator::allocate(size); + // The allocated chunk is consumed in the tls mem tracker, we want to consume in the ChunkAllocator tracker, + // transfer memory ownership. TODO(zxy) replace with switch tls tracker + thread_local_ctx.get()->_thread_mem_tracker_mgr->mem_tracker()->transfer_to(_mem_tracker.get(), size); } chunk_pool_system_alloc_count->increment(1); chunk_pool_system_alloc_cost_ns->increment(cost_ns); if (chunk->data == nullptr) { - if (tracker) tracker->release_cache(size); + // allocate fails, return this part of the memory to the parameter tracker. 
+ reset_tracker->transfer_to(_mem_tracker.get(), size); return Status::MemoryAllocFailed( fmt::format("ChunkAllocator failed to allocate chunk {} bytes", size)); } @@ -190,7 +195,6 @@ void ChunkAllocator::free(const Chunk& chunk, MemTracker* tracker) { if (chunk.core_id == -1) { return; } - if (tracker) tracker->transfer_to(_mem_tracker.get(), chunk.size); int64_t old_reserved_bytes = _reserved_bytes; int64_t new_reserved_bytes = 0; do { @@ -200,6 +204,13 @@ void ChunkAllocator::free(const Chunk& chunk, MemTracker* tracker) { { SCOPED_RAW_TIMER(&cost_ns); SystemAllocator::free(chunk.data, chunk.size); + // The freed chunk is released in the tls mem tracker. When the chunk was allocated, + // it was consumed in the parameter tracker, so if the tls mem tracker and the parameter + // tracker are different, transfer memory ownership. + if (tracker) + tracker->transfer_to( + thread_local_ctx.get()->_thread_mem_tracker_mgr->mem_tracker().get(), + chunk.size); } chunk_pool_system_free_count->increment(1); chunk_pool_system_free_cost_ns->increment(cost_ns); @@ -208,6 +219,13 @@ void ChunkAllocator::free(const Chunk& chunk, MemTracker* tracker) { } } while (!_reserved_bytes.compare_exchange_weak(old_reserved_bytes, new_reserved_bytes)); + // The chunk's memory ownership is transferred from MemPool to ChunkAllocator. 
+ if (tracker) { + tracker->transfer_to(_mem_tracker.get(), chunk.size); + } else { + thread_local_ctx.get()->_thread_mem_tracker_mgr->mem_tracker()->transfer_to( + _mem_tracker.get(), chunk.size); + } _arenas[chunk.core_id]->push_free_chunk(chunk.data, chunk.size); } diff --git a/be/src/runtime/odbc_table_sink.cpp b/be/src/runtime/odbc_table_sink.cpp index adbe2e54a1..c813f38525 100644 --- a/be/src/runtime/odbc_table_sink.cpp +++ b/be/src/runtime/odbc_table_sink.cpp @@ -21,7 +21,6 @@ #include "exprs/expr.h" #include "runtime/runtime_state.h" -#include "runtime/mem_tracker.h" #include "util/runtime_profile.h" #include "util/debug_util.h" diff --git a/be/src/runtime/plan_fragment_executor.cpp b/be/src/runtime/plan_fragment_executor.cpp index 5803250dfc..3f984a0270 100644 --- a/be/src/runtime/plan_fragment_executor.cpp +++ b/be/src/runtime/plan_fragment_executor.cpp @@ -34,6 +34,7 @@ #include "runtime/result_buffer_mgr.h" #include "runtime/result_queue_mgr.h" #include "runtime/row_batch.h" +#include "runtime/thread_context.h" #include "util/container_util.hpp" #include "util/cpu_info.h" #include "util/logging.h" @@ -87,6 +88,9 @@ Status PlanFragmentExecutor::prepare(const TExecPlanFragmentParams& request, _runtime_state->set_query_fragments_ctx(fragments_ctx); RETURN_IF_ERROR(_runtime_state->init_mem_trackers(_query_id)); + SCOPED_ATTACH_TASK_THREAD(_runtime_state->query_type(), print_id(_runtime_state->query_id()), + _runtime_state->fragment_instance_id(), + _runtime_state->instance_mem_tracker()); _runtime_state->set_be_number(request.backend_num); if (request.__isset.backend_id) { _runtime_state->set_backend_id(request.backend_id); @@ -194,8 +198,7 @@ Status PlanFragmentExecutor::prepare(const TExecPlanFragmentParams& request, _rows_produced_counter = ADD_COUNTER(profile(), "RowsProduced", TUnit::UNIT); _fragment_cpu_timer = ADD_TIMER(profile(), "FragmentCpuTime"); - _row_batch.reset(new RowBatch(_plan->row_desc(), _runtime_state->batch_size(), - 
_runtime_state->instance_mem_tracker().get())); + _row_batch.reset(new RowBatch(_plan->row_desc(), _runtime_state->batch_size())); _block.reset(new doris::vectorized::Block()); // _row_batch->tuple_data_pool()->set_limits(*_runtime_state->mem_trackers()); VLOG_NOTICE << "plan_root=\n" << _plan->debug_string(); @@ -244,6 +247,8 @@ Status PlanFragmentExecutor::open() { if (status.is_cancelled()) { if (_cancel_reason == PPlanFragmentCancelReason::CALL_RPC_ERROR) { status = Status::RuntimeError(_cancel_msg); + } else if (_cancel_reason == PPlanFragmentCancelReason::MEMORY_LIMIT_EXCEED) { + status = Status::MemoryLimitExceeded(_cancel_msg); } } @@ -436,6 +441,9 @@ void PlanFragmentExecutor::_collect_node_statistics() { } void PlanFragmentExecutor::report_profile() { + SCOPED_ATTACH_TASK_THREAD(_runtime_state->query_type(), print_id(_runtime_state->query_id()), + _runtime_state->fragment_instance_id(), + _runtime_state->instance_mem_tracker()); VLOG_FILE << "report_profile(): instance_id=" << _runtime_state->fragment_instance_id(); DCHECK(_report_status_cb); diff --git a/be/src/runtime/qsorter.cpp b/be/src/runtime/qsorter.cpp index 951b35a63a..cec1b6cd2e 100644 --- a/be/src/runtime/qsorter.cpp +++ b/be/src/runtime/qsorter.cpp @@ -81,7 +81,7 @@ QSorter::QSorter(const RowDescriptor& row_desc, const std::vector& RuntimeState* state) : _row_desc(row_desc), _order_expr_ctxs(order_expr_ctxs), - _tuple_pool(new MemPool(state->instance_mem_tracker().get())) {} + _tuple_pool(new MemPool("QSorter")) {} Status QSorter::prepare(RuntimeState* state) { RETURN_IF_ERROR(Expr::clone_if_not_exists(_order_expr_ctxs, state, &_lhs_expr_ctxs)); diff --git a/be/src/runtime/result_file_sink.cpp b/be/src/runtime/result_file_sink.cpp index a26e4a38d7..6f5e4d540e 100644 --- a/be/src/runtime/result_file_sink.cpp +++ b/be/src/runtime/result_file_sink.cpp @@ -109,11 +109,12 @@ Status ResultFileSink::prepare(RuntimeState* state) { _local_bytes_send_counter = ADD_COUNTER(profile(), "LocalBytesSent", 
TUnit::BYTES); _uncompressed_bytes_counter = ADD_COUNTER(profile(), "UncompressedRowBatchSize", TUnit::BYTES); + // TODO(zxy) used after _mem_tracker = MemTracker::create_tracker( -1, "ResultFileSink:" + print_id(state->fragment_instance_id()), state->instance_mem_tracker(), MemTrackerLevel::VERBOSE, _profile); // create writer - _output_batch = new RowBatch(_output_row_descriptor, 1024, _mem_tracker.get()); + _output_batch = new RowBatch(_output_row_descriptor, 1024); _writer.reset(new (std::nothrow) FileResultWriter( _file_opts.get(), _storage_type, state->fragment_instance_id(), _output_expr_ctxs, _profile, nullptr, _output_batch, state->return_object_data_as_binary())); diff --git a/be/src/runtime/result_sink.cpp b/be/src/runtime/result_sink.cpp index 610f105074..b83ae8af3b 100644 --- a/be/src/runtime/result_sink.cpp +++ b/be/src/runtime/result_sink.cpp @@ -23,6 +23,7 @@ #include "runtime/exec_env.h" #include "runtime/file_result_writer.h" #include "runtime/mem_tracker.h" +#include "runtime/thread_context.h" #include "runtime/mysql_result_writer.h" #include "runtime/result_buffer_mgr.h" #include "runtime/row_batch.h" @@ -100,6 +101,10 @@ Status ResultSink::open(RuntimeState* state) { } Status ResultSink::send(RuntimeState* state, RowBatch* batch) { + // The memory consumption in the process of sending the results is not recorded in the query memory. + // 1. Avoid the query being cancelled when the memory limit is reached after the query result comes out. + // 2. If record this memory, also need to record on the receiving end, need to consider the life cycle of MemTracker. 
+ SCOPED_STOP_THREAD_LOCAL_MEM_TRACKER(); return _writer->append_row_batch(batch); } diff --git a/be/src/runtime/row_batch.cpp b/be/src/runtime/row_batch.cpp index 1b6deee4ea..e89ea611ed 100644 --- a/be/src/runtime/row_batch.cpp +++ b/be/src/runtime/row_batch.cpp @@ -27,6 +27,7 @@ #include "runtime/collection_value.h" #include "runtime/exec_env.h" #include "runtime/runtime_state.h" +#include "runtime/thread_context.h" #include "runtime/string_value.h" #include "runtime/tuple_row.h" #include "vec/columns/column_vector.h" @@ -39,8 +40,8 @@ namespace doris { const int RowBatch::AT_CAPACITY_MEM_USAGE = 8 * 1024 * 1024; const int RowBatch::FIXED_LEN_BUFFER_LIMIT = AT_CAPACITY_MEM_USAGE / 2; -RowBatch::RowBatch(const RowDescriptor& row_desc, int capacity, MemTracker* mem_tracker) - : _mem_tracker(mem_tracker), +RowBatch::RowBatch(const RowDescriptor& row_desc, int capacity) + : _mem_tracker(thread_local_ctx.get()->_thread_mem_tracker_mgr->mem_tracker()), _has_in_flight_row(false), _num_rows(0), _num_uncommitted_rows(0), @@ -51,13 +52,10 @@ RowBatch::RowBatch(const RowDescriptor& row_desc, int capacity, MemTracker* mem_ _row_desc(row_desc), _auxiliary_mem_usage(0), _need_to_return(false), - _tuple_data_pool(_mem_tracker) { - DCHECK(_mem_tracker != nullptr); + _tuple_data_pool() { DCHECK_GT(capacity, 0); _tuple_ptrs_size = _capacity * _num_tuples_per_row * sizeof(Tuple*); DCHECK_GT(_tuple_ptrs_size, 0); - // TODO: switch to Init() pattern so we can check memory limit and return Status. 
- _mem_tracker->consume(_tuple_ptrs_size); _tuple_ptrs = (Tuple**)(malloc(_tuple_ptrs_size)); DCHECK(_tuple_ptrs != nullptr); } @@ -68,8 +66,8 @@ RowBatch::RowBatch(const RowDescriptor& row_desc, int capacity, MemTracker* mem_ // xfer += iprot->readString(this->tuple_data[_i9]); // to allocated string data in special mempool // (change via python script that runs over Data_types.cc) -RowBatch::RowBatch(const RowDescriptor& row_desc, const PRowBatch& input_batch, MemTracker* tracker) - : _mem_tracker(tracker), +RowBatch::RowBatch(const RowDescriptor& row_desc, const PRowBatch& input_batch) + : _mem_tracker(thread_local_ctx.get()->_thread_mem_tracker_mgr->mem_tracker()), _has_in_flight_row(false), _num_rows(input_batch.num_rows()), _num_uncommitted_rows(0), @@ -80,12 +78,9 @@ RowBatch::RowBatch(const RowDescriptor& row_desc, const PRowBatch& input_batch, _row_desc(row_desc), _auxiliary_mem_usage(0), _need_to_return(false), - _tuple_data_pool(_mem_tracker) { - DCHECK(_mem_tracker != nullptr); + _tuple_data_pool() { _tuple_ptrs_size = _num_rows * _num_tuples_per_row * sizeof(Tuple*); DCHECK_GT(_tuple_ptrs_size, 0); - // TODO: switch to Init() pattern so we can check memory limit and return Status. 
- _mem_tracker->consume(_tuple_ptrs_size); _tuple_ptrs = (Tuple**)(malloc(_tuple_ptrs_size)); DCHECK(_tuple_ptrs != nullptr); @@ -227,7 +222,6 @@ void RowBatch::clear() { } DCHECK(_tuple_ptrs != nullptr); free(_tuple_ptrs); - _mem_tracker->release(_tuple_ptrs_size); _tuple_ptrs = nullptr; _cleared = true; } @@ -344,7 +338,7 @@ void RowBatch::add_io_buffer(DiskIoMgr::BufferDescriptor* buffer) { DCHECK(buffer != nullptr); _io_buffers.push_back(buffer); _auxiliary_mem_usage += buffer->buffer_len(); - buffer->set_mem_tracker(std::shared_ptr(_mem_tracker)); // TODO(yingchun): fixme + buffer->update_mem_tracker(_mem_tracker.get()); } Status RowBatch::resize_and_allocate_tuple_buffer(RuntimeState* state, int64_t* tuple_buffer_size, @@ -423,8 +417,7 @@ void RowBatch::transfer_resource_ownership(RowBatch* dest) { DiskIoMgr::BufferDescriptor* buffer = _io_buffers[i]; dest->_io_buffers.push_back(buffer); dest->_auxiliary_mem_usage += buffer->buffer_len(); - buffer->set_mem_tracker( - std::shared_ptr(dest->_mem_tracker)); // TODO(yingchun): fixme + buffer->update_mem_tracker(dest->_mem_tracker.get()); } _io_buffers.clear(); @@ -533,7 +526,7 @@ void RowBatch::acquire_state(RowBatch* src) { DiskIoMgr::BufferDescriptor* buffer = src->_io_buffers[i]; _io_buffers.push_back(buffer); _auxiliary_mem_usage += buffer->buffer_len(); - buffer->set_mem_tracker(std::shared_ptr(_mem_tracker)); // TODO(yingchun): fixme + buffer->update_mem_tracker(_mem_tracker.get()); } src->_io_buffers.clear(); src->_auxiliary_mem_usage = 0; diff --git a/be/src/runtime/row_batch.h b/be/src/runtime/row_batch.h index 070a1e578f..1ee54989cd 100644 --- a/be/src/runtime/row_batch.h +++ b/be/src/runtime/row_batch.h @@ -83,14 +83,14 @@ public: // Create RowBatch for a maximum of 'capacity' rows of tuples specified // by 'row_desc'. 
- RowBatch(const RowDescriptor& row_desc, int capacity, MemTracker* mem_tracker); + RowBatch(const RowDescriptor& row_desc, int capacity); // Populate a row batch from input_batch by copying input_batch's // tuple_data into the row batch's mempool and converting all offsets // in the data back into pointers. // TODO: figure out how to transfer the data from input_batch to this RowBatch // (so that we don't need to make yet another copy) - RowBatch(const RowDescriptor& row_desc, const PRowBatch& input_batch, MemTracker* tracker); + RowBatch(const RowDescriptor& row_desc, const PRowBatch& input_batch); // Releases all resources accumulated at this row batch. This includes // - tuple_ptrs @@ -394,7 +394,14 @@ public: std::string to_string(); private: - MemTracker* _mem_tracker; // not owned + // Back up the current thread local mem tracker. Used when transferring buffer memory between row batches. + // Memory operations in the actual row batch are automatically recorded in the thread local mem tracker. + // Change the recording position in the mem tracker specified by the external switch. + // Note: Raw pointers cannot be used directly, because when transferring_resource_ownership to other RowBatch, + // the src mem tracker when creating the current RowBatch may have been destroyed. + // At this time, the transfer of memory ownership cannot be completed, resulting in consumption > 0 + // when the src mem tracker is destructed, and the memory statistics of the dst mem tracker are missing. + std::shared_ptr _mem_tracker; // Close owned tuple streams and delete if needed. 
void close_tuple_streams(); diff --git a/be/src/runtime/runtime_filter_mgr.cpp b/be/src/runtime/runtime_filter_mgr.cpp index 80bdae034f..6518391218 100644 --- a/be/src/runtime/runtime_filter_mgr.cpp +++ b/be/src/runtime/runtime_filter_mgr.cpp @@ -27,6 +27,7 @@ #include "runtime/plan_fragment_executor.h" #include "runtime/runtime_filter_mgr.h" #include "runtime/runtime_state.h" +#include "runtime/thread_context.h" #include "service/brpc.h" #include "util/brpc_client_cache.h" #include "util/time.h" @@ -47,8 +48,7 @@ RuntimeFilterMgr::~RuntimeFilterMgr() {} Status RuntimeFilterMgr::init() { DCHECK(_state->instance_mem_tracker() != nullptr); - _tracker = MemTracker::create_tracker(-1, "RuntimeFilterMgr", _state->instance_mem_tracker(), - MemTrackerLevel::TASK); + _tracker = MemTracker::create_tracker(-1, "RuntimeFilterMgr", _state->instance_mem_tracker()); return Status::OK(); } @@ -103,8 +103,8 @@ Status RuntimeFilterMgr::regist_filter(const RuntimeFilterRole role, const TRunt RuntimeFilterMgrVal filter_mgr_val; filter_mgr_val.role = role; - RETURN_IF_ERROR(IRuntimeFilter::create(_state, _tracker.get(), &_pool, &desc, &options, - role, node_id, &filter_mgr_val.filter)); + RETURN_IF_ERROR(IRuntimeFilter::create(_state, &_pool, &desc, &options, role, node_id, + &filter_mgr_val.filter)); filter_map->emplace(key, filter_mgr_val); @@ -146,18 +146,17 @@ Status RuntimeFilterMergeControllerEntity::_init_with_desc( std::shared_ptr cntVal = std::make_shared(); // runtime_filter_desc and target will be released, // so we need to copy to cntVal - // TODO: tracker should add a name cntVal->producer_size = producer_size; cntVal->runtime_filter_desc = *runtime_filter_desc; cntVal->target_info = *target_info; cntVal->pool.reset(new ObjectPool()); - cntVal->tracker = MemTracker::create_tracker(); - cntVal->filter = cntVal->pool->add( - new IRuntimeFilter(nullptr, cntVal->tracker.get(), cntVal->pool.get())); + cntVal->filter = cntVal->pool->add(new IRuntimeFilter(nullptr, 
cntVal->pool.get())); std::string filter_id = std::to_string(runtime_filter_desc->filter_id); // LOG(INFO) << "entity filter id:" << filter_id; cntVal->filter->init_with_desc(&cntVal->runtime_filter_desc, query_options, _fragment_instance_id); + cntVal->tracker = MemTracker::create_tracker( + -1, thread_local_ctx.get()->_thread_mem_tracker_mgr->mem_tracker()->label() + ":FilterID:" + filter_id); _filter_map.emplace(filter_id, cntVal); return Status::OK(); } @@ -167,6 +166,8 @@ Status RuntimeFilterMergeControllerEntity::init(UniqueId query_id, UniqueId frag const TQueryOptions& query_options) { _query_id = query_id; _fragment_instance_id = fragment_instance_id; + // TODO(zxy) used after + _mem_tracker = MemTracker::create_tracker(-1, "RuntimeFilterMergeControllerEntity", nullptr); for (auto& filterid_to_desc : runtime_filter_params.rid_to_runtime_filter) { int filter_id = filterid_to_desc.first; const auto& target_iter = runtime_filter_params.rid_to_target_param.find(filter_id); @@ -199,11 +200,9 @@ Status RuntimeFilterMergeControllerEntity::merge(const PMergeFilterRequest* requ MergeRuntimeFilterParams params; params.data = data; params.request = request; - std::shared_ptr tracker = iter->second->tracker; ObjectPool* pool = iter->second->pool.get(); RuntimeFilterWrapperHolder holder; - RETURN_IF_ERROR( - IRuntimeFilter::create_wrapper(¶ms, tracker.get(), pool, holder.getHandle())); + RETURN_IF_ERROR(IRuntimeFilter::create_wrapper(¶ms, pool, holder.getHandle())); RETURN_IF_ERROR(cntVal->filter->merge_from(holder.getHandle()->get())); cntVal->arrive_id.insert(UniqueId(request->fragment_id()).to_string()); merged_size = cntVal->arrive_id.size(); diff --git a/be/src/runtime/runtime_filter_mgr.h b/be/src/runtime/runtime_filter_mgr.h index 9d2bd532fd..db336c76cc 100644 --- a/be/src/runtime/runtime_filter_mgr.h +++ b/be/src/runtime/runtime_filter_mgr.h @@ -137,6 +137,7 @@ private: UniqueId _fragment_instance_id; // protect _filter_map std::mutex _filter_map_mutex; + 
std::shared_ptr _mem_tracker; // TODO: convert filter id to i32 // filter-id -> val std::map> _filter_map; diff --git a/be/src/runtime/runtime_state.cpp b/be/src/runtime/runtime_state.cpp index 6fd6dc70a4..e274c0e5bd 100644 --- a/be/src/runtime/runtime_state.cpp +++ b/be/src/runtime/runtime_state.cpp @@ -284,7 +284,7 @@ Status RuntimeState::create_block_mgr() { block_mgr_limit = std::numeric_limits::max(); } RETURN_IF_ERROR(BufferedBlockMgr2::create( - this, _query_mem_tracker, runtime_profile(), _exec_env->tmp_file_mgr(), block_mgr_limit, + this, runtime_profile(), _exec_env->tmp_file_mgr(), block_mgr_limit, _exec_env->disk_io_mgr()->max_read_buffer_size(), &_block_mgr2)); return Status::OK(); } diff --git a/be/src/runtime/sorted_run_merger.cc b/be/src/runtime/sorted_run_merger.cc index 28baab462e..f86c261b8d 100644 --- a/be/src/runtime/sorted_run_merger.cc +++ b/be/src/runtime/sorted_run_merger.cc @@ -24,6 +24,7 @@ #include "runtime/descriptors.h" #include "runtime/row_batch.h" #include "runtime/sorter.h" +#include "runtime/thread_context.h" #include "runtime/tuple_row.h" #include "util/debug_util.h" #include "util/defer_op.h" @@ -123,8 +124,9 @@ public: // Retrieves the first batch of sorted rows from the run. 
Status init(bool* done) override { *done = false; - _pull_task_thread = std::thread( - &SortedRunMerger::ParallelBatchedRowSupplier::process_sorted_run_task, this); + _pull_task_thread = + std::thread(&SortedRunMerger::ParallelBatchedRowSupplier::process_sorted_run_task, + this, thread_local_ctx.get()->_thread_mem_tracker_mgr->mem_tracker()); RETURN_IF_ERROR(next(nullptr, done)); return Status::OK(); @@ -177,7 +179,8 @@ private: // signal of new batch or the eos/cancelled condition std::condition_variable _batch_prepared_cv; - void process_sorted_run_task() { + void process_sorted_run_task(const std::shared_ptr& mem_tracker) { + SCOPED_ATTACH_TASK_THREAD(ThreadContext::TaskType::QUERY, mem_tracker); std::unique_lock lock(_mutex); while (true) { _batch_prepared_cv.wait(lock, [this]() { return !_backup_ready.load(); }); @@ -307,11 +310,9 @@ Status SortedRunMerger::get_next(RowBatch* output_batch, bool* eos) { ChildSortedRunMerger::ChildSortedRunMerger(const TupleRowComparator& compare_less_than, RowDescriptor* row_desc, RuntimeProfile* profile, - MemTracker* parent, uint32_t row_batch_size, - bool deep_copy_input) + uint32_t row_batch_size, bool deep_copy_input) : SortedRunMerger(compare_less_than, row_desc, profile, deep_copy_input), _eos(false), - _parent(parent), _row_batch_size(row_batch_size) { _get_next_timer = ADD_TIMER(profile, "ChildMergeGetNext"); _get_next_batch_timer = ADD_TIMER(profile, "ChildMergeGetNextBatch"); @@ -323,7 +324,7 @@ Status ChildSortedRunMerger::get_batch(RowBatch** output_batch) { return Status::OK(); } - _current_row_batch.reset(new RowBatch(*_input_row_desc, _row_batch_size, _parent)); + _current_row_batch.reset(new RowBatch(*_input_row_desc, _row_batch_size)); bool eos = false; RETURN_IF_ERROR(get_next(_current_row_batch.get(), &eos)); diff --git a/be/src/runtime/sorted_run_merger.h b/be/src/runtime/sorted_run_merger.h index b73cdc1b0c..c448ac8b0c 100644 --- a/be/src/runtime/sorted_run_merger.h +++ b/be/src/runtime/sorted_run_merger.h 
@@ -109,8 +109,7 @@ protected: class ChildSortedRunMerger : public SortedRunMerger { public: ChildSortedRunMerger(const TupleRowComparator& compare_less_than, RowDescriptor* row_desc, - RuntimeProfile* profile, MemTracker* _parent, uint32_t row_batch_size, - bool deep_copy_input); + RuntimeProfile* profile, uint32_t row_batch_size, bool deep_copy_input); Status get_batch(RowBatch** output_batch) override; @@ -121,8 +120,6 @@ private: // The data in merger is exhaust bool _eos = false; - MemTracker* _parent; - uint32_t _row_batch_size; }; diff --git a/be/src/runtime/spill_sorter.cc b/be/src/runtime/spill_sorter.cc index 79c3b17c28..fc9213501e 100644 --- a/be/src/runtime/spill_sorter.cc +++ b/be/src/runtime/spill_sorter.cc @@ -638,8 +638,7 @@ Status SpillSorter::Run::prepare_read() { _pin_next_fixed_len_block = _pin_next_var_len_block = false; _num_tuples_returned = 0; - _buffered_batch.reset(new RowBatch(*_sorter->_output_row_desc, _sorter->_state->batch_size(), - _sorter->_mem_tracker.get())); + _buffered_batch.reset(new RowBatch(*_sorter->_output_row_desc, _sorter->_state->batch_size())); // If the run is pinned, merge is not invoked, so _buffered_batch is not needed // and the individual blocks do not need to be pinned. @@ -1251,8 +1250,7 @@ Status SpillSorter::merge_intermediate_runs() { std::min(max_runs_per_intermediate_merge, _sorted_runs.size() - max_runs_per_intermediate_merge); RETURN_IF_ERROR(create_merger(num_runs_to_merge)); - RowBatch intermediate_merge_batch(*_output_row_desc, _state->batch_size(), - _mem_tracker.get()); + RowBatch intermediate_merge_batch(*_output_row_desc, _state->batch_size()); // merged_run is the new sorted run that is produced by the intermediate merge. 
Run* merged_run = _obj_pool.add(new Run(this, _output_row_desc->tuple_descriptors()[0], false)); diff --git a/be/src/runtime/tablets_channel.cpp b/be/src/runtime/tablets_channel.cpp index 7c309a0260..e4e0b78a1b 100644 --- a/be/src/runtime/tablets_channel.cpp +++ b/be/src/runtime/tablets_channel.cpp @@ -31,11 +31,9 @@ DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(tablet_writer_count, MetricUnit::NOUNIT); std::atomic TabletsChannel::_s_tablet_writer_count; -TabletsChannel::TabletsChannel(const TabletsChannelKey& key, - const std::shared_ptr& mem_tracker, - bool is_high_priority) +TabletsChannel::TabletsChannel(const TabletsChannelKey& key, bool is_high_priority) : _key(key), _state(kInitialized), _closed_senders(64), _is_high_priority(is_high_priority) { - _mem_tracker = MemTracker::create_tracker(-1, "TabletsChannel:" + std::to_string(key.index_id), mem_tracker); + _mem_tracker = MemTracker::create_tracker(-1, "TabletsChannel:" + std::to_string(key.index_id)); static std::once_flag once_flag; std::call_once(once_flag, [] { REGISTER_HOOK_METRIC(tablet_writer_count, [&]() { return _s_tablet_writer_count.load(); }); @@ -101,7 +99,7 @@ Status TabletsChannel::add_batch(const PTabletWriterAddBatchRequest& request, } } - RowBatch row_batch(*_row_desc, request.row_batch(), _mem_tracker.get()); + RowBatch row_batch(*_row_desc, request.row_batch()); std::unordered_map /* row index */> tablet_to_rowidxs; for (int i = 0; i < request.tablet_ids_size(); ++i) { int64_t tablet_id = request.tablet_ids(i); @@ -286,7 +284,7 @@ Status TabletsChannel::_open_all_writers(const PTabletWriterOpenRequest& request wrequest.is_high_priority = _is_high_priority; DeltaWriter* writer = nullptr; - auto st = DeltaWriter::open(&wrequest, _mem_tracker, &writer); + auto st = DeltaWriter::open(&wrequest, &writer); if (st != OLAP_SUCCESS) { std::stringstream ss; ss << "open delta writer failed, tablet_id=" << tablet.tablet_id() diff --git a/be/src/runtime/tablets_channel.h b/be/src/runtime/tablets_channel.h index 
e99ac6264b..226b2b76db 100644 --- a/be/src/runtime/tablets_channel.h +++ b/be/src/runtime/tablets_channel.h @@ -54,7 +54,7 @@ class OlapTableSchemaParam; // Write channel for a particular (load, index). class TabletsChannel { public: - TabletsChannel(const TabletsChannelKey& key, const std::shared_ptr& mem_tracker, bool is_high_priority); + TabletsChannel(const TabletsChannelKey& key, bool is_high_priority); ~TabletsChannel(); diff --git a/be/src/runtime/tcmalloc_hook.h b/be/src/runtime/tcmalloc_hook.h new file mode 100644 index 0000000000..4e3fbb804c --- /dev/null +++ b/be/src/runtime/tcmalloc_hook.h @@ -0,0 +1,52 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include + +#include "runtime/thread_context.h" + +// Notice: modify the command in New/Delete Hook should be careful enough!, +// and should be as simple as possible, otherwise it may cause weird errors. E.g: +// 1. The first New Hook call of the process may be before some variables of +// the process are initialized. +// 2. Allocating memory in the Hook command causes the Hook to be entered again, +// infinite recursion. +// 3. 
TCMalloc hook will be triggered during the process of initializing/Destructor +// memtracker shared_ptr, Using the object pointed to by this memtracker shared_ptr +// in TCMalloc hook may cause crash. +// 4. Modifying additional thread local variables in ThreadContext construction and +// destructor to control the behavior of consume can lead to unexpected behavior, +// like this: if (LIKELY(doris::start_thread_mem_tracker)) { +void new_hook(const void* ptr, size_t size) { + doris::thread_local_ctx.get()->consume_mem(tc_nallocx(size, 0)); +} + +void delete_hook(const void* ptr) { + doris::thread_local_ctx.get()->release_mem(tc_malloc_size(const_cast(ptr))); +} + +void init_hook() { + MallocHook::AddNewHook(&new_hook); + MallocHook::AddDeleteHook(&delete_hook); +} + +void destroy_hook() { + MallocHook::RemoveNewHook(&new_hook); + MallocHook::RemoveDeleteHook(&delete_hook); +} diff --git a/be/src/runtime/thread_context.h b/be/src/runtime/thread_context.h index 7c2a97faac..1718e8d4ec 100644 --- a/be/src/runtime/thread_context.h +++ b/be/src/runtime/thread_context.h @@ -22,10 +22,17 @@ #include "common/logging.h" #include "gen_cpp/Types_types.h" +#include "runtime/runtime_state.h" +#include "runtime/thread_mem_tracker_mgr.h" #include "runtime/threadlocal.h" +// Attach to task when thread starts #define SCOPED_ATTACH_TASK_THREAD(type, ...) \ - auto VARNAME_LINENUM(attach_task_thread) = AttachTaskThread(type, ## __VA_ARGS__) + auto VARNAME_LINENUM(attach_task_thread) = AttachTaskThread(type, ##__VA_ARGS__) +#define SCOPED_STOP_THREAD_LOCAL_MEM_TRACKER() \ + auto VARNAME_LINENUM(stop_tracker) = StopThreadMemTracker(true) +#define GLOBAL_STOP_THREAD_LOCAL_MEM_TRACKER() \ + auto VARNAME_LINENUM(stop_tracker) = StopThreadMemTracker(false) namespace doris { @@ -48,31 +55,66 @@ public: COMPACTION = 3 // to be added ... 
}; + inline static const std::string TaskTypeStr[] = {"UNKNOWN", "QUERY", "LOAD", "COMPACTION"}; public: - ThreadContext() : _thread_id(std::this_thread::get_id()), _type(TaskType::UNKNOWN) {} + ThreadContext() : _thread_id(std::this_thread::get_id()), _type(TaskType::UNKNOWN) { + _thread_mem_tracker_mgr.reset(new ThreadMemTrackerMgr()); + std::stringstream ss; + ss << _thread_id; + _thread_id_str = ss.str(); + } void attach(const TaskType& type, const std::string& task_id, - const TUniqueId& fragment_instance_id) { + const TUniqueId& fragment_instance_id, + const std::shared_ptr& mem_tracker) { DCHECK(_type == TaskType::UNKNOWN && _task_id == ""); _type = type; _task_id = task_id; _fragment_instance_id = fragment_instance_id; + _thread_mem_tracker_mgr->attach_task(task_type_string(_type), task_id, fragment_instance_id, + mem_tracker); } void detach() { _type = TaskType::UNKNOWN; _task_id = ""; _fragment_instance_id = TUniqueId(); + _thread_mem_tracker_mgr->detach_task(); } - const std::string type() const; const std::string& task_id() const { return _task_id; } const std::thread::id& thread_id() const { return _thread_id; } + const std::string& thread_id_str() const { return _thread_id_str; } const TUniqueId& fragment_instance_id() const { return _fragment_instance_id; } + inline static const std::string task_type_string(ThreadContext::TaskType type) { + return TaskTypeStr[type]; + } + + void consume_mem(int64_t size) { + if (start_thread_mem_tracker) { + _thread_mem_tracker_mgr->cache_consume(size); + } + } + + void release_mem(int64_t size) { + if (start_thread_mem_tracker) { + _thread_mem_tracker_mgr->cache_consume(-size); + } + } + + // After _thread_mem_tracker_mgr is initialized, the current thread TCMalloc Hook starts to + // consume/release mem_tracker. + // Note that the use of shared_ptr will cause a crash. The guess is that there is an + // intermediate state during the copy construction of shared_ptr. 
Shared_ptr is not equal + // to nullptr, but the object it points to is not initialized. At this time, when the memory + // is released somewhere, the TCMalloc hook is triggered to cause the crash. + std::unique_ptr _thread_mem_tracker_mgr; + private: std::thread::id _thread_id; + std::string _thread_id_str; TaskType _type; std::string _task_id; TUniqueId _fragment_instance_id; @@ -113,31 +155,77 @@ private: inline thread_local ThreadContextPtr thread_local_ctx; -inline const std::string task_type_string(ThreadContext::TaskType type) { - switch (type) { - case ThreadContext::TaskType::QUERY: - return "QUERY"; - case ThreadContext::TaskType::LOAD: - return "LOAD"; - case ThreadContext::TaskType::COMPACTION: - return "COMPACTION"; - default: - return "UNKNOWN"; - } -} - -inline const std::string ThreadContext::type() const { - return task_type_string(_type); -} - class AttachTaskThread { public: - explicit AttachTaskThread(const ThreadContext::TaskType& type, const std::string& task_id = "", - const TUniqueId& fragment_instance_id = TUniqueId()) { - thread_local_ctx.get()->attach(type, task_id, fragment_instance_id); + explicit AttachTaskThread(const ThreadContext::TaskType& type, const std::string& task_id, + const TUniqueId& fragment_instance_id = TUniqueId(), + const std::shared_ptr& mem_tracker = nullptr) { + DCHECK(task_id != ""); + thread_local_ctx.get()->attach(type, task_id, fragment_instance_id, mem_tracker); + } + + explicit AttachTaskThread(const ThreadContext::TaskType& type, + const std::shared_ptr& mem_tracker) { + DCHECK(mem_tracker != nullptr); + thread_local_ctx.get()->attach(type, "", TUniqueId(), mem_tracker); + } + + explicit AttachTaskThread(const TQueryType::type& query_type, + const std::shared_ptr& mem_tracker) { + DCHECK(mem_tracker != nullptr); + thread_local_ctx.get()->attach(query_to_task_type(query_type), "", TUniqueId(), + mem_tracker); + } + + explicit AttachTaskThread(const TQueryType::type& query_type, const std::string& task_id, + 
const TUniqueId& fragment_instance_id, + const std::shared_ptr& mem_tracker) { + DCHECK(task_id != ""); + DCHECK(fragment_instance_id != TUniqueId()); + DCHECK(mem_tracker != nullptr); + thread_local_ctx.get()->attach(query_to_task_type(query_type), task_id, + fragment_instance_id, mem_tracker); + } + + explicit AttachTaskThread(const RuntimeState* runtime_state, + const std::shared_ptr& mem_tracker) { +#ifndef BE_TEST + DCHECK(print_id(runtime_state->query_id()) != ""); + DCHECK(runtime_state->fragment_instance_id() != TUniqueId()); + DCHECK(mem_tracker != nullptr); + thread_local_ctx.get()->attach(query_to_task_type(runtime_state->query_type()), + print_id(runtime_state->query_id()), + runtime_state->fragment_instance_id(), mem_tracker); +#endif + } + + const ThreadContext::TaskType query_to_task_type(const TQueryType::type& query_type) { + switch (query_type) { + case TQueryType::SELECT: + return ThreadContext::TaskType::QUERY; + case TQueryType::LOAD: + return ThreadContext::TaskType::LOAD; + default: + DCHECK(false); + return ThreadContext::TaskType::UNKNOWN; + } } ~AttachTaskThread() { thread_local_ctx.get()->detach(); } }; +class StopThreadMemTracker { +public: + explicit StopThreadMemTracker(const bool scope = true) : _scope(scope) { + start_thread_mem_tracker = false; + } + + ~StopThreadMemTracker() { + if (_scope == true) start_thread_mem_tracker = true; + } + +private: + bool _scope; +}; + } // namespace doris diff --git a/be/src/runtime/thread_mem_tracker_mgr.cpp b/be/src/runtime/thread_mem_tracker_mgr.cpp new file mode 100644 index 0000000000..12b64d4a82 --- /dev/null +++ b/be/src/runtime/thread_mem_tracker_mgr.cpp @@ -0,0 +1,87 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "runtime/thread_mem_tracker_mgr.h" + +#include "runtime/mem_tracker_task_pool.h" +#include "service/backend_options.h" + +namespace doris { + +void ThreadMemTrackerMgr::attach_task(const std::string& action_type, const std::string& task_id, + const TUniqueId& fragment_instance_id, + const std::shared_ptr& mem_tracker) { + _task_id = task_id; + _fragment_instance_id = fragment_instance_id; + _consume_err_call_back.update(action_type, true, nullptr); + if (mem_tracker == nullptr) { +#ifdef BE_TEST + if (ExecEnv::GetInstance()->task_pool_mem_tracker_registry() == nullptr) { + return; + } +#endif + _temp_task_mem_tracker = ExecEnv::GetInstance()->task_pool_mem_tracker_registry()->get_task_mem_tracker(task_id); + update_tracker(_temp_task_mem_tracker); + } else { + update_tracker(mem_tracker); + } +} + +void ThreadMemTrackerMgr::detach_task() { + _task_id = ""; + _fragment_instance_id = TUniqueId(); + _consume_err_call_back.init(); + clear_untracked_mems(); + _tracker_id = "process"; + // The following memory changes for the two map operations of _untracked_mems and _mem_trackers + // will be re-recorded in _untracked_mem. 
+ _untracked_mems.clear(); + _untracked_mems["process"] = 0; + _mem_trackers.clear(); + _mem_trackers["process"] = MemTracker::get_process_tracker(); +} + +void ThreadMemTrackerMgr::exceeded_cancel_task(const std::string& cancel_details) { + _temp_task_mem_tracker = + ExecEnv::GetInstance()->task_pool_mem_tracker_registry()->get_task_mem_tracker( + _task_id); + DCHECK(_temp_task_mem_tracker); + if (_fragment_instance_id != TUniqueId()) { + ExecEnv::GetInstance()->fragment_mgr()->cancel( + _fragment_instance_id, PPlanFragmentCancelReason::MEMORY_LIMIT_EXCEED, + cancel_details); + _fragment_instance_id = TUniqueId(); // Make sure it will only be canceled once + } +} + +void ThreadMemTrackerMgr::exceeded(int64_t mem_usage, Status st) { + auto rst = _mem_trackers[_tracker_id]->mem_limit_exceeded( + nullptr, "In TCMalloc Hook, " + _consume_err_call_back.action_type, mem_usage, st); + if (_consume_err_call_back.call_back_func != nullptr) { + _consume_err_call_back.call_back_func(); + } + if (_task_id != "") { + if (_consume_err_call_back.cancel_task == true) { + exceeded_cancel_task(rst.to_string()); + } else { + // TODO(zxy) Need other processing, or log (not too often). + } + } else { + // TODO(zxy) Need other processing, or log (not too often). + } +} +} // namespace doris diff --git a/be/src/runtime/thread_mem_tracker_mgr.h b/be/src/runtime/thread_mem_tracker_mgr.h new file mode 100644 index 0000000000..7401910eb2 --- /dev/null +++ b/be/src/runtime/thread_mem_tracker_mgr.h @@ -0,0 +1,197 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#include "runtime/exec_env.h" +#include "runtime/fragment_mgr.h" +#include "runtime/mem_tracker.h" + +namespace doris { + +typedef void (*ERRCALLBACK)(); + +struct ConsumeErrCallBackInfo { + std::string action_type; + bool cancel_task; // Whether to cancel the task when the current tracker exceeds the limit + ERRCALLBACK call_back_func; + + ConsumeErrCallBackInfo() { + init(); + } + + ConsumeErrCallBackInfo(std::string action_type, bool cancel_task, ERRCALLBACK call_back_func) + : action_type(action_type), cancel_task(cancel_task), call_back_func(call_back_func) {} + + void update(std::string new_action_type, bool new_cancel_task, ERRCALLBACK new_call_back_func) { + action_type = new_action_type; + cancel_task = new_cancel_task; + call_back_func = new_call_back_func; + } + + void init() { + action_type = ""; + cancel_task = false; + call_back_func = nullptr; + } +}; + +// If there is a memory new/delete operation in the consume method, it may enter infinite recursion. +// Note: After the tracker is stopped, the memory alloc in the consume method should be released in time, +// otherwise the MemTracker statistics will be inaccurate. +// In some cases, we want to turn off thread automatic memory statistics, manually call consume. +// In addition, when ~RootTracker, TCMalloc delete hook release RootTracker will crash. +inline thread_local bool start_thread_mem_tracker = false; + +// TCMalloc new/delete Hook is counted in the memory_tracker of the current thread. 
+// +// In the original design, the MemTracker consume method is called before the memory is allocated. +// If the consume succeeds, the memory is actually allocated, otherwise an exception is thrown. +// But the statistics of memory through TCMalloc new/delete Hook are after the memory is actually allocated, +// which is different from the previous behavior. Therefore, when alloc for some large memory, +// need to manually call consume after stop_mem_tracker, and then start_mem_tracker. +class ThreadMemTrackerMgr { +public: + ThreadMemTrackerMgr() { + _mem_trackers["process"] = MemTracker::get_process_tracker(); + _untracked_mems["process"] = 0; + _tracker_id = "process"; + start_thread_mem_tracker = true; + } + ~ThreadMemTrackerMgr() { + clear_untracked_mems(); + start_thread_mem_tracker = false; + } + + void clear_untracked_mems() { + for(auto untracked_mem : _untracked_mems) { + if (untracked_mem.second != 0) { + DCHECK(_mem_trackers[untracked_mem.first]); + _mem_trackers[untracked_mem.first]->consume(untracked_mem.second); + } + } + _mem_trackers[_tracker_id]->consume(_untracked_mem); + _untracked_mem = 0; + } + + // After attach, the current thread TCMalloc Hook starts to consume/release task mem_tracker + void attach_task(const std::string& action_type, const std::string& task_id, + const TUniqueId& fragment_instance_id, + const std::shared_ptr& mem_tracker); + + void detach_task(); + + // Must be fast enough! + // Thread update_tracker may be called very frequently, adding a memory copy will be slow. + std::string update_tracker(const std::shared_ptr& mem_tracker); + + // Note that, If call the memory allocation operation in TCMalloc new/delete Hook, + // such as calling LOG/iostream/sstream/stringstream/etc. 
related methods, + // must increase the control to avoid entering infinite recursion, otherwise it may cause crash or stuck, + void cache_consume(int64_t size); + + void noncache_consume(); + + std::shared_ptr mem_tracker() { + DCHECK(_mem_trackers[_tracker_id]); + return _mem_trackers[_tracker_id]; + } + +private: + // If tryConsume fails due to task mem tracker exceeding the limit, the task must be canceled + void exceeded_cancel_task(const std::string& cancel_details); + + void exceeded(int64_t mem_usage, Status st); + +private: + // Cache untracked mem, only update to _untracked_mems when switching mem tracker. + // Frequent calls to unordered_map _untracked_mems[] in cache_consume will degrade performance. + int64_t _untracked_mem = 0; + + // May switch back and forth between multiple trackers frequently. If you use a pointer to save the + // current tracker, and consume the current untracked mem each time you switch, there is a performance problem: + // 1. The frequent change of the use-count of shared_ptr has a huge cost; (it can also be solved by using + // raw pointers, which requires uniform replacement of the pointers of all mem trackers in doris) + // 2. The cost of calling consume for the current untracked mem is huge; + // In order to reduce the cost, during an attach task, the untracked mem of all switched trackers is cached, + // and the untracked mem is consumed only after the upper limit is reached or when the task is detached. 
+ std::unordered_map> _mem_trackers; + std::string _tracker_id; + std::unordered_map _untracked_mems; + + // Avoid memory allocation in functions and fall into an infinite loop + std::string _temp_tracker_id; + ConsumeErrCallBackInfo _temp_consume_err_call_back; + std::shared_ptr _temp_task_mem_tracker; + + std::string _task_id; + TUniqueId _fragment_instance_id; + ConsumeErrCallBackInfo _consume_err_call_back; +}; + +inline std::string ThreadMemTrackerMgr::update_tracker(const std::shared_ptr& mem_tracker) { + DCHECK(mem_tracker); + _temp_tracker_id = mem_tracker->id(); + if (_temp_tracker_id == _tracker_id) { + return _tracker_id; + } + if (_mem_trackers.find(_temp_tracker_id) == _mem_trackers.end()) { + _mem_trackers[_temp_tracker_id] = mem_tracker; + _untracked_mems[_temp_tracker_id] = 0; + } + _untracked_mems[_tracker_id] += _untracked_mem; + _untracked_mem = 0; + std::swap(_tracker_id, _temp_tracker_id); + return _temp_tracker_id; // old tracker_id +} + +inline void ThreadMemTrackerMgr::cache_consume(int64_t size) { + _untracked_mem += size; + // When some threads `0 < _untracked_mem < config::mem_tracker_consume_min_size_bytes` + // and some threads `_untracked_mem <= -config::mem_tracker_consume_min_size_bytes` trigger consumption(), + // it will cause tracker->consumption to be temporarily less than 0. + if (_untracked_mem >= config::mem_tracker_consume_min_size_bytes || + _untracked_mem <= -config::mem_tracker_consume_min_size_bytes) { + DCHECK(_mem_trackers.find(_tracker_id) != _mem_trackers.end()); + // When switching to the current tracker last time, the remaining untracked memory. + if (_untracked_mems[_tracker_id] != 0) { + _untracked_mem += _untracked_mems[_tracker_id]; + _untracked_mems[_tracker_id] = 0; + } + // Avoid getting stuck in infinite loop if there is memory allocation in noncache_consume. + // For example: GC function when try_consume; mem_limit_exceeded. 
+ start_thread_mem_tracker = false; + noncache_consume(); + start_thread_mem_tracker = true; + } +} + +inline void ThreadMemTrackerMgr::noncache_consume() { + DCHECK(_mem_trackers[_tracker_id]); + Status st = _mem_trackers[_tracker_id]->try_consume(_untracked_mem); + if (!st) { + // The memory has been allocated, so when TryConsume fails, need to continue to complete + // the consume to ensure the accuracy of the statistics. + _mem_trackers[_tracker_id]->consume(_untracked_mem); + exceeded(_untracked_mem, st); + } + _untracked_mem = 0; +} + +} // namespace doris diff --git a/be/src/runtime/vectorized_row_batch.cpp b/be/src/runtime/vectorized_row_batch.cpp index f26822833c..06a19fcd9e 100644 --- a/be/src/runtime/vectorized_row_batch.cpp +++ b/be/src/runtime/vectorized_row_batch.cpp @@ -23,14 +23,12 @@ namespace doris { VectorizedRowBatch::VectorizedRowBatch(const TabletSchema* schema, - const std::vector& cols, int capacity, - const std::shared_ptr& parent_tracker) + const std::vector& cols, int capacity) : _schema(schema), _cols(cols), _capacity(capacity), _limit(capacity) { _selected_in_use = false; _size = 0; - _tracker = MemTracker::create_tracker(-1, "VectorizedRowBatch", parent_tracker); - _mem_pool.reset(new MemPool(_tracker.get())); + _mem_pool.reset(new MemPool()); _selected = reinterpret_cast(new char[sizeof(uint16_t) * _capacity]); diff --git a/be/src/runtime/vectorized_row_batch.h b/be/src/runtime/vectorized_row_batch.h index 2f29f38cc3..6819f01c6a 100644 --- a/be/src/runtime/vectorized_row_batch.h +++ b/be/src/runtime/vectorized_row_batch.h @@ -61,8 +61,7 @@ private: class VectorizedRowBatch { public: - VectorizedRowBatch(const TabletSchema* schema, const std::vector& cols, int capacity, - const std::shared_ptr& parent_tracker = nullptr); + VectorizedRowBatch(const TabletSchema* schema, const std::vector& cols, int capacity); ~VectorizedRowBatch() { for (auto vec : _col_vectors) { @@ -120,7 +119,6 @@ private: bool _selected_in_use = false; uint8_t 
_block_status; - std::shared_ptr _tracker; std::unique_ptr _mem_pool; uint16_t _limit; }; diff --git a/be/src/service/doris_main.cpp b/be/src/service/doris_main.cpp index 6564dfe8d0..8a3ef533da 100644 --- a/be/src/service/doris_main.cpp +++ b/be/src/service/doris_main.cpp @@ -50,6 +50,7 @@ #include "runtime/exec_env.h" #include "runtime/heartbeat_flags.h" #include "runtime/minidump.h" +#include "runtime/tcmalloc_hook.h" #include "service/backend_options.h" #include "service/backend_service.h" #include "service/brpc_service.h" @@ -331,6 +332,10 @@ int main(int argc, char** argv) { return -1; } + if (doris::config::track_new_delete) { + init_hook(); + } + #if !defined(ADDRESS_SANITIZER) && !defined(LEAK_SANITIZER) && !defined(THREAD_SANITIZER) // Aggressive decommit is required so that unused pages in the TCMalloc page heap are // not backed by physical pages and do not contribute towards memory consumption. diff --git a/be/src/testutil/function_utils.cpp b/be/src/testutil/function_utils.cpp index 2ebb4c22f8..934e20c6d6 100644 --- a/be/src/testutil/function_utils.cpp +++ b/be/src/testutil/function_utils.cpp @@ -20,7 +20,6 @@ #include #include "runtime/mem_pool.h" -#include "runtime/mem_tracker.h" #include "udf/udf_internal.h" namespace doris { @@ -28,8 +27,7 @@ namespace doris { FunctionUtils::FunctionUtils() { doris_udf::FunctionContext::TypeDesc return_type; std::vector arg_types; - _mem_tracker.reset(new MemTracker(-1, "function util")); - _memory_pool = new MemPool(_mem_tracker.get()); + _memory_pool = new MemPool("function util"); _fn_ctx = FunctionContextImpl::create_context(_state, _memory_pool, return_type, arg_types, 0, false); } @@ -37,8 +35,7 @@ FunctionUtils::FunctionUtils(RuntimeState* state) { _state = state; doris_udf::FunctionContext::TypeDesc return_type; std::vector arg_types; - _mem_tracker.reset(new MemTracker(-1, "function util")); - _memory_pool = new MemPool(_mem_tracker.get()); + _memory_pool = new MemPool("function util"); _fn_ctx = 
FunctionContextImpl::create_context(_state, _memory_pool, return_type, arg_types, 0, false); } @@ -46,8 +43,7 @@ FunctionUtils::FunctionUtils(RuntimeState* state) { FunctionUtils::FunctionUtils(const doris_udf::FunctionContext::TypeDesc& return_type, const std::vector& arg_types, int varargs_buffer_size) { - _mem_tracker.reset(new MemTracker(-1, "function util")); - _memory_pool = new MemPool(_mem_tracker.get()); + _memory_pool = new MemPool("function util"); _fn_ctx = FunctionContextImpl::create_context(_state, _memory_pool, return_type, arg_types, varargs_buffer_size, false); } diff --git a/be/src/testutil/function_utils.h b/be/src/testutil/function_utils.h index 30b2a6cdff..0b0902ddac 100644 --- a/be/src/testutil/function_utils.h +++ b/be/src/testutil/function_utils.h @@ -23,7 +23,6 @@ namespace doris { class MemPool; -class MemTracker; class RuntimeState; class FunctionUtils { @@ -39,7 +38,6 @@ public: private: RuntimeState* _state = nullptr; - std::shared_ptr _mem_tracker; MemPool* _memory_pool = nullptr; doris_udf::FunctionContext* _fn_ctx = nullptr; }; diff --git a/be/src/util/arrow/row_batch.cpp b/be/src/util/arrow/row_batch.cpp index 8b61795f1b..1ec1bb64f7 100644 --- a/be/src/util/arrow/row_batch.cpp +++ b/be/src/util/arrow/row_batch.cpp @@ -366,9 +366,8 @@ class ToRowBatchConverter : public arrow::ArrayVisitor { public: using arrow::ArrayVisitor::Visit; - ToRowBatchConverter(const arrow::RecordBatch& batch, const RowDescriptor& row_desc, - const std::shared_ptr& tracker) - : _batch(batch), _row_desc(row_desc), _tracker(tracker) {} + ToRowBatchConverter(const arrow::RecordBatch& batch, const RowDescriptor& row_desc) + : _batch(batch), _row_desc(row_desc) {} #define PRIMITIVE_VISIT(TYPE) \ arrow::Status Visit(const arrow::TYPE& array) override { return _visit(array); } @@ -408,7 +407,6 @@ private: private: const arrow::RecordBatch& _batch; const RowDescriptor& _row_desc; - std::shared_ptr _tracker; std::unique_ptr _cur_slot_ref; std::shared_ptr _output; @@ 
-428,7 +426,7 @@ Status ToRowBatchConverter::convert(std::shared_ptr* result) { // TODO(zc): check if field type match size_t num_rows = _batch.num_rows(); - _output.reset(new RowBatch(_row_desc, num_rows, _tracker.get())); + _output.reset(new RowBatch(_row_desc, num_rows)); _output->commit_rows(num_rows); auto pool = _output->tuple_data_pool(); for (size_t row_id = 0; row_id < num_rows; ++row_id) { @@ -454,9 +452,8 @@ Status ToRowBatchConverter::convert(std::shared_ptr* result) { } Status convert_to_row_batch(const arrow::RecordBatch& batch, const RowDescriptor& row_desc, - const std::shared_ptr& tracker, std::shared_ptr* result) { - ToRowBatchConverter converter(batch, row_desc, tracker); + ToRowBatchConverter converter(batch, row_desc); return converter.convert(result); } diff --git a/be/src/util/arrow/row_batch.h b/be/src/util/arrow/row_batch.h index a7c2f3991d..f75b060502 100644 --- a/be/src/util/arrow/row_batch.h +++ b/be/src/util/arrow/row_batch.h @@ -35,7 +35,6 @@ class Schema; namespace doris { -class MemTracker; class ObjectPool; class RowBatch; class RowDescriptor; @@ -56,10 +55,8 @@ Status convert_to_arrow_batch(const RowBatch& batch, const std::shared_ptr* result); // Convert an Arrow RecordBatch to a Doris RowBatch. A valid RowDescriptor -// whose schema is the same with RecordBatch's should be given. Memory used -// by result RowBatch will be tracked by tracker. +// whose schema is the same with RecordBatch's should be given. 
Status convert_to_row_batch(const arrow::RecordBatch& batch, const RowDescriptor& row_desc, - const std::shared_ptr& tracker, std::shared_ptr* result); Status serialize_record_batch(const arrow::RecordBatch& record_batch, std::string* result); diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp index a1af7696bd..a8b6c3a18e 100644 --- a/be/src/vec/exec/join/vhash_join_node.cpp +++ b/be/src/vec/exec/join/vhash_join_node.cpp @@ -59,7 +59,7 @@ struct ProcessHashTableBuild { Defer defer {[&]() { int64_t bucket_size = hash_table_ctx.hash_table.get_buffer_size_in_cells(); int64_t bucket_bytes = hash_table_ctx.hash_table.get_buffer_size_in_bytes(); - _join_node->_mem_tracker->consume(bucket_bytes - old_bucket_bytes); + _join_node->_hash_table_mem_tracker->consume(bucket_bytes - old_bucket_bytes); _join_node->_mem_used += bucket_bytes - old_bucket_bytes; COUNTER_SET(_join_node->_build_buckets_counter, bucket_size); }}; @@ -708,6 +708,7 @@ Status HashJoinNode::init(const TPlanNode& tnode, RuntimeState* state) { Status HashJoinNode::prepare(RuntimeState* state) { RETURN_IF_ERROR(ExecNode::prepare(state)); + _hash_table_mem_tracker = MemTracker::create_virtual_tracker(-1, "VSetOperationNode:HashTable"); // Build phase auto build_phase_profile = runtime_profile()->create_child("BuildPhase", true, true); @@ -763,7 +764,8 @@ Status HashJoinNode::close(RuntimeState* state) { if (_vother_join_conjunct_ptr) (*_vother_join_conjunct_ptr)->close(state); - _mem_tracker->release(_mem_used); + _hash_table_mem_tracker->release(_mem_used); + return ExecNode::close(state); } @@ -930,7 +932,7 @@ Status HashJoinNode::_hash_table_build(RuntimeState* state) { RETURN_IF_CANCELLED(state); RETURN_IF_ERROR(child(1)->get_next(state, &block, &eos)); - _mem_tracker->consume(block.allocated_bytes()); + _hash_table_mem_tracker->consume(block.allocated_bytes()); _mem_used += block.allocated_bytes(); RETURN_IF_INSTANCE_LIMIT_EXCEEDED(state, "Hash join, while 
getting next from the child 1."); diff --git a/be/src/vec/exec/join/vhash_join_node.h b/be/src/vec/exec/join/vhash_join_node.h index ca93aaadee..f1175a8a4e 100644 --- a/be/src/vec/exec/join/vhash_join_node.h +++ b/be/src/vec/exec/join/vhash_join_node.h @@ -230,6 +230,8 @@ private: std::vector _items_counts; std::vector _build_block_offsets; std::vector _build_block_rows; + + std::shared_ptr _hash_table_mem_tracker; private: Status _hash_table_build(RuntimeState* state); Status _process_build_block(RuntimeState* state, Block& block, uint8_t offset); diff --git a/be/src/vec/exec/vaggregation_node.cpp b/be/src/vec/exec/vaggregation_node.cpp index 2f8df6b934..2f8cf06be5 100644 --- a/be/src/vec/exec/vaggregation_node.cpp +++ b/be/src/vec/exec/vaggregation_node.cpp @@ -208,6 +208,7 @@ Status AggregationNode::prepare(RuntimeState* state) { _merge_timer = ADD_TIMER(runtime_profile(), "MergeTime"); _expr_timer = ADD_TIMER(runtime_profile(), "ExprTime"); _get_results_timer = ADD_TIMER(runtime_profile(), "GetResultsTime"); + _data_mem_tracker = MemTracker::create_virtual_tracker(-1, "AggregationNode:Data", mem_tracker()); SCOPED_TIMER(_runtime_profile->total_time_counter()); _intermediate_tuple_desc = state->desc_tbl().get_tuple_descriptor(_intermediate_tuple_id); @@ -216,7 +217,7 @@ Status AggregationNode::prepare(RuntimeState* state) { RETURN_IF_ERROR( VExpr::prepare(_probe_expr_ctxs, state, child(0)->row_desc(), expr_mem_tracker())); - _mem_pool = std::make_unique(mem_tracker().get()); + _mem_pool = std::make_unique(); int j = _probe_expr_ctxs.size(); for (int i = 0; i < j; ++i) { @@ -365,8 +366,7 @@ Status AggregationNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* return Status::NotSupported("Not Implemented Aggregation Node::get_next scalar"); } -Status AggregationNode::get_next(RuntimeState* state, Block* block, bool* eos) { - SCOPED_TIMER(_runtime_profile->total_time_counter()); +Status AggregationNode::get_next(RuntimeState* state, Block* block, bool* 
eos) { SCOPED_TIMER(_runtime_profile->total_time_counter()); if (_is_streaming_preagg) { bool child_eos = false; @@ -555,7 +555,7 @@ Status AggregationNode::_merge_without_key(Block* block) { } void AggregationNode::_update_memusage_without_key() { - mem_tracker()->consume(_agg_arena_pool.size() - _mem_usage_record.used_in_arena); + _data_mem_tracker->consume(_agg_arena_pool.size() - _mem_usage_record.used_in_arena); _mem_usage_record.used_in_arena = _agg_arena_pool.size(); } @@ -1078,8 +1078,8 @@ void AggregationNode::_update_memusage_with_serialized_key() { std::visit( [&](auto&& agg_method) -> void { auto& data = agg_method.data; - mem_tracker()->consume(_agg_arena_pool.size() - _mem_usage_record.used_in_arena); - mem_tracker()->consume(data.get_buffer_size_in_bytes() - + _data_mem_tracker->consume(_agg_arena_pool.size() - _mem_usage_record.used_in_arena); + _data_mem_tracker->consume(data.get_buffer_size_in_bytes() - _mem_usage_record.used_in_state); _mem_usage_record.used_in_state = data.get_buffer_size_in_bytes(); _mem_usage_record.used_in_arena = _agg_arena_pool.size(); @@ -1103,7 +1103,7 @@ void AggregationNode::_close_with_serialized_key() { } void AggregationNode::release_tracker() { - mem_tracker()->release(_mem_usage_record.used_in_state + _mem_usage_record.used_in_arena); + _data_mem_tracker->release(_mem_usage_record.used_in_state + _mem_usage_record.used_in_arena); } } // namespace doris::vectorized diff --git a/be/src/vec/exec/vaggregation_node.h b/be/src/vec/exec/vaggregation_node.h index f020b90a6e..080bf5a9ca 100644 --- a/be/src/vec/exec/vaggregation_node.h +++ b/be/src/vec/exec/vaggregation_node.h @@ -435,6 +435,8 @@ private: bool _is_merge; std::unique_ptr _mem_pool; + std::shared_ptr _data_mem_tracker; + size_t _align_aggregate_states = 1; /// The offset to the n-th aggregate function in a row of aggregate functions. 
Sizes _offsets_of_aggregate_states; diff --git a/be/src/vec/exec/vblocking_join_node.cpp b/be/src/vec/exec/vblocking_join_node.cpp index af1adb957e..4ab17c0f8c 100644 --- a/be/src/vec/exec/vblocking_join_node.cpp +++ b/be/src/vec/exec/vblocking_join_node.cpp @@ -22,6 +22,7 @@ #include "exprs/expr.h" #include "gen_cpp/PlanNodes_types.h" #include "runtime/runtime_state.h" +#include "runtime/thread_context.h" #include "util/runtime_profile.h" namespace doris::vectorized { @@ -67,6 +68,7 @@ Status VBlockingJoinNode::close(RuntimeState* state) { } void VBlockingJoinNode::build_side_thread(RuntimeState* state, std::promise* status) { + SCOPED_ATTACH_TASK_THREAD(state, mem_tracker()); status->set_value(construct_build_side(state)); // Release the thread token as soon as possible (before the main thread joins // on it). This way, if we had a chain of 10 joins using 1 additional thread, diff --git a/be/src/vec/exec/vcross_join_node.cpp b/be/src/vec/exec/vcross_join_node.cpp index e1c14f0027..69b45dcb79 100644 --- a/be/src/vec/exec/vcross_join_node.cpp +++ b/be/src/vec/exec/vcross_join_node.cpp @@ -33,6 +33,7 @@ VCrossJoinNode::VCrossJoinNode(ObjectPool* pool, const TPlanNode& tnode, const D Status VCrossJoinNode::prepare(RuntimeState* state) { DCHECK(_join_op == TJoinOp::CROSS_JOIN); RETURN_IF_ERROR(VBlockingJoinNode::prepare(state)); + _block_mem_tracker = MemTracker::create_virtual_tracker(-1, "VCrossJoinNode:Block", mem_tracker()); _num_existing_columns = child(0)->row_desc().num_materialized_slots(); _num_columns_to_add = child(1)->row_desc().num_materialized_slots(); @@ -44,7 +45,7 @@ Status VCrossJoinNode::close(RuntimeState* state) { if (is_closed()) { return Status::OK(); } - _mem_tracker->release(_total_mem_usage); + _block_mem_tracker->release(_total_mem_usage); VBlockingJoinNode::close(state); return Status::OK(); } @@ -67,7 +68,7 @@ Status VCrossJoinNode::construct_build_side(RuntimeState* state) { _build_rows += rows; _total_mem_usage += mem_usage; 
_build_blocks.emplace_back(std::move(block)); - _mem_tracker->consume(mem_usage); + _block_mem_tracker->consume(mem_usage); } // to prevent use too many memory RETURN_IF_INSTANCE_LIMIT_EXCEEDED(state, "Cross join, while getting next from the child 1."); diff --git a/be/src/vec/exec/vcross_join_node.h b/be/src/vec/exec/vcross_join_node.h index ba517861bf..94c03a2c0f 100644 --- a/be/src/vec/exec/vcross_join_node.h +++ b/be/src/vec/exec/vcross_join_node.h @@ -64,6 +64,8 @@ private: uint64_t _build_rows = 0; uint64_t _total_mem_usage = 0; + std::shared_ptr _block_mem_tracker; + // Build mutable columns to insert data. // if block can mem reuse, just clear data in block // else build a new block and alloc mem of column from left and right child block diff --git a/be/src/vec/exec/volap_scan_node.cpp b/be/src/vec/exec/volap_scan_node.cpp index 76d6a43cbd..648fc9038e 100644 --- a/be/src/vec/exec/volap_scan_node.cpp +++ b/be/src/vec/exec/volap_scan_node.cpp @@ -21,6 +21,7 @@ #include "runtime/descriptors.h" #include "runtime/exec_env.h" #include "runtime/runtime_filter_mgr.h" +#include "runtime/thread_context.h" #include "util/priority_thread_pool.hpp" #include "vec/core/block.h" #include "vec/exec/volap_scanner.h" @@ -36,6 +37,7 @@ VOlapScanNode::VOlapScanNode(ObjectPool* pool, const TPlanNode& tnode, const Des void VOlapScanNode::transfer_thread(RuntimeState* state) { // scanner open pushdown to scanThread + SCOPED_ATTACH_TASK_THREAD(state, mem_tracker()); Status status = Status::OK(); if (_vconjunct_ctx_ptr) { @@ -84,7 +86,7 @@ void VOlapScanNode::transfer_thread(RuntimeState* state) { _free_blocks.emplace_back(block); _buffered_bytes += block->allocated_bytes(); } - _mem_tracker->consume(_buffered_bytes); + _block_mem_tracker->consume(_buffered_bytes); // read from scanner while (LIKELY(status.ok())) { @@ -144,6 +146,7 @@ void VOlapScanNode::transfer_thread(RuntimeState* state) { } void VOlapScanNode::scanner_thread(VOlapScanner* scanner) { + 
SCOPED_ATTACH_TASK_THREAD(_runtime_state, mem_tracker()); int64_t wait_time = scanner->update_wait_worker_timer(); // Do not use ScopedTimer. There is no guarantee that, the counter // (_scan_cpu_timer, the class member) is not destroyed after `_running_thread==0`. @@ -321,6 +324,7 @@ Status VOlapScanNode::start_scan_thread(RuntimeState* state) { _transfer_done = true; return Status::OK(); } + _block_mem_tracker = MemTracker::create_virtual_tracker(-1, "VOlapScanNode:Block"); // ranges constructed from scan keys std::vector> cond_ranges; @@ -420,7 +424,7 @@ Status VOlapScanNode::close(RuntimeState* state) { std::for_each(_scan_blocks.begin(), _scan_blocks.end(), std::default_delete()); _scan_row_batches_bytes = 0; std::for_each(_free_blocks.begin(), _free_blocks.end(), std::default_delete()); - _mem_tracker->release(_buffered_bytes); + _block_mem_tracker->release(_buffered_bytes); // OlapScanNode terminate by exception // so that initiative close the Scanner diff --git a/be/src/vec/exec/volap_scan_node.h b/be/src/vec/exec/volap_scan_node.h index 921399ee6b..09f77364ed 100644 --- a/be/src/vec/exec/volap_scan_node.h +++ b/be/src/vec/exec/volap_scan_node.h @@ -63,6 +63,8 @@ private: std::list _volap_scanners; std::mutex _volap_scanners_lock; + std::shared_ptr _block_mem_tracker; + int _max_materialized_blocks; }; } // namespace vectorized diff --git a/be/src/vec/exec/vset_operation_node.cpp b/be/src/vec/exec/vset_operation_node.cpp index 3f5ffc7a80..0985599d7f 100644 --- a/be/src/vec/exec/vset_operation_node.cpp +++ b/be/src/vec/exec/vset_operation_node.cpp @@ -40,7 +40,7 @@ struct HashTableBuild { Defer defer {[&]() { int64_t bucket_bytes = hash_table_ctx.hash_table.get_buffer_size_in_bytes(); - _operation_node->_mem_tracker->consume(bucket_bytes - old_bucket_bytes); + _operation_node->_hash_table_mem_tracker->consume(bucket_bytes - old_bucket_bytes); _operation_node->_mem_used += bucket_bytes - old_bucket_bytes; }}; @@ -85,7 +85,7 @@ Status 
VSetOperationNode::close(RuntimeState* state) { for (auto& exprs : _child_expr_lists) { VExpr::close(exprs, state); } - _mem_tracker->release(_mem_used); + _hash_table_mem_tracker->release(_mem_used); return ExecNode::close(state); } @@ -125,6 +125,7 @@ Status VSetOperationNode::open(RuntimeState* state) { Status VSetOperationNode::prepare(RuntimeState* state) { RETURN_IF_ERROR(ExecNode::prepare(state)); + _hash_table_mem_tracker = MemTracker::create_virtual_tracker(-1, "VSetOperationNode:HashTable"); SCOPED_TIMER(_runtime_profile->total_time_counter()); _build_timer = ADD_TIMER(runtime_profile(), "BuildTime"); _probe_timer = ADD_TIMER(runtime_profile(), "ProbeTime"); @@ -240,7 +241,7 @@ Status VSetOperationNode::hash_table_build(RuntimeState* state) { RETURN_IF_ERROR(child(0)->get_next(state, &block, &eos)); size_t allocated_bytes = block.allocated_bytes(); - _mem_tracker->consume(allocated_bytes); + _hash_table_mem_tracker->consume(allocated_bytes); _mem_used += allocated_bytes; RETURN_IF_INSTANCE_LIMIT_EXCEEDED(state, "Set Operation Node, while getting next from the child 0."); diff --git a/be/src/vec/exec/vset_operation_node.h b/be/src/vec/exec/vset_operation_node.h index 1f8519c955..4b9034a81e 100644 --- a/be/src/vec/exec/vset_operation_node.h +++ b/be/src/vec/exec/vset_operation_node.h @@ -90,6 +90,8 @@ protected: RuntimeProfile::Counter* _build_timer; // time to build hash table RuntimeProfile::Counter* _probe_timer; // time to probe + std::shared_ptr _hash_table_mem_tracker; + template friend struct HashTableBuild; template diff --git a/be/src/vec/exec/vsort_node.cpp b/be/src/vec/exec/vsort_node.cpp index 919c5a040e..4b5197a497 100644 --- a/be/src/vec/exec/vsort_node.cpp +++ b/be/src/vec/exec/vsort_node.cpp @@ -43,6 +43,7 @@ Status VSortNode::prepare(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); _runtime_profile->add_info_string("TOP-N", _limit == -1 ? 
"false" : "true"); RETURN_IF_ERROR(ExecNode::prepare(state)); + _block_mem_tracker = MemTracker::create_virtual_tracker(-1, "VSortNode:Block", mem_tracker()); RETURN_IF_ERROR(_vsort_exec_exprs.prepare(state, child(0)->row_desc(), _row_descriptor, expr_mem_tracker())); return Status::OK(); @@ -102,7 +103,7 @@ Status VSortNode::close(RuntimeState* state) { if (is_closed()) { return Status::OK(); } - _mem_tracker->release(_total_mem_usage); + _block_mem_tracker->release(_total_mem_usage); _vsort_exec_exprs.close(state); ExecNode::close(state); return Status::OK(); @@ -159,7 +160,7 @@ Status VSortNode::sort_input(RuntimeState* state) { _sorted_blocks.emplace_back(std::move(block)); } - _mem_tracker->consume(mem_usage); + _block_mem_tracker->consume(mem_usage); RETURN_IF_CANCELLED(state); RETURN_IF_ERROR(state->check_query_state("vsort, while sorting input.")); } diff --git a/be/src/vec/exec/vsort_node.h b/be/src/vec/exec/vsort_node.h index f67326afa6..f224938bd9 100644 --- a/be/src/vec/exec/vsort_node.h +++ b/be/src/vec/exec/vsort_node.h @@ -84,6 +84,8 @@ private: // only valid in TOP-N node uint64_t _num_rows_in_block = 0; std::priority_queue _block_priority_queue; + + std::shared_ptr _block_mem_tracker; }; } // namespace doris::vectorized diff --git a/be/src/vec/exprs/vexpr_context.cpp b/be/src/vec/exprs/vexpr_context.cpp index a8f1d5291f..1a8c3f15ee 100644 --- a/be/src/vec/exprs/vexpr_context.cpp +++ b/be/src/vec/exprs/vexpr_context.cpp @@ -39,7 +39,8 @@ doris::Status VExprContext::prepare(doris::RuntimeState* state, const doris::RowDescriptor& row_desc, const std::shared_ptr& tracker) { _prepared = true; - _pool.reset(new MemPool(state->instance_mem_tracker().get())); + _mem_tracker = tracker; + _pool.reset(new MemPool(_mem_tracker.get())); return _root->prepare(state, row_desc, this); } @@ -86,6 +87,7 @@ doris::Status VExprContext::clone(RuntimeState* state, VExprContext** new_ctx) { (*new_ctx)->_is_clone = true; (*new_ctx)->_prepared = true; (*new_ctx)->_opened = 
true; + (*new_ctx)->_mem_tracker = _mem_tracker; return _root->open(state, *new_ctx, FunctionContext::THREAD_LOCAL); } diff --git a/be/src/vec/exprs/vexpr_context.h b/be/src/vec/exprs/vexpr_context.h index 0021779b35..2df377d770 100644 --- a/be/src/vec/exprs/vexpr_context.h +++ b/be/src/vec/exprs/vexpr_context.h @@ -83,6 +83,8 @@ private: /// and owned by this VExprContext. std::vector _fn_contexts; + std::shared_ptr _mem_tracker; + /// Pool backing fn_contexts_. Counts against the runtime state's UDF mem tracker. std::unique_ptr _pool; diff --git a/be/src/vec/olap/vgeneric_iterators.cpp b/be/src/vec/olap/vgeneric_iterators.cpp index b31d5117e5..f0c40810ae 100644 --- a/be/src/vec/olap/vgeneric_iterators.cpp +++ b/be/src/vec/olap/vgeneric_iterators.cpp @@ -266,11 +266,8 @@ Status VMergeIteratorContext::_load_next_block() { class VMergeIterator : public RowwiseIterator { public: // VMergeIterator takes the ownership of input iterators - VMergeIterator(std::vector& iters, std::shared_ptr parent, int sequence_id_idx) : - _origin_iters(iters),_sequence_id_idx(sequence_id_idx) { - // use for count the mem use of Block use in Merge - _mem_tracker = MemTracker::create_tracker(-1, "VMergeIterator", parent); - } + VMergeIterator(std::vector& iters, int sequence_id_idx) : + _origin_iters(iters),_sequence_id_idx(sequence_id_idx) {} ~VMergeIterator() override { while (!_merge_heap.empty()) { @@ -359,10 +356,7 @@ public: // Iterators' ownership it transfered to this class. // This class will delete all iterators when destructs // Client should not use iterators any more. 
- VUnionIterator(std::vector& v, std::shared_ptr parent) - : _origin_iters(v.begin(), v.end()) { - _mem_tracker = MemTracker::create_tracker(-1, "VUnionIterator", parent); - } + VUnionIterator(std::vector& v) : _origin_iters(v.begin(), v.end()) {} ~VUnionIterator() override { std::for_each(_origin_iters.begin(), _origin_iters.end(), std::default_delete()); @@ -412,18 +406,18 @@ Status VUnionIterator::next_batch(vectorized::Block* block) { } -RowwiseIterator* new_merge_iterator(std::vector& inputs, std::shared_ptr parent, int sequence_id_idx) { +RowwiseIterator* new_merge_iterator(std::vector& inputs, int sequence_id_idx) { if (inputs.size() == 1) { return *(inputs.begin()); } - return new VMergeIterator(inputs, parent, sequence_id_idx); + return new VMergeIterator(inputs, sequence_id_idx); } -RowwiseIterator* new_union_iterator(std::vector& inputs, std::shared_ptr parent) { +RowwiseIterator* new_union_iterator(std::vector& inputs) { if (inputs.size() == 1) { return *(inputs.begin()); } - return new VUnionIterator(inputs, parent); + return new VUnionIterator(inputs); } RowwiseIterator* new_auto_increment_iterator(const Schema& schema, size_t num_rows) { diff --git a/be/src/vec/olap/vgeneric_iterators.h b/be/src/vec/olap/vgeneric_iterators.h index af9733bf44..ca37004a7e 100644 --- a/be/src/vec/olap/vgeneric_iterators.h +++ b/be/src/vec/olap/vgeneric_iterators.h @@ -27,14 +27,14 @@ namespace vectorized { // // Inputs iterators' ownership is taken by created merge iterator. And client // should delete returned iterator after usage. -RowwiseIterator* new_merge_iterator(std::vector& inputs, std::shared_ptr parent, int sequence_id_idx); +RowwiseIterator* new_merge_iterator(std::vector& inputs, int sequence_id_idx); // Create a union iterator for input iterators. Union iterator will read // input iterators one by one. // // Inputs iterators' ownership is taken by created union iterator. And client // should delete returned iterator after usage. 
-RowwiseIterator* new_union_iterator(std::vector& inputs, std::shared_ptr parent); +RowwiseIterator* new_union_iterator(std::vector& inputs); // Create an auto increment iterator which returns num_rows data in format of schema. // This class aims to be used in unit test. diff --git a/be/src/vec/runtime/vdata_stream_mgr.cpp b/be/src/vec/runtime/vdata_stream_mgr.cpp index 4b0bb5f75c..b26d11cd6b 100644 --- a/be/src/vec/runtime/vdata_stream_mgr.cpp +++ b/be/src/vec/runtime/vdata_stream_mgr.cpp @@ -53,7 +53,7 @@ std::shared_ptr VDataStreamMgr::create_recvr( VLOG_FILE << "creating receiver for fragment=" << fragment_instance_id << ", node=" << dest_node_id; std::shared_ptr recvr(new VDataStreamRecvr( - this, state->instance_mem_tracker(), row_desc, fragment_instance_id, dest_node_id, + this, row_desc, fragment_instance_id, dest_node_id, num_senders, is_merging, buffer_size, profile, sub_plan_query_statistics_recvr)); uint32_t hash_value = get_hash_value(fragment_instance_id, dest_node_id); std::lock_guard l(_lock); diff --git a/be/src/vec/runtime/vdata_stream_recvr.cpp b/be/src/vec/runtime/vdata_stream_recvr.cpp index 0ace1a7b8c..3c019ffc04 100644 --- a/be/src/vec/runtime/vdata_stream_recvr.cpp +++ b/be/src/vec/runtime/vdata_stream_recvr.cpp @@ -123,7 +123,7 @@ void VDataStreamRecvr::SenderQueue::add_block(const PBlock& pblock, int be_numbe SCOPED_TIMER(_recvr->_deserialize_row_batch_timer); block = new Block(pblock); } - _recvr->_mem_tracker->consume(block->bytes()); + _recvr->_block_mem_tracker->consume(block->bytes()); VLOG_ROW << "added #rows=" << block->rows() << " batch_size=" << block_byte_size << "\n"; _block_queue.emplace_back(block_byte_size, block); @@ -162,7 +162,7 @@ void VDataStreamRecvr::SenderQueue::add_block(Block* block, bool use_move) { std::unique_lock l(_lock); size_t block_size = nblock->bytes(); _block_queue.emplace_back(block_size, nblock); - _recvr->_mem_tracker->consume(nblock->bytes()); + _recvr->_block_mem_tracker->consume(nblock->bytes()); 
_data_arrival_cv.notify_one(); if (_recvr->exceeds_limit(block_size)) { @@ -245,10 +245,9 @@ void VDataStreamRecvr::SenderQueue::close() { } VDataStreamRecvr::VDataStreamRecvr( - VDataStreamMgr* stream_mgr, const std::shared_ptr& parent_tracker, - const RowDescriptor& row_desc, const TUniqueId& fragment_instance_id, - PlanNodeId dest_node_id, int num_senders, bool is_merging, int total_buffer_limit, - RuntimeProfile* profile, + VDataStreamMgr* stream_mgr, const RowDescriptor& row_desc, + const TUniqueId& fragment_instance_id, PlanNodeId dest_node_id, int num_senders, + bool is_merging, int total_buffer_limit, RuntimeProfile* profile, std::shared_ptr sub_plan_query_statistics_recvr) : _mgr(stream_mgr), _fragment_instance_id(fragment_instance_id), @@ -262,7 +261,9 @@ VDataStreamRecvr::VDataStreamRecvr( _sub_plan_query_statistics_recvr(sub_plan_query_statistics_recvr) { _mem_tracker = MemTracker::create_tracker(-1, "VDataStreamRecvr:" + print_id(_fragment_instance_id), - parent_tracker, MemTrackerLevel::VERBOSE, _profile); + nullptr, MemTrackerLevel::VERBOSE, _profile); + _block_mem_tracker = MemTracker::create_virtual_tracker( + -1, "VDataStreamRecvr:block:" + print_id(_fragment_instance_id), _mem_tracker); // Create one queue per sender if is_merging is true. int num_queues = is_merging ? 
num_senders : 1; @@ -330,10 +331,10 @@ Status VDataStreamRecvr::get_next(Block* block, bool* eos) { RETURN_IF_ERROR(_merger->get_next(block, eos)); } - if (LIKELY(_mem_tracker->consumption() >= block->bytes())) { - _mem_tracker->release(block->bytes()); + if (LIKELY(_block_mem_tracker->consumption() >= block->bytes())) { + _block_mem_tracker->release(block->bytes()); } else { - _mem_tracker->release(_mem_tracker->consumption()); + _block_mem_tracker->release(_block_mem_tracker->consumption()); } return Status::OK(); } @@ -363,7 +364,7 @@ void VDataStreamRecvr::close() { _mgr = nullptr; _merger.reset(); - _mem_tracker->release(_mem_tracker->consumption()); + _block_mem_tracker->release(_block_mem_tracker->consumption()); } } // namespace doris::vectorized diff --git a/be/src/vec/runtime/vdata_stream_recvr.h b/be/src/vec/runtime/vdata_stream_recvr.h index 9b41e76d61..7e74944712 100644 --- a/be/src/vec/runtime/vdata_stream_recvr.h +++ b/be/src/vec/runtime/vdata_stream_recvr.h @@ -50,10 +50,10 @@ class VExprContext; class VDataStreamRecvr { public: - VDataStreamRecvr(VDataStreamMgr* stream_mgr, const std::shared_ptr& parent_tracker, - const RowDescriptor& row_desc, const TUniqueId& fragment_instance_id, - PlanNodeId dest_node_id, int num_senders, bool is_merging, - int total_buffer_limit, RuntimeProfile* profile, + VDataStreamRecvr(VDataStreamMgr* stream_mgr, const RowDescriptor& row_desc, + const TUniqueId& fragment_instance_id, PlanNodeId dest_node_id, + int num_senders, bool is_merging, int total_buffer_limit, + RuntimeProfile* profile, std::shared_ptr sub_plan_query_statistics_recvr); ~VDataStreamRecvr(); @@ -73,7 +73,6 @@ public: const TUniqueId& fragment_instance_id() const { return _fragment_instance_id; } PlanNodeId dest_node_id() const { return _dest_node_id; } const RowDescriptor& row_desc() const { return _row_desc; } - std::shared_ptr mem_tracker() const { return _mem_tracker; } void add_sub_plan_statistics(const PQueryStatistics& statistics, int sender_id) 
{ _sub_plan_query_statistics_recvr->insert(statistics, sender_id); @@ -117,6 +116,7 @@ private: std::atomic _num_buffered_bytes; std::shared_ptr _mem_tracker; + std::shared_ptr _block_mem_tracker; std::vector _sender_queues; std::unique_ptr _merger; diff --git a/be/test/exec/broker_scan_node_test.cpp b/be/test/exec/broker_scan_node_test.cpp index 7f6d1d1a0f..4c4c9f1a90 100644 --- a/be/test/exec/broker_scan_node_test.cpp +++ b/be/test/exec/broker_scan_node_test.cpp @@ -458,9 +458,8 @@ TEST_F(BrokerScanNodeTest, normal) { status = scan_node.open(&_runtime_state); ASSERT_TRUE(status.ok()); - auto tracker = std::make_shared(); // Get batch - RowBatch batch(scan_node.row_desc(), _runtime_state.batch_size(), tracker.get()); + RowBatch batch(scan_node.row_desc(), _runtime_state.batch_size()); bool eos = false; status = scan_node.get_next(&_runtime_state, &batch, &eos); diff --git a/be/test/exec/csv_scan_node_test.cpp b/be/test/exec/csv_scan_node_test.cpp index b2feef0d65..f60d3172a3 100644 --- a/be/test/exec/csv_scan_node_test.cpp +++ b/be/test/exec/csv_scan_node_test.cpp @@ -249,8 +249,7 @@ TEST_F(CsvScanNodeTest, NormalUse) { status = scan_node.open(_state); ASSERT_TRUE(status.ok()); - auto tracker = std::make_shared(); - RowBatch row_batch(scan_node._row_descriptor, _state->batch_size(), tracker.get()); + RowBatch row_batch(scan_node._row_descriptor, _state->batch_size()); bool eos = false; while (!eos) { @@ -288,7 +287,7 @@ TEST_F(CsvScanNodeTest, continuousDelim) { status = scan_node.open(_state); ASSERT_TRUE(status.ok()); - RowBatch row_batch(scan_node._row_descriptor, _state->batch_size(), tracker.get()); + RowBatch row_batch(scan_node._row_descriptor, _state->batch_size()); bool eos = false; while (!eos) { @@ -326,8 +325,7 @@ TEST_F(CsvScanNodeTest, wrong_decimal_format_test) { status = scan_node.open(_state); ASSERT_TRUE(status.ok()); - auto tracker = std::make_shared(); - RowBatch row_batch(scan_node._row_descriptor, _state->batch_size(), tracker.get()); + 
RowBatch row_batch(scan_node._row_descriptor, _state->batch_size()); bool eos = false; while (!eos) { @@ -355,8 +353,7 @@ TEST_F(CsvScanNodeTest, fill_fix_len_stringi_test) { status = scan_node.open(_state); ASSERT_TRUE(status.ok()); - auto tracker = std::make_shared(); - RowBatch row_batch(scan_node._row_descriptor, _state->batch_size(), tracker.get()); + RowBatch row_batch(scan_node._row_descriptor, _state->batch_size()); bool eos = false; while (!eos) { @@ -400,8 +397,7 @@ TEST_F(CsvScanNodeTest, wrong_fix_len_string_format_test) { status = scan_node.open(_state); ASSERT_TRUE(status.ok()); - auto tracker = std::make_shared(); - RowBatch row_batch(scan_node._row_descriptor, _state->batch_size(), tracker.get()); + RowBatch row_batch(scan_node._row_descriptor, _state->batch_size()); bool eos = false; while (!eos) { diff --git a/be/test/exec/es_scan_node_test.cpp b/be/test/exec/es_scan_node_test.cpp index e6751c7886..2a68abebcf 100644 --- a/be/test/exec/es_scan_node_test.cpp +++ b/be/test/exec/es_scan_node_test.cpp @@ -133,8 +133,7 @@ TEST_F(EsScanNodeTest, normal_use) { status = scan_node.open(&_runtime_state); ASSERT_TRUE(status.ok()); - std::shared_ptr mem_tracker(new MemTracker(-1)); - RowBatch row_batch(scan_node._row_descriptor, _runtime_state.batch_size(), mem_tracker.get()); + RowBatch row_batch(scan_node._row_descriptor, _runtime_state.batch_size()); bool eos = false; status = scan_node.get_next(&_runtime_state, &row_batch, &eos); ASSERT_TRUE(status.ok()); diff --git a/be/test/exec/json_scanner_test.cpp b/be/test/exec/json_scanner_test.cpp index 7aa43af655..c10f59327e 100644 --- a/be/test/exec/json_scanner_test.cpp +++ b/be/test/exec/json_scanner_test.cpp @@ -560,9 +560,8 @@ TEST_F(JsonScannerTest, normal_simple_arrayjson) { status = scan_node.open(&_runtime_state); ASSERT_TRUE(status.ok()); - MemTracker tracker; // Get batch - RowBatch batch(scan_node.row_desc(), _runtime_state.batch_size(), &tracker); + RowBatch batch(scan_node.row_desc(), 
_runtime_state.batch_size()); bool eof = false; status = scan_node.get_next(&_runtime_state, &batch, &eof); ASSERT_TRUE(status.ok()); diff --git a/be/test/exec/json_scanner_test_with_jsonpath.cpp b/be/test/exec/json_scanner_test_with_jsonpath.cpp index 8c1eb17e46..907afb25df 100644 --- a/be/test/exec/json_scanner_test_with_jsonpath.cpp +++ b/be/test/exec/json_scanner_test_with_jsonpath.cpp @@ -391,9 +391,8 @@ TEST_F(JsonScannerTest, normal) { status = scan_node.open(&_runtime_state); ASSERT_TRUE(status.ok()); - MemTracker tracker; // Get batch - RowBatch batch(scan_node.row_desc(), _runtime_state.batch_size(), &tracker); + RowBatch batch(scan_node.row_desc(), _runtime_state.batch_size()); bool eof = false; status = scan_node.get_next(&_runtime_state, &batch, &eof); ASSERT_TRUE(status.ok()); diff --git a/be/test/exec/parquet_scanner_test.cpp b/be/test/exec/parquet_scanner_test.cpp index e299f71c59..7a1bf8a38d 100644 --- a/be/test/exec/parquet_scanner_test.cpp +++ b/be/test/exec/parquet_scanner_test.cpp @@ -462,9 +462,8 @@ TEST_F(ParquetScannerTest, normal) { status = scan_node.open(&_runtime_state); ASSERT_TRUE(status.ok()); - auto tracker = std::make_shared(); // Get batch - RowBatch batch(scan_node.row_desc(), _runtime_state.batch_size(), tracker.get()); + RowBatch batch(scan_node.row_desc(), _runtime_state.batch_size()); bool eof = false; for (int i = 0; i < 14; i++) { status = scan_node.get_next(&_runtime_state, &batch, &eof); diff --git a/be/test/exec/tablet_info_test.cpp b/be/test/exec/tablet_info_test.cpp index 9b676d55f7..9a731de837 100644 --- a/be/test/exec/tablet_info_test.cpp +++ b/be/test/exec/tablet_info_test.cpp @@ -144,8 +144,7 @@ TEST_F(OlapTablePartitionParamTest, normal) { ASSERT_TRUE(st.ok()); RowDescriptor row_desc(*desc_tbl, {0}, {false}); TupleDescriptor* tuple_desc = desc_tbl->get_tuple_descriptor(0); - auto tracker = std::make_shared(); - RowBatch batch(row_desc, 1024, tracker.get()); + RowBatch batch(row_desc, 1024); // 12, 9, "abc" { Tuple* 
tuple = (Tuple*)batch.tuple_data_pool()->allocate(tuple_desc->byte_size()); @@ -337,8 +336,7 @@ TEST_F(OlapTablePartitionParamTest, single_list_partition) { ASSERT_TRUE(st.ok()); RowDescriptor row_desc(*desc_tbl, {0}, {false}); TupleDescriptor* tuple_desc = desc_tbl->get_tuple_descriptor(0); - auto tracker = std::make_shared(); - RowBatch batch(row_desc, 1024, tracker.get()); + RowBatch batch(row_desc, 1024); // 12, 1, "abc" { Tuple* tuple = (Tuple*)batch.tuple_data_pool()->allocate(tuple_desc->byte_size()); @@ -549,8 +547,7 @@ TEST_F(OlapTablePartitionParamTest, multi_list_partition) { ASSERT_TRUE(st.ok()); RowDescriptor row_desc(*desc_tbl, {0}, {false}); TupleDescriptor* tuple_desc = desc_tbl->get_tuple_descriptor(0); - auto tracker = std::make_shared(); - RowBatch batch(row_desc, 1024, tracker.get()); + RowBatch batch(row_desc, 1024); // 12, 1, "beijing" { Tuple* tuple = (Tuple*)batch.tuple_data_pool()->allocate(tuple_desc->byte_size()); @@ -682,8 +679,7 @@ TEST_F(OlapTablePartitionParamTest, unpartitioned) { ASSERT_TRUE(st.ok()); RowDescriptor row_desc(*desc_tbl, {0}, {false}); TupleDescriptor* tuple_desc = desc_tbl->get_tuple_descriptor(0); - auto tracker = std::make_shared(); - RowBatch batch(row_desc, 1024, tracker.get()); + RowBatch batch(row_desc, 1024); // 12, 9, "abc" { Tuple* tuple = (Tuple*)batch.tuple_data_pool()->allocate(tuple_desc->byte_size()); diff --git a/be/test/exec/tablet_sink_test.cpp b/be/test/exec/tablet_sink_test.cpp index e59e972097..5b0428d5d4 100644 --- a/be/test/exec/tablet_sink_test.cpp +++ b/be/test/exec/tablet_sink_test.cpp @@ -343,10 +343,9 @@ public: k_add_batch_status.to_protobuf(response->mutable_status()); if (request->has_row_batch() && _row_desc != nullptr) { - auto tracker = std::make_shared(); brpc::Controller* cntl = static_cast(controller); attachment_transfer_request_row_batch(request, cntl); - RowBatch batch(*_row_desc, request->row_batch(), tracker.get()); + RowBatch batch(*_row_desc, request->row_batch()); for (int i 
= 0; i < batch.num_rows(); ++i) { LOG(INFO) << batch.get_row(i)->to_string(*_row_desc); _output_set->emplace(batch.get_row(i)->to_string(*_row_desc)); @@ -415,8 +414,7 @@ TEST_F(OlapTableSinkTest, normal) { st = sink.open(&state); ASSERT_TRUE(st.ok()); // send - auto tracker = std::make_shared(); - RowBatch batch(row_desc, 1024, tracker.get()); + RowBatch batch(row_desc, 1024); // 12, 9, "abc" { Tuple* tuple = (Tuple*)batch.tuple_data_pool()->allocate(tuple_desc->byte_size()); @@ -549,8 +547,7 @@ TEST_F(OlapTableSinkTest, convert) { st = sink.open(&state); ASSERT_TRUE(st.ok()); // send - auto tracker = std::make_shared(); - RowBatch batch(row_desc, 1024, tracker.get()); + RowBatch batch(row_desc, 1024); // 12, 9, "abc" { Tuple* tuple = (Tuple*)batch.tuple_data_pool()->allocate(tuple_desc->byte_size()); @@ -858,8 +855,7 @@ TEST_F(OlapTableSinkTest, add_batch_failed) { st = sink.open(&state); ASSERT_TRUE(st.ok()); // send - auto tracker = std::make_shared(); - RowBatch batch(row_desc, 1024, tracker.get()); + RowBatch batch(row_desc, 1024); TupleDescriptor* tuple_desc = desc_tbl->get_tuple_descriptor(0); // 12, 9, "abc" { @@ -939,8 +935,7 @@ TEST_F(OlapTableSinkTest, decimal) { st = sink.open(&state); ASSERT_TRUE(st.ok()); // send - auto tracker = std::make_shared(); - RowBatch batch(row_desc, 1024, tracker.get()); + RowBatch batch(row_desc, 1024); // 12, 12.3 { Tuple* tuple = (Tuple*)batch.tuple_data_pool()->allocate(tuple_desc->byte_size()); diff --git a/be/test/exprs/bloom_filter_predicate_test.cpp b/be/test/exprs/bloom_filter_predicate_test.cpp index 1cba866e7d..84a0e0364e 100644 --- a/be/test/exprs/bloom_filter_predicate_test.cpp +++ b/be/test/exprs/bloom_filter_predicate_test.cpp @@ -31,9 +31,7 @@ public: }; TEST_F(BloomFilterPredicateTest, bloom_filter_func_int_test) { - auto tracker = MemTracker::create_tracker(); - std::unique_ptr func( - create_bloom_filter(tracker.get(), PrimitiveType::TYPE_INT)); + std::unique_ptr 
func(create_bloom_filter(PrimitiveType::TYPE_INT)); ASSERT_TRUE(func->init(1024, 0.05).ok()); const int data_size = 1024; int data[data_size]; @@ -53,9 +51,7 @@ TEST_F(BloomFilterPredicateTest, bloom_filter_func_int_test) { } TEST_F(BloomFilterPredicateTest, bloom_filter_func_stringval_test) { - auto tracker = MemTracker::create_tracker(); - std::unique_ptr func( - create_bloom_filter(tracker.get(), PrimitiveType::TYPE_VARCHAR)); + std::unique_ptr func(create_bloom_filter(PrimitiveType::TYPE_VARCHAR)); ASSERT_TRUE(func->init(1024, 0.05).ok()); ObjectPool obj_pool; const int data_size = 1024; @@ -74,7 +70,7 @@ TEST_F(BloomFilterPredicateTest, bloom_filter_func_stringval_test) { ASSERT_FALSE(func->find((const void*)¬_exist_val)); // test fixed char - func.reset(create_bloom_filter(tracker.get(), PrimitiveType::TYPE_CHAR)); + func.reset(create_bloom_filter(PrimitiveType::TYPE_CHAR)); ASSERT_TRUE(func->init(1024, 0.05).ok()); auto varchar_true_str = obj_pool.add(new std::string("true")); @@ -104,9 +100,7 @@ TEST_F(BloomFilterPredicateTest, bloom_filter_func_stringval_test) { } TEST_F(BloomFilterPredicateTest, bloom_filter_size_test) { - auto tracker = MemTracker::create_tracker(); - std::unique_ptr func( - create_bloom_filter(tracker.get(), PrimitiveType::TYPE_VARCHAR)); + std::unique_ptr func(create_bloom_filter(PrimitiveType::TYPE_VARCHAR)); int length = 4096; func->init_with_fixed_length(4096); char* data = nullptr; diff --git a/be/test/exprs/runtime_filter_test.cpp b/be/test/exprs/runtime_filter_test.cpp index 4c4a206974..c96841a2c9 100644 --- a/be/test/exprs/runtime_filter_test.cpp +++ b/be/test/exprs/runtime_filter_test.cpp @@ -104,9 +104,8 @@ IRuntimeFilter* create_runtime_filter(TRuntimeFilterType::type type, TQueryOptio } IRuntimeFilter* runtime_filter = nullptr; - Status status = IRuntimeFilter::create( - _runtime_stat, _runtime_stat->instance_mem_tracker().get(), _obj_pool, &desc, options, - RuntimeFilterRole::PRODUCER, -1, &runtime_filter); + Status status 
= IRuntimeFilter::create(_runtime_stat, _obj_pool, &desc, options, + RuntimeFilterRole::PRODUCER, -1, &runtime_filter); assert(status.ok()); diff --git a/be/test/olap/bloom_filter_column_predicate_test.cpp b/be/test/olap/bloom_filter_column_predicate_test.cpp index 7921fc88de..dcf637eefe 100644 --- a/be/test/olap/bloom_filter_column_predicate_test.cpp +++ b/be/test/olap/bloom_filter_column_predicate_test.cpp @@ -95,9 +95,7 @@ TEST_F(TestBloomFilterColumnPredicate, FLOAT_COLUMN) { return_columns.push_back(i); } - auto tracker = MemTracker::create_tracker(-1, "OlapScanner"); - std::shared_ptr bloom_filter( - create_bloom_filter(tracker.get(), PrimitiveType::TYPE_FLOAT)); + std::shared_ptr bloom_filter(create_bloom_filter(PrimitiveType::TYPE_FLOAT)); bloom_filter->init(4096, 0.05); float value = 4.1; diff --git a/be/test/olap/cumulative_compaction_policy_test.cpp b/be/test/olap/cumulative_compaction_policy_test.cpp index 929149a0b0..381a5eb4cc 100644 --- a/be/test/olap/cumulative_compaction_policy_test.cpp +++ b/be/test/olap/cumulative_compaction_policy_test.cpp @@ -1042,8 +1042,7 @@ TEST_F(TestSizeBasedCumulativeCompactionPolicy, _pick_missing_version_cumulative rowsets.push_back(_tablet->get_rowset_by_version({1, 1})); rowsets.push_back(_tablet->get_rowset_by_version({2, 2})); rowsets.push_back(_tablet->get_rowset_by_version({4, 4})); - std::shared_ptr mem_tracker(new MemTracker()); - CumulativeCompaction compaction(_tablet, mem_tracker); + CumulativeCompaction compaction(_tablet); compaction.find_longest_consecutive_version(&rowsets, nullptr); ASSERT_EQ(3, rowsets.size()); ASSERT_EQ(2, rowsets[2]->end_version()); diff --git a/be/test/olap/delta_writer_test.cpp b/be/test/olap/delta_writer_test.cpp index 1e2cab2e1e..8f279d1abf 100644 --- a/be/test/olap/delta_writer_test.cpp +++ b/be/test/olap/delta_writer_test.cpp @@ -47,7 +47,6 @@ namespace doris { static const uint32_t MAX_PATH_LEN = 1024; StorageEngine* k_engine = nullptr; -std::shared_ptr k_mem_tracker = nullptr; 
void set_up() { char buffer[MAX_PATH_LEN]; @@ -66,7 +65,6 @@ void set_up() { ExecEnv* exec_env = doris::ExecEnv::GetInstance(); exec_env->set_storage_engine(k_engine); k_engine->start_bg_threads(); - k_mem_tracker.reset(new MemTracker(-1, "delta writer test")); } void tear_down() { @@ -368,7 +366,7 @@ TEST_F(TestDeltaWriter, open) { load_id.set_lo(0); WriteRequest write_req = {10003, 270068375, WriteType::LOAD, 20001, 30001, load_id, tuple_desc}; DeltaWriter* delta_writer = nullptr; - DeltaWriter::open(&write_req, k_mem_tracker, &delta_writer); + DeltaWriter::open(&write_req, &delta_writer); ASSERT_NE(delta_writer, nullptr); res = delta_writer->close(); ASSERT_EQ(OLAP_SUCCESS, res); @@ -402,7 +400,7 @@ TEST_F(TestDeltaWriter, write) { WriteRequest write_req = {10004, 270068376, WriteType::LOAD, 20002, 30002, load_id, tuple_desc, &(tuple_desc->slots())}; DeltaWriter* delta_writer = nullptr; - DeltaWriter::open(&write_req, k_mem_tracker, &delta_writer); + DeltaWriter::open(&write_req, &delta_writer); ASSERT_NE(delta_writer, nullptr); auto tracker = std::make_shared(); @@ -528,7 +526,7 @@ TEST_F(TestDeltaWriter, sequence_col) { WriteRequest write_req = {10005, 270068377, WriteType::LOAD, 20003, 30003, load_id, tuple_desc, &(tuple_desc->slots())}; DeltaWriter* delta_writer = nullptr; - DeltaWriter::open(&write_req, k_mem_tracker, &delta_writer); + DeltaWriter::open(&write_req, &delta_writer); ASSERT_NE(delta_writer, nullptr); MemTracker tracker; diff --git a/be/test/olap/generic_iterators_test.cpp b/be/test/olap/generic_iterators_test.cpp index c21f6b66a8..ebbef342d4 100644 --- a/be/test/olap/generic_iterators_test.cpp +++ b/be/test/olap/generic_iterators_test.cpp @@ -83,8 +83,7 @@ TEST(GenericIteratorsTest, Union) { inputs.push_back(new_auto_increment_iterator(schema, 200)); inputs.push_back(new_auto_increment_iterator(schema, 300)); - auto iter = - new_union_iterator(inputs, MemTracker::create_tracker(-1, "UnionIterator", nullptr)); + auto iter = 
new_union_iterator(inputs); StorageReadOptions opts; auto st = iter->init(opts); ASSERT_TRUE(st.ok()); @@ -123,8 +122,7 @@ TEST(GenericIteratorsTest, Merge) { inputs.push_back(new_auto_increment_iterator(schema, 200)); inputs.push_back(new_auto_increment_iterator(schema, 300)); - auto iter = new_merge_iterator(std::move(inputs), - MemTracker::create_tracker(-1, "MergeIterator", nullptr), -1); + auto iter = new_merge_iterator(std::move(inputs), -1); StorageReadOptions opts; auto st = iter->init(opts); ASSERT_TRUE(st.ok()); diff --git a/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp b/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp index 0808b65ff0..eff75eca83 100644 --- a/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp +++ b/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp @@ -147,7 +147,6 @@ void test_nullable_data(uint8_t* src_data, uint8_t* src_is_null, int num_rows, OlapReaderStatistics stats; iter_opts.stats = &stats; iter_opts.rblock = rblock.get(); - iter_opts.mem_tracker = std::make_shared(); st = iter->init(iter_opts); ASSERT_TRUE(st.ok()); @@ -208,7 +207,6 @@ void test_nullable_data(uint8_t* src_data, uint8_t* src_is_null, int num_rows, OlapReaderStatistics stats; iter_opts.stats = &stats; iter_opts.rblock = rblock.get(); - iter_opts.mem_tracker = std::make_shared(); st = iter->init(iter_opts); ASSERT_TRUE(st.ok()); @@ -334,7 +332,6 @@ void test_array_nullable_data(CollectionValue* src_data, uint8_t* src_is_null, i OlapReaderStatistics stats; iter_opts.stats = &stats; iter_opts.rblock = rblock.get(); - iter_opts.mem_tracker = std::make_shared(); st = iter->init(iter_opts); ASSERT_TRUE(st.ok()); // sequence read @@ -470,7 +467,6 @@ void test_read_default_value(string value, void* result) { tablet_column.default_value(), tablet_column.is_nullable(), type_info, tablet_column.length()); ColumnIteratorOptions iter_opts; - iter_opts.mem_tracker = std::make_shared(); auto st = iter.init(iter_opts); 
ASSERT_TRUE(st.ok()); // sequence read @@ -582,7 +578,6 @@ void test_v_read_default_value(string value, void* result) { tablet_column.default_value(), tablet_column.is_nullable(), type_info, tablet_column.length()); ColumnIteratorOptions iter_opts; - iter_opts.mem_tracker = std::make_shared(); auto st = iter.init(iter_opts); ASSERT_TRUE(st.ok()); diff --git a/be/test/olap/rowset/segment_v2/segment_test.cpp b/be/test/olap/rowset/segment_v2/segment_test.cpp index b62b731cb2..1a847f4222 100644 --- a/be/test/olap/rowset/segment_v2/segment_test.cpp +++ b/be/test/olap/rowset/segment_v2/segment_test.cpp @@ -166,7 +166,7 @@ TEST_F(SegmentReaderWriterTest, normal) { StorageReadOptions read_opts; read_opts.stats = &stats; std::unique_ptr iter; - segment->new_iterator(schema, read_opts, nullptr, &iter); + segment->new_iterator(schema, read_opts, &iter); RowBlockV2 block(schema, 1024); @@ -222,7 +222,7 @@ TEST_F(SegmentReaderWriterTest, normal) { read_opts.stats = &stats; read_opts.key_ranges.emplace_back(lower_bound.get(), false, upper_bound.get(), true); std::unique_ptr iter; - segment->new_iterator(schema, read_opts, nullptr, &iter); + segment->new_iterator(schema, read_opts, &iter); RowBlockV2 block(schema, 100); ASSERT_TRUE(iter->next_batch(&block).ok()); @@ -248,7 +248,7 @@ TEST_F(SegmentReaderWriterTest, normal) { read_opts.stats = &stats; read_opts.key_ranges.emplace_back(lower_bound.get(), false, nullptr, false); std::unique_ptr iter; - segment->new_iterator(schema, read_opts, nullptr, &iter); + segment->new_iterator(schema, read_opts, &iter); RowBlockV2 block(schema, 100); ASSERT_TRUE(iter->next_batch(&block).is_end_of_file()); @@ -277,7 +277,7 @@ TEST_F(SegmentReaderWriterTest, normal) { read_opts.stats = &stats; read_opts.key_ranges.emplace_back(lower_bound.get(), false, upper_bound.get(), false); std::unique_ptr iter; - segment->new_iterator(schema, read_opts, nullptr, &iter); + segment->new_iterator(schema, read_opts, &iter); RowBlockV2 block(schema, 100); 
ASSERT_TRUE(iter->next_batch(&block).is_end_of_file()); @@ -314,7 +314,7 @@ TEST_F(SegmentReaderWriterTest, LazyMaterialization) { read_opts.stats = &stats; std::unique_ptr iter; - ASSERT_TRUE(segment->new_iterator(read_schema, read_opts, nullptr, &iter).ok()); + ASSERT_TRUE(segment->new_iterator(read_schema, read_opts, &iter).ok()); RowBlockV2 block(read_schema, 1024); ASSERT_TRUE(iter->next_batch(&block).ok()); @@ -338,7 +338,7 @@ TEST_F(SegmentReaderWriterTest, LazyMaterialization) { read_opts.stats = &stats; std::unique_ptr iter; - ASSERT_TRUE(segment->new_iterator(read_schema, read_opts, nullptr, &iter).ok()); + ASSERT_TRUE(segment->new_iterator(read_schema, read_opts, &iter).ok()); RowBlockV2 block(read_schema, 1024); ASSERT_TRUE(iter->next_batch(&block).ok()); @@ -358,7 +358,7 @@ TEST_F(SegmentReaderWriterTest, LazyMaterialization) { read_opts.stats = &stats; std::unique_ptr iter; - ASSERT_TRUE(segment->new_iterator(read_schema, read_opts, nullptr, &iter).ok()); + ASSERT_TRUE(segment->new_iterator(read_schema, read_opts, &iter).ok()); RowBlockV2 block(read_schema, 1024); ASSERT_TRUE(iter->next_batch(&block).ok()); @@ -390,7 +390,7 @@ TEST_F(SegmentReaderWriterTest, LazyMaterialization) { read_opts.stats = &stats; std::unique_ptr iter; - ASSERT_TRUE(segment->new_iterator(read_schema, read_opts, nullptr, &iter).ok()); + ASSERT_TRUE(segment->new_iterator(read_schema, read_opts, &iter).ok()); RowBlockV2 block(read_schema, 1024); ASSERT_TRUE(iter->next_batch(&block).ok()); @@ -452,7 +452,7 @@ TEST_F(SegmentReaderWriterTest, TestIndex) { read_opts.conditions = conditions.get(); std::unique_ptr iter; - segment->new_iterator(schema, read_opts, nullptr, &iter); + segment->new_iterator(schema, read_opts, &iter); RowBlockV2 block(schema, 1); @@ -475,7 +475,7 @@ TEST_F(SegmentReaderWriterTest, TestIndex) { read_opts.conditions = conditions.get(); std::unique_ptr iter; - segment->new_iterator(schema, read_opts, nullptr, &iter); + segment->new_iterator(schema, read_opts, 
&iter); RowBlockV2 block(schema, 1024); @@ -535,7 +535,7 @@ TEST_F(SegmentReaderWriterTest, TestIndex) { read_opts.delete_conditions.push_back(delete_conditions.get()); std::unique_ptr iter; - segment->new_iterator(schema, read_opts, nullptr, &iter); + segment->new_iterator(schema, read_opts, &iter); RowBlockV2 block(schema, 1024); @@ -583,7 +583,7 @@ TEST_F(SegmentReaderWriterTest, TestIndex) { ASSERT_EQ(OLAP_SUCCESS, conditions->append_condition(condition)); read_opts.conditions = conditions.get(); std::unique_ptr iter; - segment->new_iterator(schema, read_opts, nullptr, &iter); + segment->new_iterator(schema, read_opts, &iter); RowBlockV2 block(schema, 1024); ASSERT_TRUE(iter->next_batch(&block).is_end_of_file()); @@ -676,7 +676,7 @@ TEST_F(SegmentReaderWriterTest, TestDefaultValueColumn) { StorageReadOptions read_opts; read_opts.stats = &stats; std::unique_ptr iter; - segment->new_iterator(schema, read_opts, nullptr, &iter); + segment->new_iterator(schema, read_opts, &iter); RowBlockV2 block(schema, 1024); @@ -726,7 +726,7 @@ TEST_F(SegmentReaderWriterTest, TestDefaultValueColumn) { StorageReadOptions read_opts; read_opts.stats = &stats; std::unique_ptr iter; - segment->new_iterator(schema, read_opts, nullptr, &iter); + segment->new_iterator(schema, read_opts, &iter); RowBlockV2 block(schema, 1024); @@ -831,7 +831,7 @@ TEST_F(SegmentReaderWriterTest, TestStringDict) { StorageReadOptions read_opts; read_opts.stats = &stats; std::unique_ptr iter; - segment->new_iterator(schema, read_opts, nullptr, &iter); + segment->new_iterator(schema, read_opts, &iter); RowBlockV2 block(schema, 1024); @@ -884,7 +884,7 @@ TEST_F(SegmentReaderWriterTest, TestStringDict) { read_opts.stats = &stats; read_opts.key_ranges.emplace_back(lower_bound.get(), false, nullptr, false); std::unique_ptr iter; - segment->new_iterator(schema, read_opts, nullptr, &iter); + segment->new_iterator(schema, read_opts, &iter); RowBlockV2 block(schema, 100); st = iter->next_batch(&block); @@ -917,7 
+917,7 @@ TEST_F(SegmentReaderWriterTest, TestStringDict) { read_opts.stats = &stats; read_opts.key_ranges.emplace_back(lower_bound.get(), false, upper_bound.get(), false); std::unique_ptr iter; - segment->new_iterator(schema, read_opts, nullptr, &iter); + segment->new_iterator(schema, read_opts, &iter); RowBlockV2 block(schema, 100); st = iter->next_batch(&block); @@ -941,7 +941,7 @@ TEST_F(SegmentReaderWriterTest, TestStringDict) { read_opts.conditions = conditions.get(); std::unique_ptr iter; - segment->new_iterator(schema, read_opts, nullptr, &iter); + segment->new_iterator(schema, read_opts, &iter); RowBlockV2 block(schema, 1024); int left = 4 * 1024; @@ -998,7 +998,7 @@ TEST_F(SegmentReaderWriterTest, TestStringDict) { read_opts.conditions = conditions.get(); std::unique_ptr iter; - segment->new_iterator(schema, read_opts, nullptr, &iter); + segment->new_iterator(schema, read_opts, &iter); RowBlockV2 block(schema, 1024); @@ -1037,7 +1037,7 @@ TEST_F(SegmentReaderWriterTest, TestBitmapPredicate) { read_opts.stats = &stats; std::unique_ptr iter; - segment->new_iterator(schema, read_opts, nullptr, &iter); + segment->new_iterator(schema, read_opts, &iter); RowBlockV2 block(schema, 1024); ASSERT_TRUE(iter->next_batch(&block).ok()); @@ -1059,7 +1059,7 @@ TEST_F(SegmentReaderWriterTest, TestBitmapPredicate) { read_opts.stats = &stats; std::unique_ptr iter; - segment->new_iterator(schema, read_opts, nullptr, &iter); + segment->new_iterator(schema, read_opts, &iter); RowBlockV2 block(schema, 1024); ASSERT_TRUE(iter->next_batch(&block).ok()); @@ -1081,7 +1081,7 @@ TEST_F(SegmentReaderWriterTest, TestBitmapPredicate) { read_opts.stats = &stats; std::unique_ptr iter; - segment->new_iterator(schema, read_opts, nullptr, &iter); + segment->new_iterator(schema, read_opts, &iter); RowBlockV2 block(schema, 1024); ASSERT_FALSE(iter->next_batch(&block).ok()); @@ -1105,7 +1105,7 @@ TEST_F(SegmentReaderWriterTest, TestBitmapPredicate) { read_opts.stats = &stats; std::unique_ptr 
iter; - segment->new_iterator(schema, read_opts, nullptr, &iter); + segment->new_iterator(schema, read_opts, &iter); RowBlockV2 block(schema, 1024); ASSERT_TRUE(iter->next_batch(&block).ok()); @@ -1128,7 +1128,7 @@ TEST_F(SegmentReaderWriterTest, TestBitmapPredicate) { read_opts.stats = &stats; std::unique_ptr iter; - segment->new_iterator(schema, read_opts, nullptr, &iter); + segment->new_iterator(schema, read_opts, &iter); RowBlockV2 block(schema, 1024); diff --git a/be/test/runtime/buffered_tuple_stream2_test.cpp b/be/test/runtime/buffered_tuple_stream2_test.cpp index e31d935fc1..4a522ad84f 100644 --- a/be/test/runtime/buffered_tuple_stream2_test.cpp +++ b/be/test/runtime/buffered_tuple_stream2_test.cpp @@ -117,7 +117,7 @@ protected: } virtual RowBatch* CreateIntBatch(int offset, int num_rows, bool gen_null) { - RowBatch* batch = _pool.add(new RowBatch(*_int_desc, num_rows, _tracker.get())); + RowBatch* batch = _pool.add(new RowBatch(*_int_desc, num_rows)); int tuple_size = _int_desc->tuple_descriptors()[0]->byte_size(); uint8_t* tuple_mem = reinterpret_cast( batch->tuple_data_pool()->allocate(tuple_size * num_rows)); @@ -146,7 +146,7 @@ protected: virtual RowBatch* CreateStringBatch(int offset, int num_rows, bool gen_null) { int tuple_size = sizeof(StringValue) + 1; - RowBatch* batch = _pool.add(new RowBatch(*_string_desc, num_rows, _tracker.get())); + RowBatch* batch = _pool.add(new RowBatch(*_string_desc, num_rows)); uint8_t* tuple_mem = batch->tuple_data_pool()->allocate(tuple_size * num_rows); memset(tuple_mem, 0, tuple_size * num_rows); const int string_tuples = _string_desc->tuple_descriptors().size(); diff --git a/be/test/runtime/load_channel_mgr_test.cpp b/be/test/runtime/load_channel_mgr_test.cpp index 56d7106f10..6e683ed17c 100644 --- a/be/test/runtime/load_channel_mgr_test.cpp +++ b/be/test/runtime/load_channel_mgr_test.cpp @@ -45,8 +45,7 @@ OLAPStatus close_status; int64_t wait_lock_time_ns; // mock -DeltaWriter::DeltaWriter(WriteRequest* req, const 
std::shared_ptr& mem_tracker, - StorageEngine* storage_engine) +DeltaWriter::DeltaWriter(WriteRequest* req, StorageEngine* storage_engine) : _req(*req) {} DeltaWriter::~DeltaWriter() {} @@ -55,12 +54,11 @@ OLAPStatus DeltaWriter::init() { return OLAP_SUCCESS; } -OLAPStatus DeltaWriter::open(WriteRequest* req, const std::shared_ptr& mem_tracker, - DeltaWriter** writer) { +OLAPStatus DeltaWriter::open(WriteRequest* req, DeltaWriter** writer) { if (open_status != OLAP_SUCCESS) { return open_status; } - *writer = new DeltaWriter(req, mem_tracker, nullptr); + *writer = new DeltaWriter(req, nullptr); return open_status; } @@ -192,7 +190,6 @@ TEST_F(LoadChannelMgrTest, normal) { DescriptorTbl::create(&obj_pool, tdesc_tbl, &desc_tbl); auto tuple_desc = desc_tbl->get_tuple_descriptor(0); RowDescriptor row_desc(*desc_tbl, {0}, {false}); - auto tracker = std::make_shared(); PUniqueId load_id; load_id.set_hi(2); load_id.set_lo(3); @@ -227,7 +224,7 @@ TEST_F(LoadChannelMgrTest, normal) { request.add_tablet_ids(21); request.add_tablet_ids(20); - RowBatch row_batch(row_desc, 1024, tracker.get()); + RowBatch row_batch(row_desc, 1024); // row1 { @@ -393,7 +390,7 @@ TEST_F(LoadChannelMgrTest, add_failed) { request.add_tablet_ids(21); request.add_tablet_ids(20); - RowBatch row_batch(row_desc, 1024, tracker.get()); + RowBatch row_batch(row_desc, 1024); // row1 { @@ -448,7 +445,6 @@ TEST_F(LoadChannelMgrTest, close_failed) { DescriptorTbl::create(&obj_pool, tdesc_tbl, &desc_tbl); auto tuple_desc = desc_tbl->get_tuple_descriptor(0); RowDescriptor row_desc(*desc_tbl, {0}, {false}); - auto tracker = std::make_shared(); PUniqueId load_id; load_id.set_hi(2); load_id.set_lo(3); @@ -486,7 +482,7 @@ TEST_F(LoadChannelMgrTest, close_failed) { request.add_partition_ids(10); request.add_partition_ids(11); - RowBatch row_batch(row_desc, 1024, tracker.get()); + RowBatch row_batch(row_desc, 1024); // row1 { @@ -540,7 +536,6 @@ TEST_F(LoadChannelMgrTest, unknown_tablet) { 
DescriptorTbl::create(&obj_pool, tdesc_tbl, &desc_tbl); auto tuple_desc = desc_tbl->get_tuple_descriptor(0); RowDescriptor row_desc(*desc_tbl, {0}, {false}); - auto tracker = std::make_shared(); PUniqueId load_id; load_id.set_hi(2); load_id.set_lo(3); @@ -575,7 +570,7 @@ TEST_F(LoadChannelMgrTest, unknown_tablet) { request.add_tablet_ids(22); request.add_tablet_ids(20); - RowBatch row_batch(row_desc, 1024, tracker.get()); + RowBatch row_batch(row_desc, 1024); // row1 { @@ -626,7 +621,6 @@ TEST_F(LoadChannelMgrTest, duplicate_packet) { DescriptorTbl::create(&obj_pool, tdesc_tbl, &desc_tbl); auto tuple_desc = desc_tbl->get_tuple_descriptor(0); RowDescriptor row_desc(*desc_tbl, {0}, {false}); - auto tracker = std::make_shared(); PUniqueId load_id; load_id.set_hi(2); load_id.set_lo(3); @@ -661,7 +655,7 @@ TEST_F(LoadChannelMgrTest, duplicate_packet) { request.add_tablet_ids(21); request.add_tablet_ids(20); - RowBatch row_batch(row_desc, 1024, tracker.get()); + RowBatch row_batch(row_desc, 1024); // row1 { diff --git a/be/test/runtime/memory_scratch_sink_test.cpp b/be/test/runtime/memory_scratch_sink_test.cpp index 00a6dfa8a7..c39b099dee 100644 --- a/be/test/runtime/memory_scratch_sink_test.cpp +++ b/be/test/runtime/memory_scratch_sink_test.cpp @@ -226,8 +226,7 @@ TEST_F(MemoryScratchSinkTest, work_flow_normal) { status = scan_node.open(_state); ASSERT_TRUE(status.ok()); - std::unique_ptr mem_tracker(new MemTracker(-1)); - RowBatch row_batch(scan_node._row_descriptor, _state->batch_size(), mem_tracker.get()); + RowBatch row_batch(scan_node._row_descriptor, _state->batch_size()); bool eos = false; while (!eos) { diff --git a/be/test/runtime/test_env.cc b/be/test/runtime/test_env.cc index 6068e52986..8fdb6715ca 100644 --- a/be/test/runtime/test_env.cc +++ b/be/test/runtime/test_env.cc @@ -89,8 +89,8 @@ Status TestEnv::create_query_state(int64_t query_id, int max_buffers, int block_ std::shared_ptr mgr; RETURN_IF_ERROR(BufferedBlockMgr2::create( - *runtime_state, 
_block_mgr_parent_tracker, (*runtime_state)->runtime_profile(), - _tmp_file_mgr.get(), calculate_mem_tracker(max_buffers, block_size), block_size, &mgr)); + *runtime_state, (*runtime_state)->runtime_profile(), _tmp_file_mgr.get(), + calculate_mem_tracker(max_buffers, block_size), block_size, &mgr)); (*runtime_state)->set_block_mgr2(mgr); // (*runtime_state)->_block_mgr = mgr; diff --git a/be/test/tools/benchmark_tool.cpp b/be/test/tools/benchmark_tool.cpp index 3a1dd00275..4622c8e4d9 100644 --- a/be/test/tools/benchmark_tool.cpp +++ b/be/test/tools/benchmark_tool.cpp @@ -466,7 +466,7 @@ public: StorageReadOptions read_opts; read_opts.stats = &stats; std::unique_ptr iter; - _segment->new_iterator(get_schema(), read_opts, nullptr, &iter); + _segment->new_iterator(get_schema(), read_opts, &iter); RowBlockV2 block(get_schema(), 1024); int left = _dataset.size(); @@ -511,7 +511,7 @@ public: StorageReadOptions read_opts; read_opts.stats = &stats; std::unique_ptr iter; - _segment->new_iterator(get_schema(), read_opts, nullptr, &iter); + _segment->new_iterator(get_schema(), read_opts, &iter); RowBlockV2 block(get_schema(), 1024); int left = _dataset.size(); diff --git a/be/test/util/arrow/arrow_row_batch_test.cpp b/be/test/util/arrow/arrow_row_batch_test.cpp index 9dc96d3de0..bbcdc6db28 100644 --- a/be/test/util/arrow/arrow_row_batch_test.cpp +++ b/be/test/util/arrow/arrow_row_batch_test.cpp @@ -71,9 +71,8 @@ TEST_F(ArrowRowBatchTest, PrettyPrint) { RowDescriptor* row_desc; auto doris_st = convert_to_row_desc(&obj_pool, *record_batch->schema(), &row_desc); ASSERT_TRUE(doris_st.ok()); - auto tracker = std::make_shared(-1, "PrettyPrintTest"); std::shared_ptr row_batch; - doris_st = convert_to_row_batch(*record_batch, *row_desc, tracker, &row_batch); + doris_st = convert_to_row_batch(*record_batch, *row_desc, &row_batch); ASSERT_TRUE(doris_st.ok()); { diff --git a/be/test/util/arrow/arrow_work_flow_test.cpp b/be/test/util/arrow/arrow_work_flow_test.cpp index 
08b9865f3a..d723a6ec04 100644 --- a/be/test/util/arrow/arrow_work_flow_test.cpp +++ b/be/test/util/arrow/arrow_work_flow_test.cpp @@ -305,8 +305,7 @@ TEST_F(ArrowWorkFlowTest, NormalUse) { status = scan_node.open(_state); ASSERT_TRUE(status.ok()); - auto mem_tracker = std::make_shared(-1); - RowBatch row_batch(scan_node._row_descriptor, _state->batch_size(), mem_tracker.get()); + RowBatch row_batch(scan_node._row_descriptor, _state->batch_size()); bool eos = false; while (!eos) { diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index 92d9b28c65..a621564d8c 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -57,8 +57,7 @@ TEST(BlockTest, RowBatchCovertToBlock) { auto tuple_desc = const_cast(schema_scanner.tuple_desc()); RowDescriptor row_desc(tuple_desc, false); - auto tracker_ptr = MemTracker::create_tracker(-1, "BlockTest", nullptr); - RowBatch row_batch(row_desc, 1024, tracker_ptr.get()); + RowBatch row_batch(row_desc, 1024); int16_t k1 = -100; int32_t k2 = 100000; diff --git a/be/test/vec/exec/vgeneric_iterators_test.cpp b/be/test/vec/exec/vgeneric_iterators_test.cpp index 94864673a2..20fcb0b780 100644 --- a/be/test/vec/exec/vgeneric_iterators_test.cpp +++ b/be/test/vec/exec/vgeneric_iterators_test.cpp @@ -103,7 +103,7 @@ TEST(VGenericIteratorsTest, Union) { inputs.push_back(vectorized::new_auto_increment_iterator(schema, 200)); inputs.push_back(vectorized::new_auto_increment_iterator(schema, 300)); - auto iter = vectorized::new_union_iterator(inputs, MemTracker::create_tracker(-1, "VUnionIterator", nullptr)); + auto iter = vectorized::new_union_iterator(inputs); StorageReadOptions opts; auto st = iter->init(opts); ASSERT_TRUE(st.ok()); @@ -149,7 +149,7 @@ TEST(VGenericIteratorsTest, Merge) { inputs.push_back(vectorized::new_auto_increment_iterator(schema, 200)); inputs.push_back(vectorized::new_auto_increment_iterator(schema, 300)); - auto iter = vectorized::new_merge_iterator(inputs, 
MemTracker::create_tracker(-1, "VMergeIterator", nullptr), -1); + auto iter = vectorized::new_merge_iterator(inputs, -1); StorageReadOptions opts; auto st = iter->init(opts); ASSERT_TRUE(st.ok()); @@ -276,7 +276,7 @@ TEST(VGenericIteratorsTest, MergeWithSeqColumn) { inputs.push_back(new SeqColumnUtIterator(schema, num_rows, rows_begin, seq_column_id, seq_id_in_every_file)); } - auto iter = vectorized::new_merge_iterator(inputs, MemTracker::create_tracker(-1, "VMergeIterator", nullptr), seq_column_id); + auto iter = vectorized::new_merge_iterator(inputs, seq_column_id); StorageReadOptions opts; auto st = iter->init(opts); ASSERT_TRUE(st.ok()); diff --git a/be/test/vec/exprs/vexpr_test.cpp b/be/test/vec/exprs/vexpr_test.cpp index cd2335ade3..f2de7ba5ac 100644 --- a/be/test/vec/exprs/vexpr_test.cpp +++ b/be/test/vec/exprs/vexpr_test.cpp @@ -48,8 +48,7 @@ TEST(TEST_VEXPR, ABSTEST) { auto tuple_desc = const_cast(desc_tbl->get_tuple_descriptor(0)); doris::RowDescriptor row_desc(tuple_desc, false); - auto tracker_ptr = doris::MemTracker::create_tracker(-1, "BlockTest", nullptr); - doris::RowBatch row_batch(row_desc, 1024, tracker_ptr.get()); + doris::RowBatch row_batch(row_desc, 1024); std::string expr_json = R"|({"1":{"lst":["rec",2,{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]}}},"4":{"i32":1},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"abs"}}},"2":{"i32":0},"3":{"lst":["rec",1,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]}}},"5":{"tf":0},"7":{"str":"abs(INT)"},"9":{"rec":{"1":{"str":"_ZN5doris13MathFunctions3absEPN9doris_udf15FunctionContextERKNS1_6IntValE"}}},"11":{"i64":0}}}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0}}},"20":{"i32":-1},"23":{"i32":-1}}]}})|"; doris::TExpr exprx = 
apache::thrift::from_json_string(expr_json); @@ -96,8 +95,7 @@ TEST(TEST_VEXPR, ABSTEST2) { schema_scanner.init(¶m, &object_pool); auto tuple_desc = const_cast(schema_scanner.tuple_desc()); RowDescriptor row_desc(tuple_desc, false); - auto tracker_ptr = MemTracker::create_tracker(-1, "BlockTest", nullptr); - RowBatch row_batch(row_desc, 1024, tracker_ptr.get()); + RowBatch row_batch(row_desc, 1024); std::string expr_json = R"|({"1":{"lst":["rec",2,{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]}}},"4":{"i32":1},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"abs"}}},"2":{"i32":0},"3":{"lst":["rec",1,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]}}},"5":{"tf":0},"7":{"str":"abs(INT)"},"9":{"rec":{"1":{"str":"_ZN5doris13MathFunctions3absEPN9doris_udf15FunctionContextERKNS1_6IntValE"}}},"11":{"i64":0}}}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0}}},"20":{"i32":-1},"23":{"i32":-1}}]}})|"; TExpr exprx = apache::thrift::from_json_string(expr_json); diff --git a/be/test/vec/function/function_comparison_test.cpp b/be/test/vec/function/function_comparison_test.cpp index 1668a253bf..ede480925f 100644 --- a/be/test/vec/function/function_comparison_test.cpp +++ b/be/test/vec/function/function_comparison_test.cpp @@ -38,8 +38,7 @@ TEST(ComparisonTest, ComparisonFunctionTest) { auto tuple_desc = const_cast(schema_scanner.tuple_desc()); RowDescriptor row_desc(tuple_desc, false); - auto tracker_ptr = MemTracker::create_tracker(-1, "BlockTest", nullptr); - RowBatch row_batch(row_desc, 1024, tracker_ptr.get()); + RowBatch row_batch(row_desc, 1024); int16_t k1 = -100; int32_t k2 = 100; diff --git a/docs/en/administrator-guide/config/be_config.md b/docs/en/administrator-guide/config/be_config.md index b93c7c0a17..4707ebfc51 100644 
--- a/docs/en/administrator-guide/config/be_config.md +++ b/docs/en/administrator-guide/config/be_config.md @@ -1425,6 +1425,12 @@ The size of the buffer before flashing * Default: 3 +### `track_new_delete` + +* Type: bool +* Description: Whether Hook TCmalloc new/delete, currently consume/release tls mem tracker in Hook. +* Default: true + ### `mem_tracker_level` * Type: int16 diff --git a/docs/zh-CN/administrator-guide/config/be_config.md b/docs/zh-CN/administrator-guide/config/be_config.md index 53fdb23a52..dfb73ff0a4 100644 --- a/docs/zh-CN/administrator-guide/config/be_config.md +++ b/docs/zh-CN/administrator-guide/config/be_config.md @@ -1444,6 +1444,12 @@ webserver默认工作线程数 ``` * 默认值: 3 +### `track_new_delete` + +* 类型:bool +* 描述:是否Hook TCmalloc new/delete,目前在Hook中统计thread local MemTracker。 +* 默认值:true + ### `mem_tracker_level` * 类型: int16 diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java b/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java index 7e03688b37..330af06537 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java @@ -554,7 +554,8 @@ public class Coordinator { if (colocateFragmentIds.contains(fragment.getFragmentId().asInt())) { int rate = Math.min(Config.query_colocate_join_memory_limit_penalty_factor, instanceNum); long newMemory = memoryLimit / rate; - + // TODO(zxy): The meaning of mem limit in query_options has become the real once query mem limit. + // The logic to modify mem_limit here needs to be modified or deleted. for (TExecPlanFragmentParams tParam : tParams) { tParam.query_options.setMemLimit(newMemory); } diff --git a/gensrc/proto/types.proto b/gensrc/proto/types.proto index aaa2a70771..e1f8445620 100644 --- a/gensrc/proto/types.proto +++ b/gensrc/proto/types.proto @@ -222,4 +222,5 @@ enum PPlanFragmentCancelReason { INTERNAL_ERROR = 3; TIMEOUT = 4; CALL_RPC_ERROR = 5; + MEMORY_LIMIT_EXCEED = 6; }