From cd3e1dce74fa3a4f4e271e2af56b1754f1e147ec Mon Sep 17 00:00:00 2001 From: zzzxl Date: Sat, 1 Mar 2025 11:00:19 +0800 Subject: [PATCH] [feature](inverted index) Add profile statistics for each condition in inverted index filters (#48459) https://github.com/apache/doris/pull/47504 --- be/src/olap/inverted_index_profile.h | 57 +++++++++++++++++++ be/src/olap/inverted_index_stats.h | 34 +++++++++++ be/src/olap/olap_common.h | 2 + .../segment_v2/inverted_index_reader.cpp | 20 ++++++- be/src/pipeline/exec/olap_scan_operator.cpp | 4 ++ be/src/pipeline/exec/olap_scan_operator.h | 1 + be/src/vec/exec/scan/new_olap_scan_node.cpp | 4 ++ be/src/vec/exec/scan/new_olap_scan_node.h | 1 + be/src/vec/exec/scan/new_olap_scanner.cpp | 4 ++ be/test/olap/inverted_index_profile_test.cpp | 44 ++++++++++++++ 10 files changed, 169 insertions(+), 2 deletions(-) create mode 100644 be/src/olap/inverted_index_profile.h create mode 100644 be/src/olap/inverted_index_stats.h create mode 100644 be/test/olap/inverted_index_profile_test.cpp diff --git a/be/src/olap/inverted_index_profile.h b/be/src/olap/inverted_index_profile.h new file mode 100644 index 0000000000..9255e41dc5 --- /dev/null +++ b/be/src/olap/inverted_index_profile.h @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +#include "olap/inverted_index_stats.h" +#include "util/runtime_profile.h" + +namespace doris { + +class InvertedIndexProfileReporter { +public: + InvertedIndexProfileReporter() = default; + ~InvertedIndexProfileReporter() = default; + + void update(RuntimeProfile* profile, const InvertedIndexStatistics* statistics) { + // Determine the iteration limit: the smaller of 20 or the size of statistics->stats + size_t iteration_limit = std::min(20, statistics->stats.size()); + + for (size_t i = 0; i < iteration_limit; ++i) { + const auto& stats = statistics->stats[i]; + + ADD_TIMER_WITH_LEVEL(profile, hit_rows_name, 1); + auto* hit_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "fr_" + stats.column_name, + TUnit::UNIT, hit_rows_name, 1); + COUNTER_UPDATE(hit_rows, stats.hit_rows); + + ADD_TIMER_WITH_LEVEL(profile, exec_time_name, 1); + auto* exec_time = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "ft_" + stats.column_name, + TUnit::TIME_NS, exec_time_name, 1); + COUNTER_UPDATE(exec_time, stats.exec_time); + } + } + +private: + static constexpr const char* hit_rows_name = "HitRows"; + static constexpr const char* exec_time_name = "ExecTime"; +}; + +} // namespace doris diff --git a/be/src/olap/inverted_index_stats.h b/be/src/olap/inverted_index_stats.h new file mode 100644 index 0000000000..b82b230f41 --- /dev/null +++ b/be/src/olap/inverted_index_stats.h @@ -0,0 +1,34 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
#pragma once

#include <cstdint>
#include <string>
#include <vector>

namespace doris {

/// Statistics recorded for a single inverted index query condition.
struct InvertedIndexQueryStatistics {
    // Name of the column the condition was evaluated on.
    std::string column_name;
    // Number of rows the condition matched.
    int64_t hit_rows = 0;
    // Time spent executing the condition.
    // NOTE(review): presumably nanoseconds (it is reported with a TIME_NS unit) — confirm.
    int64_t exec_time = 0;
};

/// Collection of per-condition statistics, one entry per executed query condition.
struct InvertedIndexStatistics {
    // The element type must be spelled out: a data member cannot rely on
    // class template argument deduction.
    std::vector<InvertedIndexQueryStatistics> stats;
};

} // namespace doris
query_type, + bit_map); + }; + + if (_runtime_state->query_options().enable_profile) { + InvertedIndexQueryStatistics query_stats; + { + SCOPED_RAW_TIMER(&query_stats.exec_time); + RETURN_IF_ERROR(execute_query()); + } + query_stats.column_name = column_name; + query_stats.hit_rows = bit_map->cardinality(); + _stats->inverted_index_stats.stats.emplace_back(query_stats); + } else { + RETURN_IF_ERROR(execute_query()); + } + return Status::OK(); } diff --git a/be/src/pipeline/exec/olap_scan_operator.cpp b/be/src/pipeline/exec/olap_scan_operator.cpp index aa7413b4a1..63735161c5 100644 --- a/be/src/pipeline/exec/olap_scan_operator.cpp +++ b/be/src/pipeline/exec/olap_scan_operator.cpp @@ -190,6 +190,10 @@ Status OlapScanLocalState::_init_profile() { _segment_create_column_readers_timer = ADD_TIMER(_scanner_profile, "SegmentCreateColumnReadersTimer"); _segment_load_index_timer = ADD_TIMER(_scanner_profile, "SegmentLoadIndexTimer"); + + _index_filter_profile = std::make_unique("IndexFilter"); + _scanner_profile->add_child(_index_filter_profile.get(), true, nullptr); + return Status::OK(); } diff --git a/be/src/pipeline/exec/olap_scan_operator.h b/be/src/pipeline/exec/olap_scan_operator.h index de00cd4f37..37c460a575 100644 --- a/be/src/pipeline/exec/olap_scan_operator.h +++ b/be/src/pipeline/exec/olap_scan_operator.h @@ -97,6 +97,7 @@ private: std::set _maybe_read_column_ids; std::unique_ptr _segment_profile; + std::unique_ptr _index_filter_profile; RuntimeProfile::Counter* _num_disks_accessed_counter = nullptr; diff --git a/be/src/vec/exec/scan/new_olap_scan_node.cpp b/be/src/vec/exec/scan/new_olap_scan_node.cpp index 16236b96e2..4415960ff3 100644 --- a/be/src/vec/exec/scan/new_olap_scan_node.cpp +++ b/be/src/vec/exec/scan/new_olap_scan_node.cpp @@ -195,6 +195,10 @@ Status NewOlapScanNode::_init_profile() { _total_segment_counter = ADD_COUNTER(_segment_profile, "NumSegmentTotal", TUnit::UNIT); _runtime_filter_info = ADD_LABEL_COUNTER_WITH_LEVEL(_runtime_profile, 
"RuntimeFilterInfo", 1); + + _index_filter_profile = std::make_unique("IndexFilter"); + _scanner_profile->add_child(_index_filter_profile.get(), true, nullptr); + return Status::OK(); } diff --git a/be/src/vec/exec/scan/new_olap_scan_node.h b/be/src/vec/exec/scan/new_olap_scan_node.h index fd634dbdae..dfa1842e77 100644 --- a/be/src/vec/exec/scan/new_olap_scan_node.h +++ b/be/src/vec/exec/scan/new_olap_scan_node.h @@ -122,6 +122,7 @@ private: private: std::unique_ptr _segment_profile; + std::unique_ptr _index_filter_profile; RuntimeProfile::Counter* _num_disks_accessed_counter = nullptr; diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp b/be/src/vec/exec/scan/new_olap_scanner.cpp index 6ffb60f425..237ca738f4 100644 --- a/be/src/vec/exec/scan/new_olap_scanner.cpp +++ b/be/src/vec/exec/scan/new_olap_scanner.cpp @@ -37,6 +37,7 @@ #include "exprs/function_filter.h" #include "io/cache/block/block_file_cache_profile.h" #include "io/io_common.h" +#include "olap/inverted_index_profile.h" #include "olap/olap_common.h" #include "olap/olap_tuple.h" #include "olap/rowset/rowset.h" @@ -638,6 +639,9 @@ void NewOlapScanner::_collect_profile_before_close() { stats.inverted_index_searcher_cache_hit); \ COUNTER_UPDATE(Parent->_inverted_index_searcher_cache_miss_counter, \ stats.inverted_index_searcher_cache_miss); \ + InvertedIndexProfileReporter inverted_index_profile; \ + inverted_index_profile.update(Parent->_index_filter_profile.get(), \ + &stats.inverted_index_stats); \ if (config::enable_file_cache) { \ io::FileCacheProfileReporter cache_profile(Parent->_segment_profile.get()); \ cache_profile.update(&stats.file_cache_stats); \ diff --git a/be/test/olap/inverted_index_profile_test.cpp b/be/test/olap/inverted_index_profile_test.cpp new file mode 100644 index 0000000000..e3aa355560 --- /dev/null +++ b/be/test/olap/inverted_index_profile_test.cpp @@ -0,0 +1,44 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/inverted_index_profile.h" + +#include + +#include + +#include "olap/inverted_index_stats.h" + +namespace doris { + +TEST(InvertedIndexProfileReporterTest, UpdateTest) { + auto runtime_profile = std::make_unique("test_profile"); + + InvertedIndexStatistics statistics; + statistics.stats.push_back({"test_column1", 101, 201}); + statistics.stats.push_back({"test_column2", 102, 202}); + + InvertedIndexProfileReporter reporter; + reporter.update(runtime_profile.get(), &statistics); + + ASSERT_EQ(runtime_profile->get_counter("fr_test_column1")->value(), 101); + ASSERT_EQ(runtime_profile->get_counter("ft_test_column1")->value(), 201); + ASSERT_EQ(runtime_profile->get_counter("fr_test_column2")->value(), 102); + ASSERT_EQ(runtime_profile->get_counter("ft_test_column2")->value(), 202); +} + +} // namespace doris \ No newline at end of file