[feature](inverted index) Add profile statistics for each condition in inverted index filters (#48459)

https://github.com/apache/doris/pull/47504
This commit is contained in:
zzzxl
2025-03-01 11:00:19 +08:00
committed by GitHub
parent 0dd532f487
commit cd3e1dce74
10 changed files with 169 additions and 2 deletions

View File

@ -0,0 +1,57 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <string>
#include <vector>
#include "olap/inverted_index_stats.h"
#include "util/runtime_profile.h"
namespace doris {
// Publishes per-condition inverted index statistics into a RuntimeProfile.
//
// For every reported entry two child counters are registered on the profile:
//   "fr_<column_name>" (TUnit::UNIT)    under the parent counter "HitRows"
//   "ft_<column_name>" (TUnit::TIME_NS) under the parent counter "ExecTime"
// and each is updated with the entry's hit_rows / exec_time respectively.
class InvertedIndexProfileReporter {
public:
    InvertedIndexProfileReporter() = default;
    ~InvertedIndexProfileReporter() = default;

    // Adds the entries of `statistics` to `profile`.
    //
    // @param profile     profile that receives the counters; a null pointer
    //                    makes the call a no-op.
    // @param statistics  collected per-query statistics; a null pointer or an
    //                    empty `stats` vector makes the call a no-op.
    //
    // Only the first `max_reported_conditions` entries are reported; any
    // further entries are ignored.
    void update(RuntimeProfile* profile, const InvertedIndexStatistics* statistics) {
        // Guard against missing inputs instead of dereferencing a null pointer.
        if (profile == nullptr || statistics == nullptr) {
            return;
        }

        const auto& entries = statistics->stats;
        const size_t iteration_limit =
                std::min<size_t>(max_reported_conditions, entries.size());
        if (iteration_limit == 0) {
            // Nothing to report: do not register the parent counters either,
            // so an empty statistics object leaves the profile untouched.
            return;
        }

        // The parent counters are loop-invariant, so register them once up front.
        // NOTE(review): assumes re-registering an already existing counter name
        // is harmless (the previous code re-registered them on every
        // iteration) -- confirm against RuntimeProfile.
        // NOTE(review): "HitRows" is registered through the timer macro even
        // though its children are row counts (TUnit::UNIT); kept as-is so the
        // emitted profile does not change.
        ADD_TIMER_WITH_LEVEL(profile, hit_rows_name, 1);
        ADD_TIMER_WITH_LEVEL(profile, exec_time_name, 1);

        for (size_t i = 0; i < iteration_limit; ++i) {
            const auto& stats = entries[i];

            auto* hit_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "fr_" + stats.column_name,
                                                          TUnit::UNIT, hit_rows_name, 1);
            COUNTER_UPDATE(hit_rows, stats.hit_rows);

            auto* exec_time = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "ft_" + stats.column_name,
                                                           TUnit::TIME_NS, exec_time_name, 1);
            COUNTER_UPDATE(exec_time, stats.exec_time);
        }
    }

private:
    // Upper bound on the number of statistics entries published per update() call.
    static constexpr size_t max_reported_conditions = 20;
    // Names of the parent counters the per-column child counters hang under.
    static constexpr const char* hit_rows_name = "HitRows";
    static constexpr const char* exec_time_name = "ExecTime";
};
} // namespace doris

View File

@ -0,0 +1,34 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <string>
#include <vector>
namespace doris {
// Statistics recorded for a single inverted index query.
// Kept as a plain aggregate so it can be brace-initialized as
// {column_name, hit_rows, exec_time}.
struct InvertedIndexQueryStatistics {
    // Name of the column the query was evaluated against.
    std::string column_name;
    // Number of rows matched by the query; zero until filled in.
    int64_t hit_rows {0};
    // Time spent executing the query; zero until filled in.
    int64_t exec_time {0};
};
// Container for the statistics of individual inverted index queries.
struct InvertedIndexStatistics {
// One entry per recorded query, in the order the entries were appended.
std::vector<InvertedIndexQueryStatistics> stats;
};
} // namespace doris

View File

@ -36,6 +36,7 @@
#include "common/config.h"
#include "io/io_common.h"
#include "olap/inverted_index_stats.h"
#include "olap/olap_define.h"
#include "olap/rowset/rowset_fwd.h"
#include "util/hash_util.hpp"
@ -377,6 +378,7 @@ struct OlapReaderStatistics {
int64_t inverted_index_searcher_search_timer = 0;
int64_t inverted_index_searcher_cache_hit = 0;
int64_t inverted_index_searcher_cache_miss = 0;
InvertedIndexStatistics inverted_index_stats;
int64_t output_index_result_column_timer = 0;
// number of segment filtered by column stat when creating seg iterator

View File

@ -1220,8 +1220,24 @@ Status InvertedIndexIterator::read_from_inverted_index(
}
}
RETURN_IF_ERROR(
_reader->query(_stats, _runtime_state, column_name, query_value, query_type, bit_map));
auto execute_query = [&]() {
return _reader->query(_stats, _runtime_state, column_name, query_value, query_type,
bit_map);
};
if (_runtime_state->query_options().enable_profile) {
InvertedIndexQueryStatistics query_stats;
{
SCOPED_RAW_TIMER(&query_stats.exec_time);
RETURN_IF_ERROR(execute_query());
}
query_stats.column_name = column_name;
query_stats.hit_rows = bit_map->cardinality();
_stats->inverted_index_stats.stats.emplace_back(query_stats);
} else {
RETURN_IF_ERROR(execute_query());
}
return Status::OK();
}

View File

@ -190,6 +190,10 @@ Status OlapScanLocalState::_init_profile() {
_segment_create_column_readers_timer =
ADD_TIMER(_scanner_profile, "SegmentCreateColumnReadersTimer");
_segment_load_index_timer = ADD_TIMER(_scanner_profile, "SegmentLoadIndexTimer");
_index_filter_profile = std::make_unique<RuntimeProfile>("IndexFilter");
_scanner_profile->add_child(_index_filter_profile.get(), true, nullptr);
return Status::OK();
}

View File

@ -97,6 +97,7 @@ private:
std::set<int32_t> _maybe_read_column_ids;
std::unique_ptr<RuntimeProfile> _segment_profile;
std::unique_ptr<RuntimeProfile> _index_filter_profile;
RuntimeProfile::Counter* _num_disks_accessed_counter = nullptr;

View File

@ -195,6 +195,10 @@ Status NewOlapScanNode::_init_profile() {
_total_segment_counter = ADD_COUNTER(_segment_profile, "NumSegmentTotal", TUnit::UNIT);
_runtime_filter_info = ADD_LABEL_COUNTER_WITH_LEVEL(_runtime_profile, "RuntimeFilterInfo", 1);
_index_filter_profile = std::make_unique<RuntimeProfile>("IndexFilter");
_scanner_profile->add_child(_index_filter_profile.get(), true, nullptr);
return Status::OK();
}

View File

@ -122,6 +122,7 @@ private:
private:
std::unique_ptr<RuntimeProfile> _segment_profile;
std::unique_ptr<RuntimeProfile> _index_filter_profile;
RuntimeProfile::Counter* _num_disks_accessed_counter = nullptr;

View File

@ -37,6 +37,7 @@
#include "exprs/function_filter.h"
#include "io/cache/block/block_file_cache_profile.h"
#include "io/io_common.h"
#include "olap/inverted_index_profile.h"
#include "olap/olap_common.h"
#include "olap/olap_tuple.h"
#include "olap/rowset/rowset.h"
@ -638,6 +639,9 @@ void NewOlapScanner::_collect_profile_before_close() {
stats.inverted_index_searcher_cache_hit); \
COUNTER_UPDATE(Parent->_inverted_index_searcher_cache_miss_counter, \
stats.inverted_index_searcher_cache_miss); \
InvertedIndexProfileReporter inverted_index_profile; \
inverted_index_profile.update(Parent->_index_filter_profile.get(), \
&stats.inverted_index_stats); \
if (config::enable_file_cache) { \
io::FileCacheProfileReporter cache_profile(Parent->_segment_profile.get()); \
cache_profile.update(&stats.file_cache_stats); \

View File

@ -0,0 +1,44 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "olap/inverted_index_profile.h"
#include <gtest/gtest.h>
#include <memory>
#include "olap/inverted_index_stats.h"
namespace doris {
// Verifies that InvertedIndexProfileReporter::update() publishes one
// "fr_<column>" (hit rows) and one "ft_<column>" (exec time) counter per
// statistics entry, carrying the values stored in that entry.
TEST(InvertedIndexProfileReporterTest, UpdateTest) {
    auto runtime_profile = std::make_unique<RuntimeProfile>("test_profile");

    InvertedIndexStatistics statistics;
    statistics.stats.push_back({"test_column1", 101, 201});
    statistics.stats.push_back({"test_column2", 102, 202});

    InvertedIndexProfileReporter reporter;
    reporter.update(runtime_profile.get(), &statistics);

    struct ExpectedCounter {
        const char* name;
        int64_t value;
    };
    const ExpectedCounter expected_counters[] = {
            {"fr_test_column1", 101},
            {"ft_test_column1", 201},
            {"fr_test_column2", 102},
            {"ft_test_column2", 202},
    };

    for (const auto& expected : expected_counters) {
        auto* counter = runtime_profile->get_counter(expected.name);
        // Fail the test cleanly instead of dereferencing a missing counter.
        // NOTE(review): presumes get_counter() yields nullptr for an unknown
        // name -- confirm against RuntimeProfile.
        ASSERT_NE(counter, nullptr) << "counter not registered: " << expected.name;
        ASSERT_EQ(counter->value(), expected.value) << "counter: " << expected.name;
    }
}
} // namespace doris