[improvement](scanner) Remove the predicate that is always true for the segment (#25366)
By utilizing the zonemap index of the segment, we can ascertain if a predicate is always true. For example, if the segment’s maximum value is 100 and the predicate is col < 101, then this predicate is always true for this segment.
This commit is contained in:
@ -1109,6 +1109,8 @@ DEFINE_Bool(exit_on_exception, "false");
|
||||
DEFINE_String(doris_cgroup_cpu_path, "");
|
||||
DEFINE_Bool(enable_cpu_hard_limit, "false");
|
||||
|
||||
// Enables removing, for query reads, the predicates that a segment's zone map
// shows are always true for that segment.
DEFINE_Bool(ignore_always_true_predicate_for_segment, "true");
|
||||
|
||||
// clang-format off
|
||||
#ifdef BE_TEST
|
||||
// test s3
|
||||
|
||||
@ -1179,6 +1179,9 @@ DECLARE_mBool(exit_on_exception);
|
||||
DECLARE_String(doris_cgroup_cpu_path);
|
||||
DECLARE_Bool(enable_cpu_hard_limit);
|
||||
|
||||
// Whether to remove predicates that the zone map shows are always true for a segment.
|
||||
DECLARE_Bool(ignore_always_true_predicate_for_segment);
|
||||
|
||||
#ifdef BE_TEST
|
||||
// test s3
|
||||
DECLARE_String(test_s3_resource);
|
||||
|
||||
@ -173,6 +173,10 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
// Reports whether this predicate is known to hold for the whole value range
// described by `statistic` (callers pass a {min, max} pair).
// The default answer is "not known": predicate types that can prove it
// override this method.
virtual bool is_always_true(
        const std::pair<WrapperField*, WrapperField*>& statistic) const {
    return false;
}
|
||||
|
||||
// Default implementation: answers false for any `statistic` pair.
// NOTE(review): presumably consulted when checking delete conditions against
// zone-map statistics -- confirm against the callers.
virtual bool evaluate_del(
        const std::pair<WrapperField*, WrapperField*>& statistic) const {
    return false;
}
|
||||
|
||||
@ -174,6 +174,29 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
// Decides whether this comparison holds for every value in the range
// [statistic.first, statistic.second] (the min and max of the data).
//
// Returns true only for LT/LE/GT/GE predicates whose constant lies strictly
// (LT/GT) or inclusively (LE/GE) beyond the relevant bound. Returns false when
// either bound is null and for every other predicate type.
bool is_always_true(const std::pair<WrapperField*, WrapperField*>& statistic) const override {
    WrapperField* lower_bound = statistic.first;
    WrapperField* upper_bound = statistic.second;

    // A null bound gives no usable range, so nothing can be proven.
    if (lower_bound->is_null() || upper_bound->is_null()) {
        return false;
    }

    // Copy the raw cells into typed values so they can be compared with _value.
    T range_min {};
    T range_max {};
    memcpy(reinterpret_cast<char*>(&range_min), lower_bound->cell_ptr(),
           sizeof(WarpperFieldType));
    memcpy(reinterpret_cast<char*>(&range_max), upper_bound->cell_ptr(),
           sizeof(WarpperFieldType));

    if constexpr (PT == PredicateType::LT) {
        // col < _value: true everywhere once _value exceeds the maximum.
        return _value > range_max;
    } else if constexpr (PT == PredicateType::LE) {
        // col <= _value: true everywhere once _value reaches the maximum.
        return _value >= range_max;
    } else if constexpr (PT == PredicateType::GT) {
        // col > _value: true everywhere once _value is below the minimum.
        return _value < range_min;
    } else if constexpr (PT == PredicateType::GE) {
        // col >= _value: true everywhere once _value does not exceed the minimum.
        return _value <= range_min;
    } else {
        // Other predicate types are never reported as always true here.
        return false;
    }
}
|
||||
|
||||
bool evaluate_del(const std::pair<WrapperField*, WrapperField*>& statistic) const override {
|
||||
if (statistic.first->is_null() || statistic.second->is_null()) {
|
||||
return false;
|
||||
|
||||
@ -31,6 +31,7 @@
|
||||
#include "io/fs/file_reader.h"
|
||||
#include "olap/block_column_predicate.h"
|
||||
#include "olap/column_predicate.h"
|
||||
#include "olap/comparison_predicate.h"
|
||||
#include "olap/decimal12.h"
|
||||
#include "olap/inverted_index_parser.h"
|
||||
#include "olap/iterators.h"
|
||||
@ -338,6 +339,31 @@ bool ColumnReader::match_condition(const AndBlockColumnPredicate* col_predicates
|
||||
col_predicates);
|
||||
}
|
||||
|
||||
// Erases from `predicates` every predicate on `column_id` that this column's
// segment-level zone map proves is always true.
//
// predicates: candidate predicates; matching entries are erased in place.
// column_id:  only predicates whose column_id() equals this value are examined.
//
// Returns true when at least one predicate was erased. Returns false when
// nothing was erased, including when there is no zone map index or when no
// predicate refers to `column_id`.
bool ColumnReader::prune_predicates_by_zone_map(std::vector<ColumnPredicate*>& predicates,
                                                const int column_id) const {
    if (_zone_map_index == nullptr) {
        return false;
    }

    // Bail out before allocating the min/max fields and parsing the zone map
    // when no predicate targets this column: nothing could be erased anyway.
    bool has_candidate = false;
    for (ColumnPredicate* predicate : predicates) {
        if (predicate->column_id() == column_id) {
            has_candidate = true;
            break;
        }
    }
    if (!has_candidate) {
        return false;
    }

    FieldType type = _type_info->type();
    std::unique_ptr<WrapperField> min_value(WrapperField::create_by_type(type, _meta_length));
    std::unique_ptr<WrapperField> max_value(WrapperField::create_by_type(type, _meta_length));
    _parse_zone_map(*_segment_zone_map, min_value.get(), max_value.get());

    auto pruned = false;
    for (auto it = predicates.begin(); it != predicates.end();) {
        auto predicate = *it;
        if (predicate->column_id() == column_id &&
            predicate->is_always_true({min_value.get(), max_value.get()})) {
            pruned = true;
            // erase() returns the iterator to the next element, so do not advance.
            it = predicates.erase(it);
        } else {
            ++it;
        }
    }
    return pruned;
}
|
||||
|
||||
void ColumnReader::_parse_zone_map(const ZoneMapPB& zone_map, WrapperField* min_value_container,
|
||||
WrapperField* max_value_container) const {
|
||||
// min value and max value are valid if has_not_null is true
|
||||
|
||||
@ -162,6 +162,9 @@ public:
|
||||
|
||||
// True when this reader's row count (_num_rows) is zero.
bool is_empty() const {
    return _num_rows == 0;
}
|
||||
|
||||
// Erases from `predicates` the predicates on `column_id` that the segment
// zone map shows are always true. Returns true if any predicate was erased.
bool prune_predicates_by_zone_map(std::vector<ColumnPredicate*>& predicates,
|
||||
const int column_id) const;
|
||||
|
||||
// Returns the stored compression type (_meta_compression).
CompressionTypePB get_compression() const {
    return _meta_compression;
}
|
||||
|
||||
// Returns the stored row count (_num_rows).
uint64_t num_rows() const {
    return _num_rows;
}
|
||||
|
||||
@ -129,7 +129,6 @@ Status Segment::new_iterator(SchemaSPtr schema, const StorageReadOptions& read_o
|
||||
return Status::OK();
|
||||
}
|
||||
}
|
||||
|
||||
if (read_options.use_topn_opt) {
|
||||
auto query_ctx = read_options.runtime_state->get_query_ctx();
|
||||
auto runtime_predicate = query_ctx->get_runtime_predicate().get_predictate();
|
||||
@ -157,6 +156,25 @@ Status Segment::new_iterator(SchemaSPtr schema, const StorageReadOptions& read_o
|
||||
iter->reset(new SegmentIterator(this->shared_from_this(), schema));
|
||||
}
|
||||
|
||||
if (config::ignore_always_true_predicate_for_segment &&
|
||||
read_options.io_ctx.reader_type == ReaderType::READER_QUERY &&
|
||||
!read_options.column_predicates.empty()) {
|
||||
auto pruned_predicates = read_options.column_predicates;
|
||||
auto pruned = false;
|
||||
for (auto& it : _column_readers) {
|
||||
if (it.second->prune_predicates_by_zone_map(pruned_predicates, it.first)) {
|
||||
pruned = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (pruned) {
|
||||
auto options_with_pruned_predicates = read_options;
|
||||
options_with_pruned_predicates.column_predicates = pruned_predicates;
|
||||
LOG(INFO) << "column_predicates pruned from " << read_options.column_predicates.size()
|
||||
<< " to " << pruned_predicates.size();
|
||||
return iter->get()->init(options_with_pruned_predicates);
|
||||
}
|
||||
}
|
||||
return iter->get()->init(read_options);
|
||||
}
|
||||
|
||||
|
||||
@ -0,0 +1,25 @@
|
||||
-- This file is automatically generated. You should know what you did if you want to edit this
|
||||
-- !select1 --
|
||||
1 jerry 2020-10-01
|
||||
2 tom 2020-10-02
|
||||
3 jack 2020-10-01
|
||||
4 tony 2020-10-02
|
||||
|
||||
-- !select2 --
|
||||
1 jerry 2020-10-01
|
||||
3 jack 2020-10-01
|
||||
|
||||
-- !select3 --
|
||||
|
||||
-- !select4 --
|
||||
1 jerry 2020-10-01
|
||||
2 tom 2020-10-02
|
||||
3 jack 2020-10-01
|
||||
4 tony 2020-10-02
|
||||
|
||||
-- !select5 --
|
||||
2 tom 2020-10-02
|
||||
4 tony 2020-10-02
|
||||
|
||||
-- !select6 --
|
||||
|
||||
@ -0,0 +1,67 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
// Regression suite for range predicates on the `birthday` column.
// The table only ever holds the dates 2020-10-01 and 2020-10-02, and each
// query below uses a bound that matches all, some, or none of the rows.
suite("test_select_with_predicate_prune") {
|
||||
// Start from a clean state.
sql """
|
||||
drop table if exists `test_select_with_predicate_prune`;
|
||||
"""
|
||||
// Duplicate-key table, auto list-partitioned on `birthday`, single bucket.
sql """
|
||||
CREATE TABLE IF NOT EXISTS `test_select_with_predicate_prune` (
|
||||
id int,
|
||||
name string,
|
||||
birthday date not null
|
||||
)
|
||||
duplicate key(`id`)
|
||||
AUTO PARTITION BY LIST (`birthday`)()
|
||||
DISTRIBUTED BY HASH(`id`) buckets 1
|
||||
PROPERTIES
|
||||
(
|
||||
"replication_allocation" = "tag.location.default: 1"
|
||||
);
|
||||
"""
|
||||
|
||||
// Two separate inserts; each one carries a row for both birthday values.
sql """
|
||||
insert into test_select_with_predicate_prune values (1, 'jerry', '2020-10-01'), (2, 'tom', '2020-10-02');
|
||||
"""
|
||||
sql """
|
||||
insert into test_select_with_predicate_prune values (3, 'jack', '2020-10-01'), (4, 'tony', '2020-10-02');
|
||||
"""
|
||||
|
||||
// Upper bound above every stored date: all four rows match.
qt_select1 """
|
||||
select * from test_select_with_predicate_prune where birthday < '2020-10-03' order by id;
|
||||
"""
|
||||
|
||||
// Upper bound equal to the larger date: only the 2020-10-01 rows match.
qt_select2 """
|
||||
select * from test_select_with_predicate_prune where birthday < '2020-10-02' order by id;
|
||||
"""
|
||||
|
||||
// Upper bound equal to the smaller date: no row matches.
qt_select3 """
|
||||
select * from test_select_with_predicate_prune where birthday < '2020-10-01' order by id;
|
||||
"""
|
||||
|
||||
|
||||
// Lower bound below every stored date: all four rows match.
qt_select4 """
|
||||
select * from test_select_with_predicate_prune where birthday > '2020-09-30' order by id;
|
||||
"""
|
||||
|
||||
// Lower bound equal to the smaller date: only the 2020-10-02 rows match.
qt_select5 """
|
||||
select * from test_select_with_predicate_prune where birthday > '2020-10-01' order by id;
|
||||
"""
|
||||
|
||||
// Lower bound equal to the larger date: no row matches.
qt_select6 """
|
||||
select * from test_select_with_predicate_prune where birthday > '2020-10-02' order by id;
|
||||
"""
|
||||
}
|
||||
Reference in New Issue
Block a user