[bugfix](ngram bf index) process differently for normal bloom filter index and ngram bf index (#21310)
* process differently for normal bloom filter index and ngram bf index * fix review comments for readability * add test case * add test case for delete condition
This commit is contained in:
@ -148,7 +148,9 @@ public:
|
||||
|
||||
bool evaluate_and(const BloomFilter* bf) const override { return _nested->evaluate_and(bf); }
|
||||
|
||||
bool can_do_bloom_filter() const override { return _nested->can_do_bloom_filter(); }
|
||||
// Reports whether the wrapped predicate can be checked against a bloom filter
// index of the requested kind. The decision is delegated entirely to `_nested`;
// `ngram` is forwarded unchanged (true: ngram bloom filter index, false: normal
// bloom filter index).
bool can_do_bloom_filter(bool ngram) const override {
|
||||
return _nested->can_do_bloom_filter(ngram);
|
||||
}
|
||||
|
||||
void evaluate_vec(const vectorized::IColumn& column, uint16_t size,
|
||||
bool* flags) const override {
|
||||
|
||||
@ -87,7 +87,7 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool can_do_bloom_filter() const { return false; }
|
||||
// Whether this predicate can be evaluated against a bloom filter index of the
// requested kind (`ngram` true: ngram bloom filter, false: normal bloom filter).
// The default answer is "no" for either kind; overriding classes opt in.
virtual bool can_do_bloom_filter(bool ngram) const { return false; }
|
||||
|
||||
//evaluate predicate on inverted
|
||||
virtual Status evaluate(const std::string& column_name, InvertedIndexIterator* iterator,
|
||||
@ -121,7 +121,9 @@ public:
|
||||
|
||||
void evaluate_vec(vectorized::MutableColumns& block, uint16_t size, bool* flags) const override;
|
||||
|
||||
bool can_do_bloom_filter() const override { return _predicate->can_do_bloom_filter(); }
|
||||
// Bloom filter eligibility of this wrapper is exactly that of the predicate it
// holds: the query, including the `ngram` flag (true: ngram bloom filter index,
// false: normal bloom filter index), is passed straight to `_predicate`.
bool can_do_bloom_filter(bool ngram) const override {
|
||||
return _predicate->can_do_bloom_filter(ngram);
|
||||
}
|
||||
|
||||
private:
|
||||
const ColumnPredicate* _predicate;
|
||||
@ -188,9 +190,9 @@ public:
|
||||
|
||||
bool evaluate_and(const StringRef* dict_words, const size_t dict_num) const override;
|
||||
|
||||
bool can_do_bloom_filter() const override {
|
||||
bool can_do_bloom_filter(bool ngram) const override {
|
||||
for (auto& pred : _block_column_predicate_vec) {
|
||||
if (!pred->can_do_bloom_filter()) {
|
||||
if (!pred->can_do_bloom_filter(ngram)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@ -183,7 +183,7 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool can_do_bloom_filter() const { return false; }
|
||||
// Whether this predicate can be evaluated against a bloom filter index of the
// requested kind (`ngram` true: ngram bloom filter, false: normal bloom filter).
// Returns false for both kinds unless a subclass overrides it.
virtual bool can_do_bloom_filter(bool ngram) const { return false; }
|
||||
|
||||
// used to evaluate pre read column in lazy materialization
|
||||
// now only support integer/float
|
||||
|
||||
@ -244,6 +244,8 @@ public:
|
||||
|
||||
bool evaluate_and(const segment_v2::BloomFilter* bf) const override {
|
||||
if constexpr (PT == PredicateType::EQ) {
|
||||
// EQ predicate can not use ngram bf, just return true to accept
|
||||
if (bf->is_ngram_bf()) return true;
|
||||
if constexpr (std::is_same_v<T, StringRef>) {
|
||||
return bf->test_bytes(_value.data, _value.size);
|
||||
} else if constexpr (Type == TYPE_DATE) {
|
||||
@ -272,7 +274,9 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
bool can_do_bloom_filter() const override { return PT == PredicateType::EQ; }
|
||||
// A comparison predicate can use a bloom filter only when it is an equality
// test (PT == PredicateType::EQ), and only a normal bloom filter: asking about
// an ngram bloom filter (`ngram` == true) always yields false.
bool can_do_bloom_filter(bool ngram) const override {
|
||||
return PT == PredicateType::EQ && !ngram;
|
||||
}
|
||||
|
||||
void evaluate_or(const vectorized::IColumn& column, const uint16_t* sel, uint16_t size,
|
||||
bool* flags) const override {
|
||||
|
||||
@ -381,6 +381,8 @@ public:
|
||||
|
||||
bool evaluate_and(const segment_v2::BloomFilter* bf) const override {
|
||||
if constexpr (PT == PredicateType::IN_LIST) {
|
||||
// IN predicate can not use ngram bf, just return true to accept
|
||||
if (bf->is_ngram_bf()) return true;
|
||||
HybridSetBase::IteratorBase* iter = _values->begin();
|
||||
while (iter->has_next()) {
|
||||
if constexpr (std::is_same_v<T, StringRef>) {
|
||||
@ -408,7 +410,9 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
bool can_do_bloom_filter() const override { return PT == PredicateType::IN_LIST; }
|
||||
// A list predicate can use a bloom filter only for the IN_LIST form
// (PT == PredicateType::IN_LIST), and only a normal bloom filter: asking about
// an ngram bloom filter (`ngram` == true) always yields false.
bool can_do_bloom_filter(bool ngram) const override {
|
||||
return PT == PredicateType::IN_LIST && !ngram;
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename LeftT, typename RightT>
|
||||
|
||||
@ -76,12 +76,14 @@ public:
|
||||
_page_ng_bf = std::move(src);
|
||||
}
|
||||
// Checks this like predicate against the bloom filter `bf`.
// Returns true when the data covered by `bf` has to be kept ("accepted"), and
// only returns false when `bf->contains(*_page_ng_bf)` reports false.
// `bf` is dereferenced without a null check.
bool evaluate_and(const BloomFilter* bf) const override {
|
||||
// like predicate can not use normal bf, just return true to accept
|
||||
if (!bf->is_ngram_bf()) return true;
|
||||
// Only consult the filter when a pattern-side ngram filter has been set.
// NOTE(review): contains() presumably tests that every bit set in
// *_page_ng_bf is also set in bf - confirm against BloomFilter::contains.
if (_page_ng_bf) {
|
||||
return bf->contains(*_page_ng_bf);
|
||||
}
|
||||
// No pattern-side filter available: nothing can be ruled out.
return true;
|
||||
}
|
||||
bool can_do_bloom_filter() const override { return true; }
|
||||
// This predicate is usable with an ngram bloom filter index only: the answer is
// true when `ngram` is true and false when a normal bloom filter is asked about.
bool can_do_bloom_filter(bool ngram) const override { return ngram; }
|
||||
|
||||
private:
|
||||
template <bool is_and>
|
||||
|
||||
@ -84,6 +84,8 @@ public:
|
||||
}
|
||||
|
||||
bool evaluate_and(const segment_v2::BloomFilter* bf) const override {
|
||||
// null predicate can not use ngram bf, just return true to accept
|
||||
if (bf->is_ngram_bf()) return true;
|
||||
if (_is_null) {
|
||||
return bf->test_bytes(nullptr, 0);
|
||||
} else {
|
||||
@ -92,7 +94,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
bool can_do_bloom_filter() const override { return _is_null; }
|
||||
// Usable with a bloom filter only when `_is_null` is set (presumably the
// IS NULL form of the predicate - confirm), and only with a normal bloom
// filter: an ngram bloom filter (`ngram` == true) is never applicable.
bool can_do_bloom_filter(bool ngram) const override { return _is_null && !ngram; }
|
||||
|
||||
void evaluate_vec(const vectorized::IColumn& column, uint16_t size, bool* flags) const override;
|
||||
|
||||
|
||||
@ -1213,7 +1213,8 @@ Status FileColumnIterator::get_row_ranges_by_zone_map(
|
||||
|
||||
Status FileColumnIterator::get_row_ranges_by_bloom_filter(
|
||||
const AndBlockColumnPredicate* col_predicates, RowRanges* row_ranges) {
|
||||
if (col_predicates->can_do_bloom_filter() && _reader->has_bloom_filter_index()) {
|
||||
if ((col_predicates->can_do_bloom_filter(false) && _reader->has_bloom_filter_index(false)) ||
|
||||
(col_predicates->can_do_bloom_filter(true) && _reader->has_bloom_filter_index(true))) {
|
||||
RETURN_IF_ERROR(_reader->get_row_ranges_by_bloom_filter(col_predicates, row_ranges));
|
||||
}
|
||||
return Status::OK();
|
||||
|
||||
@ -137,7 +137,15 @@ public:
|
||||
|
||||
bool has_zone_map() const { return _zone_map_index_meta != nullptr; }
|
||||
bool has_bitmap_index() const { return _bitmap_index_meta != nullptr; }
|
||||
bool has_bloom_filter_index() const { return _bf_index_meta != nullptr; }
|
||||
// Reports whether this column carries a bloom filter index of the requested
// kind.
//   ngram == true  : true only if the index algorithm is NGRAM_BLOOM_FILTER.
//   ngram == false : true only if an index exists and its algorithm is anything
//                    other than NGRAM_BLOOM_FILTER.
// Returns false for both kinds when there is no bloom filter index metadata.
bool has_bloom_filter_index(bool ngram) const {
|
||||
// No bloom filter index metadata at all: neither kind is present.
if (_bf_index_meta == nullptr) return false;
|
||||
|
||||
if (ngram) {
|
||||
return _bf_index_meta->algorithm() == BloomFilterAlgorithmPB::NGRAM_BLOOM_FILTER;
|
||||
} else {
|
||||
return _bf_index_meta->algorithm() != BloomFilterAlgorithmPB::NGRAM_BLOOM_FILTER;
|
||||
}
|
||||
}
|
||||
|
||||
// Check if this column could match `cond' using segment zone map.
|
||||
// Since segment zone map is stored in metadata, this function is fast without I/O.
|
||||
|
||||
@ -0,0 +1,33 @@
|
||||
-- This file is automatically generated. You should know what you did if you want to edit this
|
||||
-- !select_all_1 --
|
||||
1 dt_bjn001 p9-webcast-sign.douyinpic.com test /%/7212503657802320699% /test 100 false
|
||||
1 dt_bjn001 p9-webcast-sign.douyinpic.com test /%/7212503657802320699%xxx /test 100 false
|
||||
|
||||
-- !select_eq_1 --
|
||||
1 dt_bjn001 p9-webcast-sign.douyinpic.com test /%/7212503657802320699% /test 100 false
|
||||
|
||||
-- !select_in_1 --
|
||||
1 dt_bjn001 p9-webcast-sign.douyinpic.com test /%/7212503657802320699% /test 100 false
|
||||
|
||||
-- !select_like_1 --
|
||||
1 dt_bjn001 p9-webcast-sign.douyinpic.com test /%/7212503657802320699% /test 100 false
|
||||
1 dt_bjn001 p9-webcast-sign.douyinpic.com test /%/7212503657802320699%xxx /test 100 false
|
||||
|
||||
-- !select_all_2 --
|
||||
1 dt_bjn001 p9-webcast-sign.douyinpic.com test /%/7212503657802320699%xxx /test 100 false
|
||||
|
||||
-- !select_eq_2 --
|
||||
|
||||
-- !select_in_2 --
|
||||
|
||||
-- !select_like_2 --
|
||||
1 dt_bjn001 p9-webcast-sign.douyinpic.com test /%/7212503657802320699%xxx /test 100 false
|
||||
|
||||
-- !select_all_3 --
|
||||
|
||||
-- !select_eq_3 --
|
||||
|
||||
-- !select_in_3 --
|
||||
|
||||
-- !select_like_3 --
|
||||
|
||||
@ -0,0 +1,62 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
suite("test_ngram_bloomfilter_index") {
|
||||
// todo: test ngram bloom filter index DDL, such as create, drop, alter table index
|
||||
def tableName = 'test_ngram_bloomfilter_index'
|
||||
sql "DROP TABLE IF EXISTS ${tableName}"
|
||||
sql """
|
||||
CREATE TABLE IF NOT EXISTS ${tableName} (
|
||||
`key_id` bigint(20) NULL COMMENT '',
|
||||
`category` varchar(200) NULL COMMENT '',
|
||||
`https_url` varchar(300) NULL COMMENT '',
|
||||
`hostname` varchar(300) NULL,
|
||||
`http_url` text NULL COMMENT '',
|
||||
`url_path` varchar(2000) NULL COMMENT '',
|
||||
`cnt` bigint(20) NULL COMMENT '',
|
||||
`host_flag` boolean NULL COMMENT '',
|
||||
INDEX idx_ngrambf (`http_url`) USING NGRAM_BF PROPERTIES("gram_size" = "2", "bf_size" = "512")
|
||||
) ENGINE=OLAP
|
||||
DUPLICATE KEY(`key_id`, `category`)
|
||||
COMMENT 'OLAP'
|
||||
DISTRIBUTED BY HASH(`key_id`) BUCKETS 3
|
||||
PROPERTIES("replication_num" = "1");
|
||||
"""
|
||||
|
||||
sql "INSERT INTO ${tableName} values (1, 'dt_bjn001', 'p9-webcast-sign.douyinpic.com', 'test', '/%/7212503657802320699%', '/test', 100, false);"
|
||||
sql "INSERT INTO ${tableName} values (1, 'dt_bjn001', 'p9-webcast-sign.douyinpic.com', 'test', '/%/7212503657802320699%xxx', '/test', 100, false);"
|
||||
|
||||
|
||||
sql "SET enable_function_pushdown = true"
|
||||
|
||||
qt_select_all_1 "SELECT * FROM ${tableName}"
|
||||
qt_select_eq_1 "SELECT * FROM ${tableName} WHERE http_url = '/%/7212503657802320699%'"
|
||||
qt_select_in_1 "SELECT * FROM ${tableName} WHERE http_url IN ('/%/7212503657802320699%')"
|
||||
qt_select_like_1 "SELECT * FROM ${tableName} WHERE http_url like '/%/7212503657802320699%'"
|
||||
|
||||
// delete and then select
|
||||
sql "DELETE FROM ${tableName} WHERE http_url IN ('/%/7212503657802320699%')"
|
||||
qt_select_all_2 "SELECT * FROM ${tableName}"
|
||||
qt_select_eq_2 "SELECT * FROM ${tableName} WHERE http_url = '/%/7212503657802320699%'"
|
||||
qt_select_in_2 "SELECT * FROM ${tableName} WHERE http_url IN ('/%/7212503657802320699%')"
|
||||
qt_select_like_2 "SELECT * FROM ${tableName} WHERE http_url like '/%/7212503657802320699%'"
|
||||
|
||||
sql "DELETE FROM ${tableName} WHERE http_url = '/%/7212503657802320699%xxx'"
|
||||
qt_select_all_3 "SELECT * FROM ${tableName}"
|
||||
qt_select_eq_3 "SELECT * FROM ${tableName} WHERE http_url = '/%/7212503657802320699%'"
|
||||
qt_select_in_3 "SELECT * FROM ${tableName} WHERE http_url IN ('/%/7212503657802320699%')"
|
||||
qt_select_like_3 "SELECT * FROM ${tableName} WHERE http_url like '/%/7212503657802320699%'"
|
||||
}
|
||||
Reference in New Issue
Block a user