Revert "[opt](inverted index) performance optimization for need_read_data in …" (#36260)
Reverts apache/doris#36192
This commit is contained in:
@ -872,7 +872,6 @@ Status SegmentIterator::_apply_inverted_index_except_leafnode_of_andnode(
|
||||
|
||||
Status SegmentIterator::_apply_index_except_leafnode_of_andnode() {
|
||||
for (auto* pred : _col_preds_except_leafnode_of_andnode) {
|
||||
auto column_id = pred->column_id();
|
||||
auto pred_type = pred->type();
|
||||
bool is_support = pred_type == PredicateType::EQ || pred_type == PredicateType::NE ||
|
||||
pred_type == PredicateType::LT || pred_type == PredicateType::LE ||
|
||||
@ -881,7 +880,6 @@ Status SegmentIterator::_apply_index_except_leafnode_of_andnode() {
|
||||
pred_type == PredicateType::IN_LIST ||
|
||||
pred_type == PredicateType::NOT_IN_LIST;
|
||||
if (!is_support) {
|
||||
_need_read_data_indices[column_id] = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -891,17 +889,16 @@ Status SegmentIterator::_apply_index_except_leafnode_of_andnode() {
|
||||
if (can_apply_by_inverted_index) {
|
||||
res = _apply_inverted_index_except_leafnode_of_andnode(pred, &bitmap);
|
||||
} else {
|
||||
_need_read_data_indices[column_id] = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
bool need_remaining_after_evaluate = _column_has_fulltext_index(column_id) &&
|
||||
bool need_remaining_after_evaluate = _column_has_fulltext_index(pred->column_id()) &&
|
||||
PredicateTypeTraits::is_equal_or_list(pred_type);
|
||||
if (!res.ok()) {
|
||||
if (_downgrade_without_index(res, need_remaining_after_evaluate)) {
|
||||
// downgrade without index query
|
||||
_not_apply_index_pred.insert(column_id);
|
||||
_need_read_data_indices[column_id] = true;
|
||||
_not_apply_index_pred.insert(pred->column_id());
|
||||
_need_read_data_indices[pred->column_id()] = true;
|
||||
continue;
|
||||
}
|
||||
LOG(WARNING) << "failed to evaluate index"
|
||||
@ -912,10 +909,17 @@ Status SegmentIterator::_apply_index_except_leafnode_of_andnode() {
|
||||
|
||||
std::string pred_result_sign = _gen_predicate_result_sign(pred);
|
||||
_rowid_result_for_index.emplace(pred_result_sign, std::make_pair(true, std::move(bitmap)));
|
||||
}
|
||||
|
||||
if (!pred->predicate_params()->marked_by_runtime_filter) {
|
||||
if (!_need_read_data_indices.contains(column_id)) {
|
||||
_need_read_data_indices[column_id] = false;
|
||||
for (auto* pred : _col_preds_except_leafnode_of_andnode) {
|
||||
auto column_name = _schema->column(pred->column_id())->name();
|
||||
if (!_remaining_conjunct_roots.empty() &&
|
||||
_check_column_pred_all_push_down(column_name, true,
|
||||
pred->type() == PredicateType::MATCH) &&
|
||||
!pred->predicate_params()->marked_by_runtime_filter) {
|
||||
// If the column's need_read_data flag is already set to true, it must not be reset to false here.
|
||||
if (_need_read_data_indices.find(pred->column_id()) == _need_read_data_indices.end()) {
|
||||
_need_read_data_indices[pred->column_id()] = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1881,10 +1885,6 @@ Status SegmentIterator::_read_columns_by_index(uint32_t nrows_read_limit, uint32
|
||||
continue;
|
||||
}
|
||||
|
||||
DBUG_EXECUTE_IF("segment_iterator._read_columns_by_index", {
|
||||
return Status::Error<ErrorCode::INTERNAL_ERROR>("{} does not need to read data");
|
||||
})
|
||||
|
||||
if (is_continuous) {
|
||||
size_t rows_read = nrows_read;
|
||||
_opts.stats->block_first_read_seek_num += 1;
|
||||
|
||||
@ -1,22 +0,0 @@
|
||||
-- This file is automatically generated. You should know what you did if you want to edit this
|
||||
-- !sql --
|
||||
863
|
||||
|
||||
-- !sql --
|
||||
210
|
||||
|
||||
-- !sql --
|
||||
0
|
||||
|
||||
-- !sql --
|
||||
819
|
||||
|
||||
-- !sql --
|
||||
199
|
||||
|
||||
-- !sql --
|
||||
713
|
||||
|
||||
-- !sql --
|
||||
18
|
||||
|
||||
@ -1,94 +0,0 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
suite("test_need_read_data_fault_injection", "nonConcurrent") {
|
||||
// define a sql table
|
||||
def indexTbName = "test_need_read_data_fault_injection"
|
||||
|
||||
sql "DROP TABLE IF EXISTS ${indexTbName}"
|
||||
sql """
|
||||
CREATE TABLE ${indexTbName} (
|
||||
`@timestamp` int(11) NULL COMMENT "",
|
||||
`clientip` varchar(20) NULL COMMENT "",
|
||||
`request` text NULL COMMENT "",
|
||||
`status` int(11) NULL COMMENT "",
|
||||
`size` int(11) NULL COMMENT "",
|
||||
INDEX clientip_idx (`clientip`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '',
|
||||
INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT ''
|
||||
) ENGINE=OLAP
|
||||
DUPLICATE KEY(`@timestamp`)
|
||||
COMMENT "OLAP"
|
||||
DISTRIBUTED BY RANDOM BUCKETS 1
|
||||
PROPERTIES (
|
||||
"replication_allocation" = "tag.location.default: 1",
|
||||
"disable_auto_compaction" = "true"
|
||||
);
|
||||
"""
|
||||
|
||||
def load_httplogs_data = {table_name, label, read_flag, format_flag, file_name, ignore_failure=false,
|
||||
expected_succ_rows = -1, load_to_single_tablet = 'true' ->
|
||||
|
||||
// load the json data
|
||||
streamLoad {
|
||||
table "${table_name}"
|
||||
|
||||
// set http request header params
|
||||
set 'label', label + "_" + UUID.randomUUID().toString()
|
||||
set 'read_json_by_line', read_flag
|
||||
set 'format', format_flag
|
||||
file file_name // import json file
|
||||
time 10000 // limit inflight 10s
|
||||
if (expected_succ_rows >= 0) {
|
||||
set 'max_filter_ratio', '1'
|
||||
}
|
||||
|
||||
// If a check callback is declared, the default check condition is ignored.
|
||||
// So you must check every condition yourself.
|
||||
check { result, exception, startTime, endTime ->
|
||||
if (ignore_failure && expected_succ_rows < 0) { return }
|
||||
if (exception != null) {
|
||||
throw exception
|
||||
}
|
||||
log.info("Stream load result: ${result}".toString())
|
||||
def json = parseJson(result)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
load_httplogs_data.call(indexTbName, 'test_need_read_data_fault_injection', 'true', 'json', 'documents-1000.json')
|
||||
|
||||
sql "sync"
|
||||
|
||||
try {
|
||||
GetDebugPoint().enableDebugPointForAllBEs("segment_iterator._read_columns_by_index")
|
||||
|
||||
qt_sql """ select count() from ${indexTbName} where (request match_phrase 'hm' or request match_phrase 'jpg' or request match_phrase 'gif'); """
|
||||
qt_sql """ select count() from ${indexTbName} where (request match_phrase 'hm' or request match_phrase 'jpg' and request match_phrase 'gif'); """
|
||||
qt_sql """ select count() from ${indexTbName} where (request match_phrase 'hm' and request match_phrase 'jpg' and request match_phrase 'gif'); """
|
||||
qt_sql """ select count() from ${indexTbName} where (request match_phrase 'hm' and request match_phrase 'jpg' or request match_phrase 'gif'); """
|
||||
|
||||
qt_sql """ select count() from ${indexTbName} where (clientip match '1' or request match 'jpg' or clientip match '2'); """
|
||||
qt_sql """ select count() from ${indexTbName} where (clientip match '3' or request match 'gif' or clientip match '4'); """
|
||||
qt_sql """ select count() from ${indexTbName} where (clientip match 'images' or clientip match '5' or clientip match 'english'); """
|
||||
|
||||
} finally {
|
||||
GetDebugPoint().disableDebugPointForAllBEs("segment_iterator._read_columns_by_index")
|
||||
}
|
||||
} finally {
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user