From 845dcce7f031814ea6793775d253161e7de05533 Mon Sep 17 00:00:00 2001 From: zzzxl <33418555+zzzxl1993@users.noreply.github.com> Date: Thu, 13 Jun 2024 21:31:20 +0800 Subject: [PATCH] =?UTF-8?q?Revert=20"[opt](inverted=20index)=20performance?= =?UTF-8?q?=20optimization=20for=20need=5Fread=5Fdata=20in=20=E2=80=A6"=20?= =?UTF-8?q?(#36260)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reverts apache/doris#36192 --- .../rowset/segment_v2/segment_iterator.cpp | 26 ++--- .../test_need_read_data_fault_injection.out | 22 ----- ...test_need_read_data_fault_injection.groovy | 94 ------------------- 3 files changed, 13 insertions(+), 129 deletions(-) delete mode 100644 regression-test/data/fault_injection_p0/test_need_read_data_fault_injection.out delete mode 100644 regression-test/suites/fault_injection_p0/test_need_read_data_fault_injection.groovy diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 2e369d2c7f..9702ef4a58 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -872,7 +872,6 @@ Status SegmentIterator::_apply_inverted_index_except_leafnode_of_andnode( Status SegmentIterator::_apply_index_except_leafnode_of_andnode() { for (auto* pred : _col_preds_except_leafnode_of_andnode) { - auto column_id = pred->column_id(); auto pred_type = pred->type(); bool is_support = pred_type == PredicateType::EQ || pred_type == PredicateType::NE || pred_type == PredicateType::LT || pred_type == PredicateType::LE || @@ -881,7 +880,6 @@ Status SegmentIterator::_apply_index_except_leafnode_of_andnode() { pred_type == PredicateType::IN_LIST || pred_type == PredicateType::NOT_IN_LIST; if (!is_support) { - _need_read_data_indices[column_id] = true; continue; } @@ -891,17 +889,16 @@ Status SegmentIterator::_apply_index_except_leafnode_of_andnode() { if (can_apply_by_inverted_index) { res = _apply_inverted_index_except_leafnode_of_andnode(pred, &bitmap); } else { - _need_read_data_indices[column_id] = true; continue; } - bool need_remaining_after_evaluate = _column_has_fulltext_index(column_id) && + bool need_remaining_after_evaluate = _column_has_fulltext_index(pred->column_id()) && PredicateTypeTraits::is_equal_or_list(pred_type); if (!res.ok()) { if (_downgrade_without_index(res, need_remaining_after_evaluate)) { // downgrade without index query - _not_apply_index_pred.insert(column_id); - _need_read_data_indices[column_id] = true; + _not_apply_index_pred.insert(pred->column_id()); + _need_read_data_indices[pred->column_id()] = true; continue; } LOG(WARNING) << "failed to evaluate index" @@ -912,10 +909,17 @@ Status SegmentIterator::_apply_index_except_leafnode_of_andnode() { std::string pred_result_sign = _gen_predicate_result_sign(pred); _rowid_result_for_index.emplace(pred_result_sign, std::make_pair(true, std::move(bitmap))); + } - if (!pred->predicate_params()->marked_by_runtime_filter) { - if (!_need_read_data_indices.contains(column_id)) { - _need_read_data_indices[column_id] = false; + for (auto* pred : _col_preds_except_leafnode_of_andnode) { + auto column_name = _schema->column(pred->column_id())->name(); + if (!_remaining_conjunct_roots.empty() && + _check_column_pred_all_push_down(column_name, true, + pred->type() == PredicateType::MATCH) && + !pred->predicate_params()->marked_by_runtime_filter) { + // if column's need_read_data already set true, we can not set it to false now. + if (_need_read_data_indices.find(pred->column_id()) == _need_read_data_indices.end()) { + _need_read_data_indices[pred->column_id()] = false; } } } @@ -1881,10 +1885,6 @@ Status SegmentIterator::_read_columns_by_index(uint32_t nrows_read_limit, uint32 continue; } - DBUG_EXECUTE_IF("segment_iterator._read_columns_by_index", { - return Status::Error("{} does not need to read data"); - }) - if (is_continuous) { size_t rows_read = nrows_read; _opts.stats->block_first_read_seek_num += 1; diff --git a/regression-test/data/fault_injection_p0/test_need_read_data_fault_injection.out b/regression-test/data/fault_injection_p0/test_need_read_data_fault_injection.out deleted file mode 100644 index 37885e404d..0000000000 --- a/regression-test/data/fault_injection_p0/test_need_read_data_fault_injection.out +++ /dev/null @@ -1,22 +0,0 @@ --- This file is automatically generated. You should know what you did if you want to edit this --- !sql -- -863 - --- !sql -- -210 - --- !sql -- -0 - --- !sql -- -819 - --- !sql -- -199 - --- !sql -- -713 - --- !sql -- -18 - diff --git a/regression-test/suites/fault_injection_p0/test_need_read_data_fault_injection.groovy b/regression-test/suites/fault_injection_p0/test_need_read_data_fault_injection.groovy deleted file mode 100644 index d7a92f8e7e..0000000000 --- a/regression-test/suites/fault_injection_p0/test_need_read_data_fault_injection.groovy +++ /dev/null @@ -1,94 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -suite("test_need_read_data_fault_injection", "nonConcurrent") { - // define a sql table - def indexTbName = "test_need_read_data_fault_injection" - - sql "DROP TABLE IF EXISTS ${indexTbName}" - sql """ - CREATE TABLE ${indexTbName} ( - `@timestamp` int(11) NULL COMMENT "", - `clientip` varchar(20) NULL COMMENT "", - `request` text NULL COMMENT "", - `status` int(11) NULL COMMENT "", - `size` int(11) NULL COMMENT "", - INDEX clientip_idx (`clientip`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '', - INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '' - ) ENGINE=OLAP - DUPLICATE KEY(`@timestamp`) - COMMENT "OLAP" - DISTRIBUTED BY RANDOM BUCKETS 1 - PROPERTIES ( - "replication_allocation" = "tag.location.default: 1", - "disable_auto_compaction" = "true" - ); - """ - - def load_httplogs_data = {table_name, label, read_flag, format_flag, file_name, ignore_failure=false, - expected_succ_rows = -1, load_to_single_tablet = 'true' -> - - // load the json data - streamLoad { - table "${table_name}" - - // set http request header params - set 'label', label + "_" + UUID.randomUUID().toString() - set 'read_json_by_line', read_flag - set 'format', format_flag - file file_name // import json file - time 10000 // limit inflight 10s - if (expected_succ_rows >= 0) { - set 'max_filter_ratio', '1' - } - - // if declared a check callback, the default check condition will ignore. - // So you must check all condition - check { result, exception, startTime, endTime -> - if (ignore_failure && expected_succ_rows < 0) { return } - if (exception != null) { - throw exception - } - log.info("Stream load result: ${result}".toString()) - def json = parseJson(result) - } - } - } - - try { - load_httplogs_data.call(indexTbName, 'test_need_read_data_fault_injection', 'true', 'json', 'documents-1000.json') - - sql "sync" - - try { - GetDebugPoint().enableDebugPointForAllBEs("segment_iterator._read_columns_by_index") - - qt_sql """ select count() from ${indexTbName} where (request match_phrase 'hm' or request match_phrase 'jpg' or request match_phrase 'gif'); """ - qt_sql """ select count() from ${indexTbName} where (request match_phrase 'hm' or request match_phrase 'jpg' and request match_phrase 'gif'); """ - qt_sql """ select count() from ${indexTbName} where (request match_phrase 'hm' and request match_phrase 'jpg' and request match_phrase 'gif'); """ - qt_sql """ select count() from ${indexTbName} where (request match_phrase 'hm' and request match_phrase 'jpg' or request match_phrase 'gif'); """ - - qt_sql """ select count() from ${indexTbName} where (clientip match '1' or request match 'jpg' or clientip match '2'); """ - qt_sql """ select count() from ${indexTbName} where (clientip match '3' or request match 'gif' or clientip match '4'); """ - qt_sql """ select count() from ${indexTbName} where (clientip match 'images' or clientip match '5' or clientip match 'english'); """ - - } finally { - GetDebugPoint().disableDebugPointForAllBEs("segment_iterator._read_columns_by_index") - } - } finally { - } -} \ No newline at end of file