[opt](inverted index) mow supports index optimization #(#38180)
## Proposed changes https://github.com/apache/doris/pull/37428 https://github.com/apache/doris/pull/37429 <!--Describe your changes.-->
This commit is contained in:
@ -1980,11 +1980,12 @@ Status SegmentIterator::_read_columns_by_index(uint32_t nrows_read_limit, uint32
|
||||
auto debug_col_name = DebugPoints::instance()->get_debug_param_or_default<std::string>(
|
||||
"segment_iterator._read_columns_by_index", "column_name", "");
|
||||
if (debug_col_name.empty()) {
|
||||
return Status::Error<ErrorCode::INTERNAL_ERROR>("{} does not need to read data");
|
||||
return Status::Error<ErrorCode::INTERNAL_ERROR>("does not need to read data");
|
||||
}
|
||||
auto col_name = _opts.tablet_schema->column(cid).name();
|
||||
if (debug_col_name.find(col_name) != std::string::npos) {
|
||||
return Status::Error<ErrorCode::INTERNAL_ERROR>("{} does not need to read data");
|
||||
return Status::Error<ErrorCode::INTERNAL_ERROR>("does not need to read data, {}",
|
||||
debug_col_name);
|
||||
}
|
||||
})
|
||||
|
||||
@ -2179,9 +2180,27 @@ Status SegmentIterator::_read_columns_by_rowids(std::vector<ColumnId>& read_colu
|
||||
}
|
||||
|
||||
for (auto cid : read_column_ids) {
|
||||
if (_prune_column(cid, (*mutable_columns)[cid], true, select_size)) {
|
||||
auto& colunm = (*mutable_columns)[cid];
|
||||
if (_no_need_read_key_data(cid, colunm, select_size)) {
|
||||
continue;
|
||||
}
|
||||
if (_prune_column(cid, colunm, true, select_size)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
DBUG_EXECUTE_IF("segment_iterator._read_columns_by_index", {
|
||||
auto debug_col_name = DebugPoints::instance()->get_debug_param_or_default<std::string>(
|
||||
"segment_iterator._read_columns_by_index", "column_name", "");
|
||||
if (debug_col_name.empty()) {
|
||||
return Status::Error<ErrorCode::INTERNAL_ERROR>("does not need to read data");
|
||||
}
|
||||
auto col_name = _opts.tablet_schema->column(cid).name();
|
||||
if (debug_col_name.find(col_name) != std::string::npos) {
|
||||
return Status::Error<ErrorCode::INTERNAL_ERROR>("does not need to read data, {}",
|
||||
debug_col_name);
|
||||
}
|
||||
})
|
||||
|
||||
RETURN_IF_ERROR(_column_iterators[cid]->read_by_rowids(rowids.data(), select_size,
|
||||
_current_return_columns[cid]));
|
||||
}
|
||||
@ -2794,10 +2813,9 @@ void SegmentIterator::_calculate_pred_in_remaining_conjunct_root(
|
||||
|
||||
bool SegmentIterator::_no_need_read_key_data(ColumnId cid, vectorized::MutableColumnPtr& column,
|
||||
size_t nrows_read) {
|
||||
if (_opts.runtime_state && !_opts.runtime_state->query_options().enable_no_need_read_data_opt) {
|
||||
return false;
|
||||
}
|
||||
if (_opts.tablet_schema->keys_type() != KeysType::DUP_KEYS) {
|
||||
if (!((_opts.tablet_schema->keys_type() == KeysType::DUP_KEYS ||
|
||||
(_opts.tablet_schema->keys_type() == KeysType::UNIQUE_KEYS &&
|
||||
_opts.enable_unique_key_merge_on_write)))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -2855,11 +2873,20 @@ bool SegmentIterator::_can_opt_topn_reads() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!_col_predicates.empty() || !_col_preds_except_leafnode_of_andnode.empty()) {
|
||||
return false;
|
||||
std::set<uint32_t> cids;
|
||||
for (auto* pred : _col_predicates) {
|
||||
cids.insert(pred->column_id());
|
||||
}
|
||||
for (auto* pred : _col_preds_except_leafnode_of_andnode) {
|
||||
cids.insert(pred->column_id());
|
||||
}
|
||||
|
||||
return true;
|
||||
uint32_t delete_sign_idx = _opts.tablet_schema->delete_sign_idx();
|
||||
bool result = std::ranges::all_of(cids.begin(), cids.end(), [delete_sign_idx](auto cid) {
|
||||
return cid == delete_sign_idx;
|
||||
});
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace segment_v2
|
||||
|
||||
@ -604,6 +604,8 @@ bool VExpr::fast_execute(Block& block, const ColumnNumbers& arguments, size_t re
|
||||
size_t input_rows_count, const std::string& function_name) {
|
||||
std::string result_column_name = gen_predicate_result_sign(block, arguments, function_name);
|
||||
if (!block.has(result_column_name)) {
|
||||
DBUG_EXECUTE_IF("segment_iterator.fast_execute",
|
||||
{ return Status::Error<ErrorCode::INTERNAL_ERROR>("fast_execute failed"); })
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@ -0,0 +1,31 @@
|
||||
-- This file is automatically generated. You should know what you did if you want to edit this
|
||||
-- !sql --
|
||||
210
|
||||
|
||||
-- !sql --
|
||||
2
|
||||
|
||||
-- !sql --
|
||||
4
|
||||
|
||||
-- !sql --
|
||||
29
|
||||
|
||||
-- !sql --
|
||||
14
|
||||
|
||||
-- !sql --
|
||||
120
|
||||
|
||||
-- !sql --
|
||||
2
|
||||
|
||||
-- !sql --
|
||||
4
|
||||
|
||||
-- !sql --
|
||||
22
|
||||
|
||||
-- !sql --
|
||||
11
|
||||
|
||||
@ -0,0 +1,25 @@
|
||||
-- This file is automatically generated. You should know what you did if you want to edit this
|
||||
-- !sql --
|
||||
893964617 40.135.0.0 GET /images/hm_bg.jpg HTTP/1.0 200 24736
|
||||
|
||||
-- !sql --
|
||||
893964672 26.1.0.0 GET /images/hm_bg.jpg HTTP/1.0 200 24736
|
||||
|
||||
-- !sql --
|
||||
893964672 26.1.0.0 GET /images/hm_bg.jpg HTTP/1.0 200 24736
|
||||
|
||||
-- !sql --
|
||||
893964672 26.1.0.0 GET /images/hm_bg.jpg HTTP/1.0 200 24736
|
||||
|
||||
-- !sql --
|
||||
893964617 40.135.0.0 GET /images/hm_bg.jpg HTTP/1.0 200 24736
|
||||
|
||||
-- !sql --
|
||||
893964672 26.1.0.0 GET /images/hm_bg.jpg HTTP/1.0 200 24736
|
||||
|
||||
-- !sql --
|
||||
893964672 26.1.0.0 GET /images/hm_bg.jpg HTTP/1.0 200 24736
|
||||
|
||||
-- !sql --
|
||||
893964672 26.1.0.0 GET /images/hm_bg.jpg HTTP/1.0 200 24736
|
||||
|
||||
@ -0,0 +1,122 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
suite("test_all_index_hit_fault_injection", "nonConcurrent") {
|
||||
// define a sql table
|
||||
def indexTbName1 = "test_all_index_hit_fault_injection_1"
|
||||
def indexTbName2 = "test_all_index_hit_fault_injection_2"
|
||||
|
||||
sql "DROP TABLE IF EXISTS ${indexTbName1}"
|
||||
sql """
|
||||
CREATE TABLE ${indexTbName1} (
|
||||
`@timestamp` int(11) NULL COMMENT "",
|
||||
`clientip` varchar(20) NULL COMMENT "",
|
||||
`request` text NULL COMMENT "",
|
||||
`status` int(11) NULL COMMENT "",
|
||||
`size` int(11) NULL COMMENT "",
|
||||
INDEX clientip_idx (`clientip`) USING INVERTED COMMENT '',
|
||||
INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT ''
|
||||
) ENGINE=OLAP
|
||||
DUPLICATE KEY(`@timestamp`)
|
||||
COMMENT "OLAP"
|
||||
DISTRIBUTED BY RANDOM BUCKETS 1
|
||||
PROPERTIES (
|
||||
"replication_allocation" = "tag.location.default: 1",
|
||||
"disable_auto_compaction" = "true"
|
||||
);
|
||||
"""
|
||||
|
||||
sql "DROP TABLE IF EXISTS ${indexTbName2}"
|
||||
sql """
|
||||
CREATE TABLE ${indexTbName2} (
|
||||
`@timestamp` int(11) NULL COMMENT "",
|
||||
`clientip` varchar(20) NULL COMMENT "",
|
||||
`request` text NULL COMMENT "",
|
||||
`status` int(11) NULL COMMENT "",
|
||||
`size` int(11) NULL COMMENT "",
|
||||
INDEX clientip_idx (`clientip`) USING INVERTED COMMENT '',
|
||||
INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT ''
|
||||
) ENGINE=OLAP
|
||||
UNIQUE KEY(`@timestamp`)
|
||||
COMMENT "OLAP"
|
||||
DISTRIBUTED BY HASH(`@timestamp`) BUCKETS 1
|
||||
PROPERTIES (
|
||||
"replication_allocation" = "tag.location.default: 1",
|
||||
"enable_unique_key_merge_on_write" = "true",
|
||||
"disable_auto_compaction" = "true"
|
||||
);
|
||||
"""
|
||||
|
||||
def load_httplogs_data = {table_name, label, read_flag, format_flag, file_name, ignore_failure=false,
|
||||
expected_succ_rows = -1, load_to_single_tablet = 'true' ->
|
||||
|
||||
// load the json data
|
||||
streamLoad {
|
||||
table "${table_name}"
|
||||
|
||||
// set http request header params
|
||||
set 'label', label + "_" + UUID.randomUUID().toString()
|
||||
set 'read_json_by_line', read_flag
|
||||
set 'format', format_flag
|
||||
file file_name // import json file
|
||||
time 10000 // limit inflight 10s
|
||||
if (expected_succ_rows >= 0) {
|
||||
set 'max_filter_ratio', '1'
|
||||
}
|
||||
|
||||
// if declared a check callback, the default check condition will ignore.
|
||||
// So you must check all condition
|
||||
check { result, exception, startTime, endTime ->
|
||||
if (ignore_failure && expected_succ_rows < 0) { return }
|
||||
if (exception != null) {
|
||||
throw exception
|
||||
}
|
||||
log.info("Stream load result: ${result}".toString())
|
||||
def json = parseJson(result)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
load_httplogs_data.call(indexTbName1, 'test_all_index_hit_fault_injection_1', 'true', 'json', 'documents-1000.json')
|
||||
load_httplogs_data.call(indexTbName2, 'test_all_index_hit_fault_injection_2', 'true', 'json', 'documents-1000.json')
|
||||
|
||||
sql "sync"
|
||||
|
||||
try {
|
||||
GetDebugPoint().enableDebugPointForAllBEs("segment_iterator._read_columns_by_index", [column_name: "clientip,request"])
|
||||
GetDebugPoint().enableDebugPointForAllBEs("segment_iterator.fast_execute")
|
||||
|
||||
qt_sql """ select count() from ${indexTbName1} where (request match_phrase 'hm'); """
|
||||
qt_sql """ select count() from ${indexTbName1} where (request match_phrase 'hm' and clientip = '126.1.0.0'); """
|
||||
qt_sql """ select count() from ${indexTbName1} where (request match_phrase 'hm' and clientip = '126.1.0.0') or (request match_phrase 'bg' and clientip = '201.0.0.0'); """
|
||||
qt_sql """ select count() from ${indexTbName1} where (request match_phrase 'hm' and clientip = '126.1.0.0' or clientip = '247.37.0.0') or (request match_phrase 'bg' and clientip = '201.0.0.0' or clientip = '232.0.0.0'); """
|
||||
qt_sql """ select count() from ${indexTbName1} where (request match_phrase 'hm' and clientip in ('126.1.0.0', '247.37.0.0')) or (request match_phrase 'bg' and clientip in ('201.0.0.0', '232.0.0.0')); """
|
||||
|
||||
qt_sql """ select count() from ${indexTbName2} where (request match_phrase 'hm'); """
|
||||
qt_sql """ select count() from ${indexTbName2} where (request match_phrase 'hm' and clientip = '126.1.0.0'); """
|
||||
qt_sql """ select count() from ${indexTbName2} where (request match_phrase 'hm' and clientip = '126.1.0.0') or (request match_phrase 'bg' and clientip = '201.0.0.0'); """
|
||||
qt_sql """ select count() from ${indexTbName2} where (request match_phrase 'hm' and clientip = '126.1.0.0' or clientip = '247.37.0.0') or (request match_phrase 'bg' and clientip = '201.0.0.0' or clientip = '232.0.0.0'); """
|
||||
qt_sql """ select count() from ${indexTbName2} where (request match_phrase 'hm' and clientip in ('126.1.0.0', '247.37.0.0')) or (request match_phrase 'bg' and clientip in ('201.0.0.0', '232.0.0.0')); """
|
||||
|
||||
} finally {
|
||||
GetDebugPoint().disableDebugPointForAllBEs("segment_iterator._read_columns_by_index")
|
||||
GetDebugPoint().disableDebugPointForAllBEs("segment_iterator.fast_execute")
|
||||
}
|
||||
} finally {
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,117 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
suite("test_topn_fault_injection", "nonConcurrent") {
|
||||
// define a sql table
|
||||
def indexTbName1 = "test_topn_fault_injection1"
|
||||
def indexTbName2 = "test_topn_fault_injection2"
|
||||
|
||||
sql "DROP TABLE IF EXISTS ${indexTbName1}"
|
||||
sql """
|
||||
CREATE TABLE ${indexTbName1} (
|
||||
`@timestamp` int(11) NULL COMMENT "",
|
||||
`clientip` varchar(20) NULL COMMENT "",
|
||||
`request` text NULL COMMENT "",
|
||||
`status` int(11) NULL COMMENT "",
|
||||
`size` int(11) NULL COMMENT "",
|
||||
INDEX clientip_idx (`clientip`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '',
|
||||
INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT ''
|
||||
) ENGINE=OLAP
|
||||
UNIQUE KEY(`@timestamp`)
|
||||
COMMENT "OLAP"
|
||||
DISTRIBUTED BY HASH(`@timestamp`) BUCKETS 1
|
||||
PROPERTIES (
|
||||
"replication_allocation" = "tag.location.default: 1",
|
||||
"enable_unique_key_merge_on_write" = "true",
|
||||
"disable_auto_compaction" = "true"
|
||||
);
|
||||
"""
|
||||
|
||||
sql "DROP TABLE IF EXISTS ${indexTbName2}"
|
||||
sql """
|
||||
CREATE TABLE ${indexTbName2} (
|
||||
`@timestamp` int(11) NULL COMMENT "",
|
||||
`clientip` varchar(20) NULL COMMENT "",
|
||||
`request` text NULL COMMENT "",
|
||||
`status` int(11) NULL COMMENT "",
|
||||
`size` int(11) NULL COMMENT "",
|
||||
INDEX clientip_idx (`clientip`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '',
|
||||
INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT ''
|
||||
) ENGINE=OLAP
|
||||
DUPLICATE KEY(`@timestamp`)
|
||||
COMMENT "OLAP"
|
||||
DISTRIBUTED BY RANDOM BUCKETS 1
|
||||
PROPERTIES (
|
||||
"replication_allocation" = "tag.location.default: 1",
|
||||
"disable_auto_compaction" = "true"
|
||||
);
|
||||
"""
|
||||
|
||||
def load_httplogs_data = {table_name, label, read_flag, format_flag, file_name, ignore_failure=false,
|
||||
expected_succ_rows = -1, load_to_single_tablet = 'true' ->
|
||||
|
||||
// load the json data
|
||||
streamLoad {
|
||||
table "${table_name}"
|
||||
|
||||
// set http request header params
|
||||
set 'label', label + "_" + UUID.randomUUID().toString()
|
||||
set 'read_json_by_line', read_flag
|
||||
set 'format', format_flag
|
||||
file file_name // import json file
|
||||
time 10000 // limit inflight 10s
|
||||
if (expected_succ_rows >= 0) {
|
||||
set 'max_filter_ratio', '1'
|
||||
}
|
||||
|
||||
// if declared a check callback, the default check condition will ignore.
|
||||
// So you must check all condition
|
||||
check { result, exception, startTime, endTime ->
|
||||
if (ignore_failure && expected_succ_rows < 0) { return }
|
||||
if (exception != null) {
|
||||
throw exception
|
||||
}
|
||||
log.info("Stream load result: ${result}".toString())
|
||||
def json = parseJson(result)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
load_httplogs_data.call(indexTbName1, 'test_topn_fault_injection1', 'true', 'json', 'documents-1000.json')
|
||||
load_httplogs_data.call(indexTbName2, 'test_topn_fault_injection2', 'true', 'json', 'documents-1000.json')
|
||||
|
||||
sql "sync"
|
||||
|
||||
try {
|
||||
GetDebugPoint().enableDebugPointForAllBEs("segment_iterator.topn_opt")
|
||||
|
||||
qt_sql """ select * from ${indexTbName1} where (request match_phrase 'hm') order by `@timestamp` limit 1; """
|
||||
qt_sql """ select * from ${indexTbName1} where (request match_phrase 'hm' and clientip match_phrase '1') order by `@timestamp` limit 1; """
|
||||
qt_sql """ select * from ${indexTbName1} where (request match_phrase 'hm' and clientip match_phrase '1') or (request match_phrase 'bg' and clientip match_phrase '2') order by `@timestamp` limit 1; """
|
||||
qt_sql """ select * from ${indexTbName1} where (request match_phrase 'hm' and clientip match_phrase '1' or clientip match_phrase '3') or (request match_phrase 'bg' and clientip match_phrase '2' or clientip match_phrase '4') order by `@timestamp` limit 1; """
|
||||
|
||||
qt_sql """ select * from ${indexTbName2} where (request match_phrase 'hm') order by `@timestamp` limit 1; """
|
||||
qt_sql """ select * from ${indexTbName2} where (request match_phrase 'hm' and clientip match_phrase '1') order by `@timestamp` limit 1; """
|
||||
qt_sql """ select * from ${indexTbName2} where (request match_phrase 'hm' and clientip match_phrase '1') or (request match_phrase 'bg' and clientip match_phrase '2') order by `@timestamp` limit 1; """
|
||||
qt_sql """ select * from ${indexTbName2} where (request match_phrase 'hm' and clientip match_phrase '1' or clientip match_phrase '3') or (request match_phrase 'bg' and clientip match_phrase '2' or clientip match_phrase '4') order by `@timestamp` limit 1; """
|
||||
} finally {
|
||||
GetDebugPoint().disableDebugPointForAllBEs("segment_iterator.topn_opt")
|
||||
}
|
||||
} finally {
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user