[optimize](count) optimize pk exact query without reading data (#28494)
This commit is contained in:
@ -1832,6 +1832,9 @@ Status SegmentIterator::_read_columns_by_index(uint32_t nrows_read_limit, uint32
|
||||
|
||||
for (auto cid : _first_read_column_ids) {
|
||||
auto& column = _current_return_columns[cid];
|
||||
if (_need_read_key_data(cid, column, nrows_read)) {
|
||||
continue;
|
||||
}
|
||||
if (_prune_column(cid, column, true, nrows_read)) {
|
||||
continue;
|
||||
}
|
||||
@ -2575,5 +2578,43 @@ void SegmentIterator::_calculate_pred_in_remaining_conjunct_root(
|
||||
}
|
||||
}
|
||||
|
||||
// Decides whether reading key column `cid` from the segment can be skipped for
// the current batch and, if so, pads `column` with `nrows_read` default values
// in place of the real data.
//
// NOTE: despite the name, a `true` result means the column has been filled with
// defaults and the caller must NOT read it (the call site `continue`s on true).
// A `false` result means `column` was left untouched and has to be read as usual.
//
// The shortcut is taken only when all of the following hold:
//   - the tablet schema uses the DUP_KEYS model,
//   - the pushed-down aggregation is COUNT_ON_INDEX,
//   - `cid` is a key column,
//   - no predicate in `_col_predicates` or `_col_preds_except_leafnode_of_andnode`
//     references `cid`.
//
// @param cid         id of the column being considered.
// @param column      destination column; receives `nrows_read` defaults on true.
// @param nrows_read  number of rows in the current batch.
// @return true if defaults were inserted and the read can be skipped.
bool SegmentIterator::_need_read_key_data(ColumnId cid, vectorized::MutableColumnPtr& column,
                                          size_t nrows_read) {
    if (_opts.tablet_schema->keys_type() != KeysType::DUP_KEYS) {
        return false;
    }

    if (_opts.push_down_agg_type_opt != TPushAggOp::COUNT_ON_INDEX) {
        return false;
    }

    if (!_opts.tablet_schema->column(cid).is_key()) {
        return false;
    }

    // If the key is present in an expr, its data needs to be read. The predicate
    // lists are scanned directly with an early exit: this function runs once per
    // column per batch, so building a temporary set of all predicate column ids
    // on every call would only add allocations.
    for (auto* pred : _col_predicates) {
        if (pred->column_id() == cid) {
            return false;
        }
    }
    for (auto* pred : _col_preds_except_leafnode_of_andnode) {
        if (pred->column_id() == cid) {
            return false;
        }
    }

    if (column->is_nullable()) {
        // A nullable column is made of a null map plus a nested column; both
        // must grow by the same number of rows to stay aligned.
        auto* nullable_col_ptr = static_cast<vectorized::ColumnNullable*>(column.get());
        nullable_col_ptr->get_null_map_column().insert_many_defaults(nrows_read);
        nullable_col_ptr->get_nested_column_ptr()->insert_many_defaults(nrows_read);
    } else {
        column->insert_many_defaults(nrows_read);
    }

    return true;
}
|
||||
|
||||
} // namespace segment_v2
|
||||
} // namespace doris
|
||||
|
||||
@ -380,6 +380,8 @@ private:
|
||||
|
||||
Status _convert_to_expected_type(const std::vector<ColumnId>& col_ids);
|
||||
|
||||
// Returns true when reading key column `cid` can be skipped: the table is
// DUP_KEYS, the pushed-down aggregation is COUNT_ON_INDEX, `cid` is a key
// column and no column predicate references it. In that case `column` is
// padded with `nrows_read` default values. Returns false (leaving `column`
// untouched) when the column must be read normally.
// NOTE: the name is inverted relative to the result -- callers skip the read
// when this returns true.
bool _need_read_key_data(ColumnId cid, vectorized::MutableColumnPtr& column, size_t nrows_read);
|
||||
|
||||
class BitmapRangeIterator;
|
||||
class BackwardBitmapRangeIterator;
|
||||
|
||||
|
||||
103
regression-test/data/inverted_index_p0/test_count_on_index_2.out
Normal file
103
regression-test/data/inverted_index_p0/test_count_on_index_2.out
Normal file
@ -0,0 +1,103 @@
|
||||
-- This file is automatically generated. You should know what you did if you want to edit this
|
||||
-- !sql --
|
||||
974
|
||||
|
||||
-- !sql --
|
||||
974
|
||||
|
||||
-- !sql --
|
||||
839
|
||||
|
||||
-- !sql --
|
||||
839
|
||||
|
||||
-- !sql --
|
||||
271
|
||||
|
||||
-- !sql --
|
||||
271
|
||||
|
||||
-- !sql --
|
||||
913
|
||||
|
||||
-- !sql --
|
||||
913
|
||||
|
||||
-- !sql --
|
||||
14
|
||||
|
||||
-- !sql --
|
||||
14
|
||||
|
||||
-- !sql --
|
||||
15
|
||||
|
||||
-- !sql --
|
||||
15
|
||||
|
||||
-- !sql --
|
||||
4
|
||||
|
||||
-- !sql --
|
||||
4
|
||||
|
||||
-- !sql --
|
||||
15
|
||||
|
||||
-- !sql --
|
||||
15
|
||||
|
||||
-- !sql --
|
||||
827
|
||||
|
||||
-- !sql --
|
||||
827
|
||||
|
||||
-- !sql --
|
||||
970
|
||||
|
||||
-- !sql --
|
||||
970
|
||||
|
||||
-- !sql --
|
||||
10
|
||||
|
||||
-- !sql --
|
||||
10
|
||||
|
||||
-- !sql --
|
||||
970
|
||||
|
||||
-- !sql --
|
||||
970
|
||||
|
||||
-- !sql --
|
||||
11
|
||||
|
||||
-- !sql --
|
||||
9
|
||||
|
||||
-- !sql --
|
||||
21
|
||||
|
||||
-- !sql --
|
||||
19
|
||||
|
||||
-- !sql --
|
||||
11
|
||||
|
||||
-- !sql --
|
||||
10
|
||||
|
||||
-- !sql --
|
||||
6
|
||||
|
||||
-- !sql --
|
||||
7
|
||||
|
||||
-- !sql --
|
||||
0
|
||||
|
||||
-- !sql --
|
||||
3
|
||||
|
||||
@ -0,0 +1,205 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
|
||||
suite("test_count_on_index_2", "p0"){
|
||||
// Three tables are used by this suite:
//   - indexTbName1: http-log schema with inverted indexes on clientip/request/status/size
//   - indexTbName2: the same http-log schema without any index (baseline for comparison)
//   - indexTbName3: three int columns that are all part of the duplicate key
def indexTbName1 = "test_count_on_index_2_index"
def indexTbName2 = "test_count_on_index_2_no_index"
def indexTbName3 = "test_count_on_index_2_pk"

// --- http-log table WITH inverted indexes ---
sql "DROP TABLE IF EXISTS ${indexTbName1}"

sql """
    CREATE TABLE ${indexTbName1} (
        `@timestamp` int(11) NULL COMMENT "",
        `clientip` varchar(20) NULL COMMENT "",
        `request` text NULL COMMENT "",
        `status` int(11) NULL COMMENT "",
        `size` int(11) NULL COMMENT "",
        INDEX clientip_idx (`clientip`) USING INVERTED COMMENT '',
        INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '',
        INDEX status_idx (`status`) USING INVERTED COMMENT '',
        INDEX size_idx (`size`) USING INVERTED COMMENT ''
    ) ENGINE=OLAP
    DUPLICATE KEY(`@timestamp`)
    COMMENT "OLAP"
    DISTRIBUTED BY RANDOM BUCKETS 1
    PROPERTIES (
        "replication_allocation" = "tag.location.default: 1"
    );
"""

// --- http-log table WITHOUT indexes ---
sql "DROP TABLE IF EXISTS ${indexTbName2}"

sql """
    CREATE TABLE ${indexTbName2} (
        `@timestamp` int(11) NULL COMMENT "",
        `clientip` varchar(20) NULL COMMENT "",
        `request` text NULL COMMENT "",
        `status` int(11) NULL COMMENT "",
        `size` int(11) NULL COMMENT ""
    ) ENGINE=OLAP
    DUPLICATE KEY(`@timestamp`)
    COMMENT "OLAP"
    DISTRIBUTED BY RANDOM BUCKETS 1
    PROPERTIES (
        "replication_allocation" = "tag.location.default: 1"
    );
"""

// --- table whose columns are all duplicate-key columns ---
sql "DROP TABLE IF EXISTS ${indexTbName3}"

sql """
    CREATE TABLE ${indexTbName3} (
        `a` int NULL COMMENT "",
        `b` int NULL COMMENT "",
        `c` int NULL COMMENT ""
    ) ENGINE=OLAP
    DUPLICATE KEY(`a`, `b`, `c`)
    COMMENT "OLAP"
    DISTRIBUTED BY RANDOM BUCKETS 1
    PROPERTIES (
        "replication_allocation" = "tag.location.default: 1"
    );
"""

// Seed the key-only table with 30 rows (i, i, i) for i = 1..30.
sql """
    INSERT INTO ${indexTbName3} VALUES
    (1, 1, 1),
    (2, 2, 2),
    (3, 3, 3),
    (4, 4, 4),
    (5, 5, 5),
    (6, 6, 6),
    (7, 7, 7),
    (8, 8, 8),
    (9, 9, 9),
    (10, 10, 10),
    (11, 11, 11),
    (12, 12, 12),
    (13, 13, 13),
    (14, 14, 14),
    (15, 15, 15),
    (16, 16, 16),
    (17, 17, 17),
    (18, 18, 18),
    (19, 19, 19),
    (20, 20, 20),
    (21, 21, 21),
    (22, 22, 22),
    (23, 23, 23),
    (24, 24, 24),
    (25, 25, 25),
    (26, 26, 26),
    (27, 27, 27),
    (28, 28, 28),
    (29, 29, 29),
    (30, 30, 30);
"""
|
||||
|
||||
// Stream-loads `file_name` into `table_name` and validates the load result.
//
//   table_name            target table
//   label                 label prefix; a random UUID is appended to it
//   read_flag             value for the 'read_json_by_line' header
//   format_flag           value for the 'format' header
//   file_name             data file to import
//   ignore_failure        when true and no exact row count is expected, skip all checks
//   expected_succ_rows    when >= 0, the exact number of rows that must be loaded
//                         ('max_filter_ratio' is then set to '1'); a negative value
//                         disables the exact-count check
//   load_to_single_tablet accepted but not used by this closure
def load_httplogs_data = {table_name, label, read_flag, format_flag, file_name, ignore_failure=false,
                          expected_succ_rows = -1, load_to_single_tablet = 'true' ->

    def expectExactRows = expected_succ_rows >= 0
    def skipChecks = ignore_failure && !expectExactRows

    streamLoad {
        table "${table_name}"

        // http request header params
        set 'label', label + "_" + UUID.randomUUID().toString()
        set 'read_json_by_line', read_flag
        set 'format', format_flag
        // json file to import
        file file_name
        // original note: limit inflight 10s
        time 10000
        if (expectExactRows) {
            set 'max_filter_ratio', '1'
        }

        // Declaring a check callback replaces the default check condition,
        // so every condition has to be verified here.
        check { result, exception, startTime, endTime ->
            if (skipChecks) {
                return
            }
            if (exception != null) {
                throw exception
            }
            log.info("Stream load result: ${result}".toString())
            def loadStats = parseJson(result)
            assertEquals("success", loadStats.Status.toLowerCase())
            if (!expectExactRows) {
                assertEquals(loadStats.NumberTotalRows, loadStats.NumberLoadedRows + loadStats.NumberUnselectedRows)
                assertTrue(loadStats.NumberLoadedRows > 0 && loadStats.LoadBytes > 0)
            } else {
                assertEquals(loadStats.NumberLoadedRows, expected_succ_rows)
            }
        }
    }
}
|
||||
|
||||
try {
    // Load the same data file into the indexed and the non-indexed table.
    for (tbl in [indexTbName1, indexTbName2]) {
        load_httplogs_data.call(tbl, tbl, 'true', 'json', 'documents-1000.json')
    }

    // Every http-log query restricts the key column `@timestamp` to this range
    // and optionally adds one more filter. Each query runs against the indexed
    // table first and the non-indexed table second, so the recorded results
    // come in pairs.
    def keyRange = "`@timestamp` >= 893964736 and `@timestamp` <= 893966453"
    def extraFilters = [
        "",
        " and (request match 'images')",
        " and (request match 'images' and request match 'english')",
        " and (request match 'images' or request match 'english')",
        " and (clientip = '247.37.0.0')",
        " and (clientip = '247.37.0.0' or clientip = '252.0.0.0')",
        " and (clientip = '247.37.0.0' and request match 'hm')",
        " and clientip in ('247.37.0.0', '252.0.0.0')",
        " and (status = 200)",
        " and (status = 200 or status = 304)",
        " and (clientip = '247.37.0.0' and status = 200)",
        " and status in (200, 304)"
    ]
    for (extra in extraFilters) {
        for (tbl in [indexTbName1, indexTbName2]) {
            qt_sql """ select count() from ${tbl} where ${keyRange}${extra}; """
        }
    }

    // Range filters on the key-only table, covering one, two and three key columns.
    def keyOnlyFilters = [
        "(a >= 5 and a <= 15)",
        "(a > 5 and a < 15)",
        "(a >= 7 and a <= 27)",
        "(a > 7 and a < 27)",
        "(a >= 7 and a <= 27) and (b >= 10 and b <= 20)",
        "(a >= 7 and a < 27) and (b >= 10 and b < 20)",
        "(a >= 7 and a <= 27) and (b >= 10 and b < 20) and (c >= 12 and c < 18)",
        "(a >= 2 and a < 28) and (b >= 5 and b < 20) and (c >= 8 and c < 15)",
        "(a >= 10 and a < 20) and (b >= 5 and b < 14) and (c >= 16 and c < 25)",
        "(a >= 10 and a < 20) and (b >= 5 and b < 16) and (c >= 13 and c < 25)"
    ]
    for (filter in keyOnlyFilters) {
        qt_sql """ select count() from ${indexTbName3} where ${filter}; """
    }

} finally {
    // No cleanup is performed here; the tables are dropped and recreated
    // at the start of the suite.
}
|
||||
}
|
||||
Reference in New Issue
Block a user