[enhancement](index) Nereids support no need to read raw data for index column that only in filter conditions (#20605)
This commit is contained in:
@ -250,6 +250,10 @@ Status SegmentIterator::init(const StorageReadOptions& opts) {
|
||||
if (_char_type_idx.empty() && _char_type_idx_no_0.empty()) {
|
||||
_vec_init_char_column_id();
|
||||
}
|
||||
|
||||
if (opts.output_columns != nullptr) {
|
||||
_output_columns = *(opts.output_columns);
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@ -917,7 +921,20 @@ Status SegmentIterator::_apply_inverted_index_on_block_column_predicate(
|
||||
}
|
||||
|
||||
// Reports whether the raw data of column `cid` has to be read.
//
// Returns false only when all of the following hold:
//   * _output_columns does not contain the sentinel -1,
//   * _need_read_data_indices has an entry for the column's unique id and
//     that entry is false,
//   * the column's unique id is not listed in _output_columns.
// In every other case the column data is read (returns true).
bool SegmentIterator::_need_read_data(ColumnId cid) {
    // TODO(xk) impl right logic

    // A -1 in _output_columns means light weight schema change may not be
    // enabled, or something else left the column unique_id unset. To stay on
    // the safe side, always read the column data in that situation.
    if (_output_columns.count(-1)) {
        return true;
    }

    const int32_t unique_id = _opts.tablet_schema->column(cid).unique_id();

    // No entry recorded for this column, or the entry says the data is still
    // required: read it.
    const auto index_entry = _need_read_data_indices.find(unique_id);
    if (index_entry == _need_read_data_indices.end() || index_entry->second) {
        return true;
    }

    // The column is part of the requested output, so its values are needed.
    if (_output_columns.count(unique_id) > 0) {
        return true;
    }

    VLOG_DEBUG << "SegmentIterator no need read data for column: "
               << _opts.tablet_schema->column_by_uid(unique_id).name();
    return false;
}
|
||||
|
||||
|
||||
@ -431,6 +431,7 @@ private:
|
||||
std::vector<ColumnPredicate*> _filter_info_id;
|
||||
bool _record_rowids = false;
|
||||
int32_t _tablet_id = 0;
|
||||
std::set<int32_t> _output_columns;
|
||||
};
|
||||
|
||||
} // namespace segment_v2
|
||||
|
||||
@ -440,9 +440,6 @@ Status NewOlapScanNode::_init_scanners(std::list<VScannerSPtr>* scanners) {
|
||||
|
||||
if (!_olap_scan_node.output_column_unique_ids.empty()) {
|
||||
for (auto uid : _olap_scan_node.output_column_unique_ids) {
|
||||
if (uid < 0) {
|
||||
continue;
|
||||
}
|
||||
_maybe_read_column_ids.emplace(uid);
|
||||
}
|
||||
}
|
||||
|
||||
@ -572,50 +572,16 @@ public class OriginalPlanner extends Planner {
|
||||
* column unique id for `A` and `B` will put into outputColumnUniqueIds.
|
||||
*
|
||||
*/
|
||||
// This optimization only takes effect with the Nereids planner.
|
||||
private void pushOutColumnUniqueIdsToOlapScan(PlanFragment rootFragment, Analyzer analyzer) {
|
||||
Set<Integer> outputColumnUniqueIds = new HashSet<>();
|
||||
ArrayList<Expr> outputExprs = rootFragment.getOutputExprs();
|
||||
for (Expr expr : outputExprs) {
|
||||
if (expr instanceof SlotRef) {
|
||||
if (((SlotRef) expr).getColumn() != null) {
|
||||
outputColumnUniqueIds.add(((SlotRef) expr).getColumn().getUniqueId());
|
||||
}
|
||||
}
|
||||
}
|
||||
// Add '-1' so that this optimization is never applied when planning with OriginalPlanner:
|
||||
// the storage layer skips the optimization whenever outputColumnUniqueIds contains '-1',
|
||||
// which ensures the optimization only takes effect with the Nereids planner.
|
||||
outputColumnUniqueIds.add(-1);
|
||||
|
||||
for (PlanFragment fragment : fragments) {
|
||||
PlanNode node = fragment.getPlanRoot();
|
||||
PlanNode parent = null;
|
||||
while (node.getChildren().size() != 0) {
|
||||
for (PlanNode childNode : node.getChildren()) {
|
||||
List<SlotId> outputSlotIds = childNode.getOutputSlotIds();
|
||||
if (outputSlotIds != null) {
|
||||
for (SlotId sid : outputSlotIds) {
|
||||
SlotDescriptor slotDesc = analyzer.getSlotDesc(sid);
|
||||
outputColumnUniqueIds.add(slotDesc.getUniqueId());
|
||||
}
|
||||
}
|
||||
}
|
||||
// OlapScanNode is the last node.
|
||||
// So just take the last two nodes and check whether they are a SortNode and an OlapScanNode.
|
||||
parent = node;
|
||||
node = node.getChildren().get(0);
|
||||
}
|
||||
|
||||
if (parent instanceof SortNode) {
|
||||
SortNode sortNode = (SortNode) parent;
|
||||
List<Expr> orderingExprs = sortNode.getSortInfo().getOrigOrderingExprs();
|
||||
if (orderingExprs != null) {
|
||||
for (Expr expr : orderingExprs) {
|
||||
if (expr instanceof SlotRef) {
|
||||
if (((SlotRef) expr).getColumn() != null) {
|
||||
outputColumnUniqueIds.add(((SlotRef) expr).getColumn().getUniqueId());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!(node instanceof OlapScanNode)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -0,0 +1,129 @@
|
||||
-- This file is automatically generated. You should know what you did if you want to edit this
|
||||
-- !select_nereids_0 --
|
||||
1 \N addr qie3 yy lj 100
|
||||
2 \N hehe \N lala 200
|
||||
3 beijing addr xuanwu wugui \N 300
|
||||
4 beijing addr fengtai fengtai1 fengtai2 \N
|
||||
5 beijing addr chaoyang wangjing donghuqu 500
|
||||
6 shanghai hehe \N haha \N
|
||||
7 tengxun qie addr gg lj \N
|
||||
8 tengxun2 qie \N lj 800
|
||||
|
||||
-- !select_nereids_1 --
|
||||
4
|
||||
|
||||
-- !select_nereids_2 --
|
||||
3
|
||||
|
||||
-- !select_nereids_3 --
|
||||
3
|
||||
|
||||
-- !select_nereids_4 --
|
||||
3 beijing addr xuanwu wugui \N 300
|
||||
4 beijing addr fengtai fengtai1 fengtai2 \N
|
||||
5 beijing addr chaoyang wangjing donghuqu 500
|
||||
|
||||
-- !select_nereids_5 --
|
||||
beijing addr xuanwu wugui
|
||||
beijing addr fengtai fengtai1
|
||||
beijing addr chaoyang wangjing
|
||||
|
||||
-- !select_nereids_6 --
|
||||
hehe \N
|
||||
qie addr gg
|
||||
qie \N
|
||||
|
||||
-- !select_nereids_7 --
|
||||
hehe \N
|
||||
qie addr gg
|
||||
qie \N
|
||||
|
||||
-- !select_nereids_8 --
|
||||
SHANGHAI \N
|
||||
TENGXUN addr gg
|
||||
TENGXUN2 \N
|
||||
|
||||
-- !select_nereids_9 --
|
||||
4 \N
|
||||
3 addr gg
|
||||
3 \N
|
||||
|
||||
-- !select_nereids_10 --
|
||||
hehe \N
|
||||
qie addr gg
|
||||
qie \N
|
||||
|
||||
-- !select_nereids_11 --
|
||||
hehe \N SHANGHAI
|
||||
qie addr gg TENGXUN
|
||||
qie \N TENGXUN2
|
||||
|
||||
-- !select_nereids_12 --
|
||||
300
|
||||
\N
|
||||
500
|
||||
|
||||
-- !select_0 --
|
||||
1 \N addr qie3 yy lj 100
|
||||
2 \N hehe \N lala 200
|
||||
3 beijing addr xuanwu wugui \N 300
|
||||
4 beijing addr fengtai fengtai1 fengtai2 \N
|
||||
5 beijing addr chaoyang wangjing donghuqu 500
|
||||
6 shanghai hehe \N haha \N
|
||||
7 tengxun qie addr gg lj \N
|
||||
8 tengxun2 qie \N lj 800
|
||||
|
||||
-- !select_1 --
|
||||
4
|
||||
|
||||
-- !select_2 --
|
||||
3
|
||||
|
||||
-- !select_3 --
|
||||
3
|
||||
|
||||
-- !select_4 --
|
||||
3 beijing addr xuanwu wugui \N 300
|
||||
4 beijing addr fengtai fengtai1 fengtai2 \N
|
||||
5 beijing addr chaoyang wangjing donghuqu 500
|
||||
|
||||
-- !select_5 --
|
||||
beijing addr xuanwu wugui
|
||||
beijing addr fengtai fengtai1
|
||||
beijing addr chaoyang wangjing
|
||||
|
||||
-- !select_6 --
|
||||
hehe \N
|
||||
qie addr gg
|
||||
qie \N
|
||||
|
||||
-- !select_7 --
|
||||
hehe \N
|
||||
qie addr gg
|
||||
qie \N
|
||||
|
||||
-- !select_8 --
|
||||
SHANGHAI \N
|
||||
TENGXUN addr gg
|
||||
TENGXUN2 \N
|
||||
|
||||
-- !select_9 --
|
||||
4 \N
|
||||
3 addr gg
|
||||
3 \N
|
||||
|
||||
-- !select_10 --
|
||||
hehe \N
|
||||
qie addr gg
|
||||
qie \N
|
||||
|
||||
-- !select_11 --
|
||||
hehe \N SHANGHAI
|
||||
qie addr gg TENGXUN
|
||||
qie \N TENGXUN2
|
||||
|
||||
-- !select_12 --
|
||||
300
|
||||
\N
|
||||
500
|
||||
|
||||
@ -0,0 +1,88 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
// Regression suite: creates a table with inverted indexes on `city`, `addr`
// and `n`, loads eight rows, then runs the same thirteen queries twice --
// first with the Nereids planner enabled, then with it disabled. The queries
// filter on indexed columns that are sometimes part of the output and
// sometimes not.
suite("test_index_no_need_read_data", "inverted_index_select") {
    def tableName = "test_index_no_need_read_data"

    // Start from a clean slate so that reruns are deterministic.
    sql "drop table if exists ${tableName}"

    // `city` and `n` get plain inverted indexes; `addr` uses the english parser.
    sql """
        CREATE TABLE IF NOT EXISTS `${tableName}` (
            `id` int NULL COMMENT "",
            `city` varchar(20) NULL COMMENT "",
            `addr` varchar(20) NULL COMMENT "",
            `name` varchar(20) NULL COMMENT "",
            `compy` varchar(20) NULL COMMENT "",
            `n` int NULL COMMENT "",
            INDEX idx_city(city) USING INVERTED,
            INDEX idx_addr(addr) USING INVERTED PROPERTIES("parser"="english"),
            INDEX idx_n(n) USING INVERTED
        ) ENGINE=OLAP
        DUPLICATE KEY(`id`)
        COMMENT "OLAP"
        DISTRIBUTED BY HASH(`id`) BUCKETS 1
        PROPERTIES (
            "replication_allocation" = "tag.location.default: 1",
            "in_memory" = "false",
            "storage_format" = "V2"
        )
    """

    // Eight rows, with NULLs spread over `city`, `name`, `compy` and `n`.
    sql """insert into ${tableName} values
        (1,null,'addr qie3','yy','lj',100),
        (2,null,'hehe',null,'lala',200),
        (3,'beijing','addr xuanwu','wugui',null,300),
        (4,'beijing','addr fengtai','fengtai1','fengtai2',null),
        (5,'beijing','addr chaoyang','wangjing','donghuqu',500),
        (6,'shanghai','hehe',null,'haha',null),
        (7,'tengxun','qie','addr gg','lj',null),
        (8,'tengxun2','qie',null,'lj',800)
    """

    // case1: enable nereids planner
    sql "set enable_nereids_planner = true"

    qt_select_nereids_0 "SELECT * FROM ${tableName} ORDER BY id"
    qt_select_nereids_1 "SELECT count() FROM ${tableName} WHERE n > 100"
    qt_select_nereids_2 "SELECT count() FROM ${tableName} WHERE city = 'beijing'"
    qt_select_nereids_3 "SELECT count(*) FROM ${tableName} WHERE city = 'beijing'"
    qt_select_nereids_4 "SELECT * FROM ${tableName} WHERE city = 'beijing' ORDER BY id"
    qt_select_nereids_5 "SELECT city, addr, name FROM ${tableName} WHERE city = 'beijing' ORDER BY id"
    qt_select_nereids_6 "SELECT addr, name FROM ${tableName} WHERE city > 'beijing' ORDER BY city"
    qt_select_nereids_7 "SELECT addr, name FROM ${tableName} WHERE city > 'beijing' ORDER BY id"
    qt_select_nereids_8 "SELECT upper(city), name FROM ${tableName} WHERE city != 'beijing' ORDER BY id"
    qt_select_nereids_9 "SELECT length(addr), name FROM ${tableName} WHERE city != 'beijing' ORDER BY id"
    qt_select_nereids_10 "SELECT addr, name FROM ( SELECT * from ${tableName} WHERE city != 'beijing' ORDER BY id) t"
    qt_select_nereids_11 "SELECT addr, name, upper(city) FROM ( SELECT * from ${tableName} WHERE city != 'beijing' ORDER BY id) t"
    qt_select_nereids_12 "SELECT sum(n) FROM ${tableName} WHERE city = 'beijing' group by id ORDER BY id"

    // case2: disable nereids planner
    sql "set enable_nereids_planner = false"

    qt_select_0 "SELECT * FROM ${tableName} ORDER BY id"
    qt_select_1 "SELECT count() FROM ${tableName} WHERE n > 100"
    qt_select_2 "SELECT count() FROM ${tableName} WHERE city = 'beijing'"
    qt_select_3 "SELECT count(*) FROM ${tableName} WHERE city = 'beijing'"
    qt_select_4 "SELECT * FROM ${tableName} WHERE city = 'beijing' ORDER BY id"
    qt_select_5 "SELECT city, addr, name FROM ${tableName} WHERE city = 'beijing' ORDER BY id"
    qt_select_6 "SELECT addr, name FROM ${tableName} WHERE city > 'beijing' ORDER BY city"
    qt_select_7 "SELECT addr, name FROM ${tableName} WHERE city > 'beijing' ORDER BY id"
    qt_select_8 "SELECT upper(city), name FROM ${tableName} WHERE city != 'beijing' ORDER BY id"
    qt_select_9 "SELECT length(addr), name FROM ${tableName} WHERE city != 'beijing' ORDER BY id"
    qt_select_10 "SELECT addr, name FROM ( SELECT * from ${tableName} WHERE city != 'beijing' ORDER BY id) t"
    qt_select_11 "SELECT addr, name, upper(city) FROM ( SELECT * from ${tableName} WHERE city != 'beijing' ORDER BY id) t"
    qt_select_12 "SELECT sum(n) FROM ${tableName} WHERE city = 'beijing' group by id ORDER BY id"
}
|
||||
Reference in New Issue
Block a user