[enhancement](index) Nereids: support skipping the raw data read for index columns that appear only in filter conditions (#20605)

This commit is contained in:
YueW
2023-06-09 21:54:48 +08:00
committed by GitHub
parent 0f21166110
commit 656b9ad3da
6 changed files with 241 additions and 43 deletions

View File

@ -250,6 +250,10 @@ Status SegmentIterator::init(const StorageReadOptions& opts) {
if (_char_type_idx.empty() && _char_type_idx_no_0.empty()) {
_vec_init_char_column_id();
}
if (opts.output_columns != nullptr) {
_output_columns = *(opts.output_columns);
}
return Status::OK();
}
@ -917,7 +921,20 @@ Status SegmentIterator::_apply_inverted_index_on_block_column_predicate(
}
// Decides whether the raw data of column `cid` has to be read.
// Returns false only when the column's unique id is present in
// _need_read_data_indices with a false flag and the id is not one of the
// _output_columns; returns true in every other case.
// TODO(xk) impl right logic
bool SegmentIterator::_need_read_data(ColumnId cid) {
    // A -1 entry in _output_columns means the column unique ids may not be
    // set (light weight schema change may not be enabled, or other reasons).
    // To prevent errors, report that the column data must be read.
    if (_output_columns.find(-1) != _output_columns.end()) {
        return true;
    }

    const int32_t col_uid = _opts.tablet_schema->column(cid).unique_id();

    // The column is a skip candidate only if it has an entry whose flag is false.
    const auto flag_it = _need_read_data_indices.find(col_uid);
    const bool marked_skippable =
            flag_it != _need_read_data_indices.end() && !flag_it->second;

    // A column that is part of the output still has to be read.
    const bool is_output = _output_columns.find(col_uid) != _output_columns.end();

    if (!marked_skippable || is_output) {
        return true;
    }

    VLOG_DEBUG << "SegmentIterator no need read data for column: "
               << _opts.tablet_schema->column_by_uid(col_uid).name();
    return false;
}

View File

@ -431,6 +431,7 @@ private:
std::vector<ColumnPredicate*> _filter_info_id;
bool _record_rowids = false;
int32_t _tablet_id = 0;
std::set<int32_t> _output_columns;
};
} // namespace segment_v2

View File

@ -440,9 +440,6 @@ Status NewOlapScanNode::_init_scanners(std::list<VScannerSPtr>* scanners) {
if (!_olap_scan_node.output_column_unique_ids.empty()) {
for (auto uid : _olap_scan_node.output_column_unique_ids) {
if (uid < 0) {
continue;
}
_maybe_read_column_ids.emplace(uid);
}
}

View File

@ -572,50 +572,16 @@ public class OriginalPlanner extends Planner {
* column unique id for `A` and `B` will put into outputColumnUniqueIds.
*
*/
// this opt will only work with nereidsPlanner
private void pushOutColumnUniqueIdsToOlapScan(PlanFragment rootFragment, Analyzer analyzer) {
Set<Integer> outputColumnUniqueIds = new HashSet<>();
ArrayList<Expr> outputExprs = rootFragment.getOutputExprs();
for (Expr expr : outputExprs) {
if (expr instanceof SlotRef) {
if (((SlotRef) expr).getColumn() != null) {
outputColumnUniqueIds.add(((SlotRef) expr).getColumn().getUniqueId());
}
}
}
// Add '-1' so that this optimization is never applied with OriginalPlanner:
// the storage layer skips the optimization when outputColumnUniqueIds contains '-1',
// which ensures the optimization only takes effect with nereidsPlanner.
outputColumnUniqueIds.add(-1);
for (PlanFragment fragment : fragments) {
PlanNode node = fragment.getPlanRoot();
PlanNode parent = null;
while (node.getChildren().size() != 0) {
for (PlanNode childNode : node.getChildren()) {
List<SlotId> outputSlotIds = childNode.getOutputSlotIds();
if (outputSlotIds != null) {
for (SlotId sid : outputSlotIds) {
SlotDescriptor slotDesc = analyzer.getSlotDesc(sid);
outputColumnUniqueIds.add(slotDesc.getUniqueId());
}
}
}
// OlapScanNode is the last node.
// So, just get the two node and check if they are SortNode and OlapScan.
parent = node;
node = node.getChildren().get(0);
}
if (parent instanceof SortNode) {
SortNode sortNode = (SortNode) parent;
List<Expr> orderingExprs = sortNode.getSortInfo().getOrigOrderingExprs();
if (orderingExprs != null) {
for (Expr expr : orderingExprs) {
if (expr instanceof SlotRef) {
if (((SlotRef) expr).getColumn() != null) {
outputColumnUniqueIds.add(((SlotRef) expr).getColumn().getUniqueId());
}
}
}
}
}
if (!(node instanceof OlapScanNode)) {
continue;
}

View File

@ -0,0 +1,129 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !select_nereids_0 --
1 \N addr qie3 yy lj 100
2 \N hehe \N lala 200
3 beijing addr xuanwu wugui \N 300
4 beijing addr fengtai fengtai1 fengtai2 \N
5 beijing addr chaoyang wangjing donghuqu 500
6 shanghai hehe \N haha \N
7 tengxun qie addr gg lj \N
8 tengxun2 qie \N lj 800
-- !select_nereids_1 --
4
-- !select_nereids_2 --
3
-- !select_nereids_3 --
3
-- !select_nereids_4 --
3 beijing addr xuanwu wugui \N 300
4 beijing addr fengtai fengtai1 fengtai2 \N
5 beijing addr chaoyang wangjing donghuqu 500
-- !select_nereids_5 --
beijing addr xuanwu wugui
beijing addr fengtai fengtai1
beijing addr chaoyang wangjing
-- !select_nereids_6 --
hehe \N
qie addr gg
qie \N
-- !select_nereids_7 --
hehe \N
qie addr gg
qie \N
-- !select_nereids_8 --
SHANGHAI \N
TENGXUN addr gg
TENGXUN2 \N
-- !select_nereids_9 --
4 \N
3 addr gg
3 \N
-- !select_nereids_10 --
hehe \N
qie addr gg
qie \N
-- !select_nereids_11 --
hehe \N SHANGHAI
qie addr gg TENGXUN
qie \N TENGXUN2
-- !select_nereids_12 --
300
\N
500
-- !select_0 --
1 \N addr qie3 yy lj 100
2 \N hehe \N lala 200
3 beijing addr xuanwu wugui \N 300
4 beijing addr fengtai fengtai1 fengtai2 \N
5 beijing addr chaoyang wangjing donghuqu 500
6 shanghai hehe \N haha \N
7 tengxun qie addr gg lj \N
8 tengxun2 qie \N lj 800
-- !select_1 --
4
-- !select_2 --
3
-- !select_3 --
3
-- !select_4 --
3 beijing addr xuanwu wugui \N 300
4 beijing addr fengtai fengtai1 fengtai2 \N
5 beijing addr chaoyang wangjing donghuqu 500
-- !select_5 --
beijing addr xuanwu wugui
beijing addr fengtai fengtai1
beijing addr chaoyang wangjing
-- !select_6 --
hehe \N
qie addr gg
qie \N
-- !select_7 --
hehe \N
qie addr gg
qie \N
-- !select_8 --
SHANGHAI \N
TENGXUN addr gg
TENGXUN2 \N
-- !select_9 --
4 \N
3 addr gg
3 \N
-- !select_10 --
hehe \N
qie addr gg
qie \N
-- !select_11 --
hehe \N SHANGHAI
qie addr gg TENGXUN
qie \N TENGXUN2
-- !select_12 --
300
\N
500

View File

@ -0,0 +1,88 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Regression suite for a table with inverted indexes. The same set of queries
// is executed twice, first with the Nereids planner enabled and then with it
// disabled. Each `qt_<tag>` statement is labelled with a tag; the expected
// result file for this suite lists results under the same tags
// (select_nereids_N and select_N).
suite("test_index_no_need_read_data", "inverted_index_select"){
def table1 = "test_index_no_need_read_data"
// Start from a clean state so the suite can be re-run.
sql "drop table if exists ${table1}"
// Duplicate-key table with inverted indexes on `city`, `addr` (english parser)
// and `n`; `id`, `name` and `compy` carry no index.
sql """
CREATE TABLE IF NOT EXISTS `${table1}` (
`id` int NULL COMMENT "",
`city` varchar(20) NULL COMMENT "",
`addr` varchar(20) NULL COMMENT "",
`name` varchar(20) NULL COMMENT "",
`compy` varchar(20) NULL COMMENT "",
`n` int NULL COMMENT "",
INDEX idx_city(city) USING INVERTED,
INDEX idx_addr(addr) USING INVERTED PROPERTIES("parser"="english"),
INDEX idx_n(n) USING INVERTED
) ENGINE=OLAP
DUPLICATE KEY(`id`)
COMMENT "OLAP"
DISTRIBUTED BY HASH(`id`) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"in_memory" = "false",
"storage_format" = "V2"
)
"""
// Eight rows; NULLs are spread over `city`, `name`, `compy` and `n` so that
// filters and projections also touch NULL values.
sql """insert into ${table1} values
(1,null,'addr qie3','yy','lj',100),
(2,null,'hehe',null,'lala',200),
(3,'beijing','addr xuanwu','wugui',null,300),
(4,'beijing','addr fengtai','fengtai1','fengtai2',null),
(5,'beijing','addr chaoyang','wangjing','donghuqu',500),
(6,'shanghai','hehe',null,'haha',null),
(7,'tengxun','qie','addr gg','lj',null),
(8,'tengxun2','qie',null,'lj',800)
"""
// case1: Nereids planner enabled.
// The queries mix cases where the filtered indexed column is absent from the
// select list (count, addr/name projections) with cases where it is also
// projected or wrapped in an expression (select *, city, upper(city)).
sql "set enable_nereids_planner = true"
qt_select_nereids_0 "SELECT * FROM ${table1} ORDER BY id"
qt_select_nereids_1 "SELECT count() FROM ${table1} WHERE n > 100"
qt_select_nereids_2 "SELECT count() FROM ${table1} WHERE city = 'beijing'"
qt_select_nereids_3 "SELECT count(*) FROM ${table1} WHERE city = 'beijing'"
qt_select_nereids_4 "SELECT * FROM ${table1} WHERE city = 'beijing' ORDER BY id"
qt_select_nereids_5 "SELECT city, addr, name FROM ${table1} WHERE city = 'beijing' ORDER BY id"
qt_select_nereids_6 "SELECT addr, name FROM ${table1} WHERE city > 'beijing' ORDER BY city"
qt_select_nereids_7 "SELECT addr, name FROM ${table1} WHERE city > 'beijing' ORDER BY id"
qt_select_nereids_8 "SELECT upper(city), name FROM ${table1} WHERE city != 'beijing' ORDER BY id"
qt_select_nereids_9 "SELECT length(addr), name FROM ${table1} WHERE city != 'beijing' ORDER BY id"
qt_select_nereids_10 "SELECT addr, name FROM ( SELECT * from ${table1} WHERE city != 'beijing' ORDER BY id) t"
qt_select_nereids_11 "SELECT addr, name, upper(city) FROM ( SELECT * from ${table1} WHERE city != 'beijing' ORDER BY id) t"
qt_select_nereids_12 "SELECT sum(n) FROM ${table1} WHERE city = 'beijing' group by id ORDER BY id"
// case2: Nereids planner disabled.
// Same queries as case1, recorded under the select_N tags.
sql "set enable_nereids_planner = false"
qt_select_0 "SELECT * FROM ${table1} ORDER BY id"
qt_select_1 "SELECT count() FROM ${table1} WHERE n > 100"
qt_select_2 "SELECT count() FROM ${table1} WHERE city = 'beijing'"
qt_select_3 "SELECT count(*) FROM ${table1} WHERE city = 'beijing'"
qt_select_4 "SELECT * FROM ${table1} WHERE city = 'beijing' ORDER BY id"
qt_select_5 "SELECT city, addr, name FROM ${table1} WHERE city = 'beijing' ORDER BY id"
qt_select_6 "SELECT addr, name FROM ${table1} WHERE city > 'beijing' ORDER BY city"
qt_select_7 "SELECT addr, name FROM ${table1} WHERE city > 'beijing' ORDER BY id"
qt_select_8 "SELECT upper(city), name FROM ${table1} WHERE city != 'beijing' ORDER BY id"
qt_select_9 "SELECT length(addr), name FROM ${table1} WHERE city != 'beijing' ORDER BY id"
qt_select_10 "SELECT addr, name FROM ( SELECT * from ${table1} WHERE city != 'beijing' ORDER BY id) t"
qt_select_11 "SELECT addr, name, upper(city) FROM ( SELECT * from ${table1} WHERE city != 'beijing' ORDER BY id) t"
qt_select_12 "SELECT sum(n) FROM ${table1} WHERE city = 'beijing' group by id ORDER BY id"
}