[fix](orc)fix orc reader missing column. (#35735)
## Proposed changes bp #35583 Issue Number: close #xxx <!--Describe your changes.-->
This commit is contained in:
@ -821,6 +821,15 @@ Status OrcReader::set_fill_columns(
|
||||
if (iter == predicate_columns.end()) {
|
||||
_lazy_read_ctx.missing_columns.emplace(kv.first, kv.second);
|
||||
} else {
|
||||
// Handle predicates on a missing column, e.g. `missing_column == xx`, `missing_column is null`, `missing_column is not null`.
|
||||
if (_slot_id_to_filter_conjuncts->find(iter->second.second) !=
|
||||
_slot_id_to_filter_conjuncts->end()) {
|
||||
for (auto& ctx : _slot_id_to_filter_conjuncts->find(iter->second.second)->second) {
|
||||
_filter_conjuncts.emplace_back(ctx);
|
||||
}
|
||||
}
|
||||
|
||||
// The value stored in predicate_missing_columns is a VLiteral; it is used to fill in default values for missing columns.
|
||||
_lazy_read_ctx.predicate_missing_columns.emplace(kv.first, kv.second);
|
||||
_lazy_read_ctx.all_predicate_col_ids.emplace_back(iter->second.first);
|
||||
}
|
||||
@ -1732,10 +1741,6 @@ Status OrcReader::get_next_block_impl(Block* block, size_t* read_rows, bool* eof
|
||||
for (auto& conjunct : _non_dict_filter_conjuncts) {
|
||||
filter_conjuncts.emplace_back(conjunct);
|
||||
}
|
||||
//include missing_columns != missing_columns ; missing_column is null; missing_column != file_columns etc...
|
||||
for (auto& [missing_col, conjunct] : _lazy_read_ctx.predicate_missing_columns) {
|
||||
filter_conjuncts.emplace_back(conjunct);
|
||||
}
|
||||
std::vector<IColumn::Filter*> filters;
|
||||
if (_delete_rows_filter_ptr) {
|
||||
filters.push_back(_delete_rows_filter_ptr.get());
|
||||
@ -1757,6 +1762,7 @@ Status OrcReader::get_next_block_impl(Block* block, size_t* read_rows, bool* eof
|
||||
RETURN_IF_CATCH_EXCEPTION(
|
||||
Block::filter_block_internal(block, columns_to_filter, result_filter));
|
||||
}
|
||||
// _not_single_slot_filter_conjuncts covers predicates such as `missing_column1 == missing_column2`, `missing_column == existing_column`, ...
|
||||
if (!_not_single_slot_filter_conjuncts.empty()) {
|
||||
RETURN_IF_ERROR(_convert_dict_cols_to_string_cols(block, &batch_vec));
|
||||
RETURN_IF_CATCH_EXCEPTION(
|
||||
@ -1894,10 +1900,6 @@ Status OrcReader::filter(orc::ColumnVectorBatch& data, uint16_t* sel, uint16_t s
|
||||
for (auto& conjunct : _non_dict_filter_conjuncts) {
|
||||
filter_conjuncts.emplace_back(conjunct);
|
||||
}
|
||||
//include missing_columns != missing_columns ; missing_column is null; missing_column != file_columns etc...
|
||||
for (auto& [missing_col, conjunct] : _lazy_read_ctx.predicate_missing_columns) {
|
||||
filter_conjuncts.emplace_back(conjunct);
|
||||
}
|
||||
std::vector<IColumn::Filter*> filters;
|
||||
if (_delete_rows_filter_ptr) {
|
||||
filters.push_back(_delete_rows_filter_ptr.get());
|
||||
|
||||
@ -603,6 +603,26 @@ CREATE TABLE `unsupported_type_table`(
|
||||
|
||||
set hive.stats.column.autogather=false;
|
||||
|
||||
CREATE TABLE `test_hive_orc_add_column`(
|
||||
id int,
|
||||
col1 int
|
||||
)
|
||||
stored as orc;
|
||||
insert into `test_hive_orc_add_column` values(1,2);
|
||||
insert into `test_hive_orc_add_column` values(3,4),(4,6);
|
||||
alter table `test_hive_orc_add_column` ADD COLUMNS (col2 int);
|
||||
insert into `test_hive_orc_add_column` values(7,8,9);
|
||||
insert into `test_hive_orc_add_column` values(10,11,null);
|
||||
insert into `test_hive_orc_add_column` values(12,13,null);
|
||||
insert into `test_hive_orc_add_column` values(14,15,16);
|
||||
alter table `test_hive_orc_add_column` ADD COLUMNS (col3 int,col4 string);
|
||||
insert into `test_hive_orc_add_column` values(17,18,19,20,"hello world");
|
||||
insert into `test_hive_orc_add_column` values(21,22,23,24,"cywcywcyw");
|
||||
insert into `test_hive_orc_add_column` values(25,26,null,null,null);
|
||||
insert into `test_hive_orc_add_column` values(27,28,29,null,null);
|
||||
insert into `test_hive_orc_add_column` values(30,31,32,33,null);
|
||||
|
||||
|
||||
CREATE TABLE `schema_evo_test_text`(
|
||||
id int,
|
||||
name string
|
||||
@ -2500,4 +2520,3 @@ PARTITIONED BY (
|
||||
`varchar_col` varchar(50))
|
||||
stored as orc
|
||||
TBLPROPERTIES("orc.compress"="ZLIB");
|
||||
|
||||
|
||||
@ -0,0 +1,185 @@
|
||||
-- This file is automatically generated. You should know what you did if you want to edit this
|
||||
-- !orc_add_col1 --
|
||||
1 2 \N \N \N
|
||||
3 4 \N \N \N
|
||||
4 6 \N \N \N
|
||||
7 8 9 \N \N
|
||||
10 11 \N \N \N
|
||||
12 13 \N \N \N
|
||||
14 15 16 \N \N
|
||||
17 18 19 20 hello world
|
||||
21 22 23 24 cywcywcyw
|
||||
25 26 \N \N \N
|
||||
27 28 29 \N \N
|
||||
30 31 32 33 \N
|
||||
|
||||
-- !orc_add_col2 --
|
||||
|
||||
-- !orc_add_col3 --
|
||||
|
||||
-- !orc_add_col4 --
|
||||
1 2 \N \N \N
|
||||
3 4 \N \N \N
|
||||
4 6 \N \N \N
|
||||
10 11 \N \N \N
|
||||
12 13 \N \N \N
|
||||
25 26 \N \N \N
|
||||
|
||||
-- !orc_add_col5 --
|
||||
\N
|
||||
\N
|
||||
\N
|
||||
\N
|
||||
\N
|
||||
\N
|
||||
|
||||
-- !orc_add_col6 --
|
||||
1 2 \N \N \N
|
||||
3 4 \N \N \N
|
||||
4 6 \N \N \N
|
||||
7 8 9 \N \N
|
||||
10 11 \N \N \N
|
||||
12 13 \N \N \N
|
||||
14 15 16 \N \N
|
||||
25 26 \N \N \N
|
||||
27 28 29 \N \N
|
||||
|
||||
-- !orc_add_col7 --
|
||||
\N
|
||||
\N
|
||||
\N
|
||||
\N
|
||||
\N
|
||||
\N
|
||||
\N
|
||||
\N
|
||||
\N
|
||||
|
||||
-- !orc_add_col8 --
|
||||
1 2 \N \N \N
|
||||
3 4 \N \N \N
|
||||
4 6 \N \N \N
|
||||
7 8 9 \N \N
|
||||
10 11 \N \N \N
|
||||
12 13 \N \N \N
|
||||
14 15 16 \N \N
|
||||
25 26 \N \N \N
|
||||
27 28 29 \N \N
|
||||
30 31 32 33 \N
|
||||
|
||||
-- !orc_add_col9 --
|
||||
\N
|
||||
\N
|
||||
\N
|
||||
\N
|
||||
\N
|
||||
\N
|
||||
\N
|
||||
\N
|
||||
\N
|
||||
\N
|
||||
|
||||
-- !orc_add_col10 --
|
||||
1 2 \N \N \N
|
||||
3 4 \N \N \N
|
||||
4 6 \N \N \N
|
||||
7 8 9 \N \N
|
||||
10 11 \N \N \N
|
||||
12 13 \N \N \N
|
||||
14 15 16 \N \N
|
||||
17 18 19 20 hello world
|
||||
21 22 23 24 cywcywcyw
|
||||
25 26 \N \N \N
|
||||
27 28 29 \N \N
|
||||
30 31 32 33 \N
|
||||
|
||||
-- !orc_add_col11 --
|
||||
2
|
||||
4
|
||||
6
|
||||
8
|
||||
11
|
||||
13
|
||||
15
|
||||
18
|
||||
22
|
||||
26
|
||||
28
|
||||
31
|
||||
|
||||
-- !orc_add_col12 --
|
||||
7 8 9 \N \N
|
||||
14 15 16 \N \N
|
||||
17 18 19 20 hello world
|
||||
21 22 23 24 cywcywcyw
|
||||
27 28 29 \N \N
|
||||
30 31 32 33 \N
|
||||
|
||||
-- !orc_add_col13 --
|
||||
9
|
||||
16
|
||||
19
|
||||
23
|
||||
29
|
||||
32
|
||||
|
||||
-- !orc_add_col14 --
|
||||
17 18 19 20 hello world
|
||||
21 22 23 24 cywcywcyw
|
||||
30 31 32 33 \N
|
||||
|
||||
-- !orc_add_col15 --
|
||||
20
|
||||
24
|
||||
33
|
||||
|
||||
-- !orc_add_col16 --
|
||||
17 18 19 20 hello world
|
||||
21 22 23 24 cywcywcyw
|
||||
|
||||
-- !orc_add_col17 --
|
||||
cywcywcyw
|
||||
hello world
|
||||
|
||||
-- !orc_add_col18 --
|
||||
7 8 9 \N \N
|
||||
|
||||
-- !orc_add_col19 --
|
||||
|
||||
-- !orc_add_col20 --
|
||||
7 8 9 \N \N
|
||||
14 15 16 \N \N
|
||||
17 18 19 20 hello world
|
||||
21 22 23 24 cywcywcyw
|
||||
27 28 29 \N \N
|
||||
30 31 32 33 \N
|
||||
|
||||
-- !orc_add_col21 --
|
||||
7 8 9 \N \N
|
||||
14 15 16 \N \N
|
||||
17 18 19 20 hello world
|
||||
21 22 23 24 cywcywcyw
|
||||
27 28 29 \N \N
|
||||
30 31 32 33 \N
|
||||
|
||||
-- !orc_add_col22 --
|
||||
|
||||
-- !orc_add_col23 --
|
||||
30 31 32 33 \N
|
||||
|
||||
-- !orc_add_col24 --
|
||||
|
||||
-- !orc_add_col25 --
|
||||
17 18 19 20 hello world
|
||||
21 22 23 24 cywcywcyw
|
||||
30 31 32 33 \N
|
||||
|
||||
-- !orc_add_col26 --
|
||||
|
||||
-- !orc_add_col27 --
|
||||
21 22 23 24 cywcywcyw
|
||||
|
||||
-- !orc_add_col28 --
|
||||
17 18 19 20 hello world
|
||||
21 22 23 24 cywcywcyw
|
||||
|
||||
@ -0,0 +1,95 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
// Regression suite for the Hive ORC table `test_hive_orc_add_column`.
// It creates an HMS catalog, switches to its `default` database and runs 28
// tagged queries (qt_orc_add_col1 .. qt_orc_add_col28) against the table.
// The statements that build the table (including the ALTER TABLE ... ADD COLUMNS
// steps) are reproduced in the comment block at the end of this file.
suite("test_hive_orc_add_column", "all_types,p0,external,hive,external_docker,external_docker_hive") {
|
||||
|
||||
// The suite only runs when the `enableHiveTest` config entry equals "true"
// (compared case-insensitively); otherwise it logs a message and returns early.
String enabled = context.config.otherConfigs.get("enableHiveTest")
|
||||
if (enabled == null || !enabled.equalsIgnoreCase("true")) {
|
||||
// NOTE(review): the log message below contains a typo ("diable" -> "disable").
logger.info("diable Hive test.")
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
// Metastore port and host are read from the regression config; the catalog name is fixed.
String hms_port = context.config.otherConfigs.get("hive3HmsPort")
|
||||
String catalog_name = "hive3_test_orc_add_column"
|
||||
String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
|
||||
|
||||
// Drop any existing catalog with this name, then create it again pointing at the metastore.
sql """drop catalog if exists ${catalog_name}"""
|
||||
sql """create catalog if not exists ${catalog_name} properties (
|
||||
"type"="hms",
|
||||
'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}'
|
||||
);"""
|
||||
sql """use `${catalog_name}`.`default`"""
|
||||
|
||||
|
||||
// Unfiltered read of every column, ordered by id.
qt_orc_add_col1 """select * from test_hive_orc_add_column order by id ;"""
|
||||
// IS NULL predicate on col1..col4: each column is queried once with `select *`
// and once selecting only the filtered column.
qt_orc_add_col2 """select * from test_hive_orc_add_column where col1 is null order by id ;"""
|
||||
qt_orc_add_col3 """select col1 from test_hive_orc_add_column where col1 is null;"""
|
||||
qt_orc_add_col4 """select * from test_hive_orc_add_column where col2 is null order by id ;"""
|
||||
qt_orc_add_col5 """select col2 from test_hive_orc_add_column where col2 is null;"""
|
||||
qt_orc_add_col6 """select * from test_hive_orc_add_column where col3 is null order by id ;"""
|
||||
qt_orc_add_col7 """select col3 from test_hive_orc_add_column where col3 is null;"""
|
||||
qt_orc_add_col8 """select * from test_hive_orc_add_column where col4 is null order by id ;"""
|
||||
qt_orc_add_col9 """select col4 from test_hive_orc_add_column where col4 is null;"""
|
||||
// IS NOT NULL predicate on col1..col4, again with `select *` and with the single column.
qt_orc_add_col10 """select * from test_hive_orc_add_column where col1 is not null order by id ;"""
|
||||
qt_orc_add_col11 """select col1 from test_hive_orc_add_column where col1 is not null order by col1;"""
|
||||
qt_orc_add_col12 """select * from test_hive_orc_add_column where col2 is not null order by id ;"""
|
||||
qt_orc_add_col13 """select col2 from test_hive_orc_add_column where col2 is not null order by col2;"""
|
||||
qt_orc_add_col14 """select * from test_hive_orc_add_column where col3 is not null order by id ;"""
|
||||
qt_orc_add_col15 """select col3 from test_hive_orc_add_column where col3 is not null order by col3;"""
|
||||
qt_orc_add_col16 """select * from test_hive_orc_add_column where col4 is not null order by id ;"""
|
||||
qt_orc_add_col17 """select col4 from test_hive_orc_add_column where col4 is not null order by col4;"""
|
||||
// Predicates on col2: comparison with a literal, and arithmetic against col1 / id.
qt_orc_add_col18 """select * from test_hive_orc_add_column where col2 = 9 order by id ;"""
|
||||
qt_orc_add_col19 """select * from test_hive_orc_add_column where col2 = 190 order by id ;"""
|
||||
qt_orc_add_col20 """select * from test_hive_orc_add_column where col2 - col1 = 1 order by id ;"""
|
||||
qt_orc_add_col21 """select * from test_hive_orc_add_column where col2 - id = 2 order by id ;"""
|
||||
qt_orc_add_col22 """select * from test_hive_orc_add_column where col2 - id = 3 order by id ;"""
|
||||
// Predicates on col3: comparison with a literal, and arithmetic against col1 / id.
qt_orc_add_col23 """select * from test_hive_orc_add_column where col3 = 33 order by id ;"""
|
||||
qt_orc_add_col24 """select * from test_hive_orc_add_column where col3 = 330 order by id ;"""
|
||||
qt_orc_add_col25 """select * from test_hive_orc_add_column where col3 - col1 = 2 order by id ;"""
|
||||
qt_orc_add_col26 """select * from test_hive_orc_add_column where col3 - id != 3 order by id ;"""
|
||||
// Predicates that combine col1, col2 and col3 in a single expression.
qt_orc_add_col27 """select * from test_hive_orc_add_column where col1 + col2 + col3 = 23*3 order by id ;"""
|
||||
qt_orc_add_col28 """select * from test_hive_orc_add_column where col1 + col2 + col3 != 32*3 order by id ; """
|
||||
|
||||
|
||||
|
||||
// Remove the catalog once all queries have run.
// NOTE(review): this statement is inside the try body, so it is not reached if an earlier statement throws.
sql """drop catalog if exists ${catalog_name}"""
|
||||
|
||||
// The finally block is empty: no cleanup is performed here.
} finally {
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
// CREATE TABLE `test_hive_orc_add_column`(
|
||||
// id int,
|
||||
// col1 int
|
||||
// )
|
||||
// stored as orc;
|
||||
// insert into `test_hive_orc_add_column` values(1,2);
|
||||
// insert into `test_hive_orc_add_column` values(3,4),(4,6);
|
||||
// alter table `test_hive_orc_add_column` ADD COLUMNS(col2 int);
|
||||
// insert into `test_hive_orc_add_column` values(7,8,9);
|
||||
// insert into `test_hive_orc_add_column` values(10,11,null);
|
||||
// insert into `test_hive_orc_add_column` values(12,13,null);
|
||||
// insert into `test_hive_orc_add_column` values(14,15,16);
|
||||
// alter table `test_hive_orc_add_column` ADD COLUMNS(col3 int,col4 string);
|
||||
// insert into `test_hive_orc_add_column` values(17,18,19,20,"hello world");
|
||||
// insert into `test_hive_orc_add_column` values(21,22,23,24,"cywcywcyw");
|
||||
// insert into `test_hive_orc_add_column` values(25,26,null,null,null);
|
||||
// insert into `test_hive_orc_add_column` values(27,28,29,null,null);
|
||||
// insert into `test_hive_orc_add_column` values(30,31,32,33,null);
|
||||
Reference in New Issue
Block a user