[Fix](multi-catalog) Fix incorrect Hive results by disabling the string dict filter if exprs contain a null expr. (#23361)
Issue Number: close #21960. Fix incorrect Hive results by disabling the string dict filter if exprs contain a null expr.
This commit is contained in:
@ -1752,15 +1752,26 @@ bool OrcReader::_can_filter_by_dict(int slot_id) {
|
||||
}
|
||||
|
||||
// TODO: check exprs like 'a > 10 is null'; the 'a > 10' part should be able to be filtered by dict.
|
||||
for (auto& ctx : _slot_id_to_filter_conjuncts->at(slot_id)) {
|
||||
const auto& root_expr = ctx->root();
|
||||
if (root_expr->node_type() == TExprNodeType::FUNCTION_CALL) {
|
||||
std::function<bool(const VExpr* expr)> visit_function_call = [&](const VExpr* expr) {
|
||||
if (expr->node_type() == TExprNodeType::FUNCTION_CALL) {
|
||||
std::string is_null_str;
|
||||
std::string function_name = root_expr->fn().name.function_name;
|
||||
std::string function_name = expr->fn().name.function_name;
|
||||
if (function_name.compare("is_null_pred") == 0 ||
|
||||
function_name.compare("is_not_null_pred") == 0) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
for (auto& child : expr->children()) {
|
||||
if (!visit_function_call(child.get())) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
};
|
||||
for (auto& ctx : _slot_id_to_filter_conjuncts->at(slot_id)) {
|
||||
if (!visit_function_call(ctx->root().get())) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
|
||||
@ -197,19 +197,31 @@ bool RowGroupReader::_can_filter_by_dict(int slot_id,
|
||||
}
|
||||
|
||||
// TODO: check exprs like 'a > 10 is null'; the 'a > 10' part should be able to be filtered by dict.
|
||||
for (auto& ctx : _slot_id_to_filter_conjuncts->at(slot_id)) {
|
||||
const auto& root_expr = ctx->root();
|
||||
if (root_expr->node_type() == TExprNodeType::FUNCTION_CALL) {
|
||||
std::function<bool(const VExpr* expr)> visit_function_call = [&](const VExpr* expr) {
|
||||
if (expr->node_type() == TExprNodeType::FUNCTION_CALL) {
|
||||
std::string is_null_str;
|
||||
std::string function_name = root_expr->fn().name.function_name;
|
||||
std::string function_name = expr->fn().name.function_name;
|
||||
if (function_name.compare("is_null_pred") == 0 ||
|
||||
function_name.compare("is_not_null_pred") == 0) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
for (auto& child : expr->children()) {
|
||||
if (!visit_function_call(child.get())) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
};
|
||||
for (auto& ctx : _slot_id_to_filter_conjuncts->at(slot_id)) {
|
||||
if (!visit_function_call(ctx->root().get())) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// This function is copied from
|
||||
// https://github.com/apache/impala/blob/master/be/src/exec/parquet/hdfs-parquet-scanner.cc#L1717
|
||||
bool RowGroupReader::is_dictionary_encoded(const tparquet::ColumnMetaData& column_metadata) {
|
||||
|
||||
@ -120,3 +120,9 @@ Z6n2t4XA2n7CXTECJ,PE,iBbsCh0RE1Dd2A,z48
|
||||
\N 2073732 2 13846443 596483.00 21.00 29163.75 0.10 0.08 R F 1994-12-06 1995-01-01 DELIVER IN PERSON FOB dolphins nag furiously q
|
||||
\N 2479044 4 9763795 13805.00 40.00 74332.40 0.05 0.05 R F 1994-11-16 1995-01-01 COLLECT COD RAIL equests hinder qu
|
||||
|
||||
-- !null_expr_dict_filter_orc --
|
||||
4844 4363
|
||||
|
||||
-- !null_expr_dict_filter_parquet --
|
||||
4844 4363
|
||||
|
||||
|
||||
@ -93,6 +93,11 @@ suite("test_external_catalog_hive", "p2,external,hive,external_remote,external_r
|
||||
qt_not_single_slot_filter_conjuncts_orc """ select * from multi_catalog.lineitem_string_date_orc where l_commitdate < l_receiptdate and l_receiptdate = '1995-01-01' order by l_orderkey, l_partkey, l_suppkey, l_linenumber limit 10; """
|
||||
qt_not_single_slot_filter_conjuncts_parquet """ select * from multi_catalog.lineitem_string_date_orc where l_commitdate < l_receiptdate and l_receiptdate = '1995-01-01' order by l_orderkey, l_partkey, l_suppkey, l_linenumber limit 10; """
|
||||
|
||||
// test null expr with dict filter issue
|
||||
qt_null_expr_dict_filter_orc """ select count(*), count(distinct user_no) from multi_catalog.dict_fitler_test_orc WHERE partitions in ('2023-08-21') and actual_intf_type = 'type1' and (REUSE_FLAG<> 'y' or REUSE_FLAG is null); """
|
||||
qt_null_expr_dict_filter_parquet """ select count(*), count(distinct user_no) from multi_catalog.dict_fitler_test_parquet WHERE partitions in ('2023-08-21') and actual_intf_type = 'type1' and (REUSE_FLAG<> 'y' or REUSE_FLAG is null); """
|
||||
|
||||
|
||||
// test remember last used database after switch / rename catalog
|
||||
sql """switch ${catalog_name};"""
|
||||
|
||||
|
||||
Reference in New Issue
Block a user