[Fix](multi-catalog) Fix incorrect Hive query results by disabling the string dict filter if the exprs contain a null expr. (#23361)

Issue Number: close #21960

Fix incorrect Hive query results by disabling the string dict filter if the exprs contain a null expr.
This commit is contained in:
Qi Chen
2023-08-25 21:16:43 +08:00
committed by GitHub
parent 9d1c702b3a
commit 29273771f7
4 changed files with 42 additions and 8 deletions

View File

@ -1752,15 +1752,26 @@ bool OrcReader::_can_filter_by_dict(int slot_id) {
}
// TODO: check exprs like 'a > 10 is null'; the inner 'a > 10' should still be filterable by dict.
for (auto& ctx : _slot_id_to_filter_conjuncts->at(slot_id)) {
const auto& root_expr = ctx->root();
if (root_expr->node_type() == TExprNodeType::FUNCTION_CALL) {
std::function<bool(const VExpr* expr)> visit_function_call = [&](const VExpr* expr) {
if (expr->node_type() == TExprNodeType::FUNCTION_CALL) {
std::string is_null_str;
std::string function_name = root_expr->fn().name.function_name;
std::string function_name = expr->fn().name.function_name;
if (function_name.compare("is_null_pred") == 0 ||
function_name.compare("is_not_null_pred") == 0) {
return false;
}
} else {
for (auto& child : expr->children()) {
if (!visit_function_call(child.get())) {
return false;
}
}
}
return true;
};
for (auto& ctx : _slot_id_to_filter_conjuncts->at(slot_id)) {
if (!visit_function_call(ctx->root().get())) {
return false;
}
}
return true;

View File

@ -197,19 +197,31 @@ bool RowGroupReader::_can_filter_by_dict(int slot_id,
}
// TODO: check exprs like 'a > 10 is null'; the inner 'a > 10' should still be filterable by dict.
for (auto& ctx : _slot_id_to_filter_conjuncts->at(slot_id)) {
const auto& root_expr = ctx->root();
if (root_expr->node_type() == TExprNodeType::FUNCTION_CALL) {
std::function<bool(const VExpr* expr)> visit_function_call = [&](const VExpr* expr) {
if (expr->node_type() == TExprNodeType::FUNCTION_CALL) {
std::string is_null_str;
std::string function_name = root_expr->fn().name.function_name;
std::string function_name = expr->fn().name.function_name;
if (function_name.compare("is_null_pred") == 0 ||
function_name.compare("is_not_null_pred") == 0) {
return false;
}
} else {
for (auto& child : expr->children()) {
if (!visit_function_call(child.get())) {
return false;
}
}
}
return true;
};
for (auto& ctx : _slot_id_to_filter_conjuncts->at(slot_id)) {
if (!visit_function_call(ctx->root().get())) {
return false;
}
}
return true;
}
// This function is copied from
// https://github.com/apache/impala/blob/master/be/src/exec/parquet/hdfs-parquet-scanner.cc#L1717
bool RowGroupReader::is_dictionary_encoded(const tparquet::ColumnMetaData& column_metadata) {

View File

@ -120,3 +120,9 @@ Z6n2t4XA2n7CXTECJ,PE,iBbsCh0RE1Dd2A,z48
\N 2073732 2 13846443 596483.00 21.00 29163.75 0.10 0.08 R F 1994-12-06 1995-01-01 DELIVER IN PERSON FOB dolphins nag furiously q
\N 2479044 4 9763795 13805.00 40.00 74332.40 0.05 0.05 R F 1994-11-16 1995-01-01 COLLECT COD RAIL equests hinder qu
-- !null_expr_dict_filter_orc --
4844 4363
-- !null_expr_dict_filter_parquet --
4844 4363

View File

@ -93,6 +93,11 @@ suite("test_external_catalog_hive", "p2,external,hive,external_remote,external_r
qt_not_single_slot_filter_conjuncts_orc """ select * from multi_catalog.lineitem_string_date_orc where l_commitdate < l_receiptdate and l_receiptdate = '1995-01-01' order by l_orderkey, l_partkey, l_suppkey, l_linenumber limit 10; """
qt_not_single_slot_filter_conjuncts_parquet """ select * from multi_catalog.lineitem_string_date_orc where l_commitdate < l_receiptdate and l_receiptdate = '1995-01-01' order by l_orderkey, l_partkey, l_suppkey, l_linenumber limit 10; """
// test null expr with dict filter issue
qt_null_expr_dict_filter_orc """ select count(*), count(distinct user_no) from multi_catalog.dict_fitler_test_orc WHERE partitions in ('2023-08-21') and actual_intf_type = 'type1' and (REUSE_FLAG<> 'y' or REUSE_FLAG is null); """
qt_null_expr_dict_filter_parquet """ select count(*), count(distinct user_no) from multi_catalog.dict_fitler_test_parquet WHERE partitions in ('2023-08-21') and actual_intf_type = 'type1' and (REUSE_FLAG<> 'y' or REUSE_FLAG is null); """
// test remember last used database after switch / rename catalog
sql """switch ${catalog_name};"""