diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index fd76602884..e1274c1959 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -1752,15 +1752,26 @@ bool OrcReader::_can_filter_by_dict(int slot_id) { } // TODO:check expr like 'a > 10 is null', 'a > 10' should can be filter by dict. - for (auto& ctx : _slot_id_to_filter_conjuncts->at(slot_id)) { - const auto& root_expr = ctx->root(); - if (root_expr->node_type() == TExprNodeType::FUNCTION_CALL) { + std::function visit_function_call = [&](const VExpr* expr) { + if (expr->node_type() == TExprNodeType::FUNCTION_CALL) { std::string is_null_str; - std::string function_name = root_expr->fn().name.function_name; + std::string function_name = expr->fn().name.function_name; if (function_name.compare("is_null_pred") == 0 || function_name.compare("is_not_null_pred") == 0) { return false; } + } else { + for (auto& child : expr->children()) { + if (!visit_function_call(child.get())) { + return false; + } + } + } + return true; + }; + for (auto& ctx : _slot_id_to_filter_conjuncts->at(slot_id)) { + if (!visit_function_call(ctx->root().get())) { + return false; } } return true; diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp index 665ab453b6..97961d8113 100644 --- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp @@ -197,19 +197,31 @@ bool RowGroupReader::_can_filter_by_dict(int slot_id, } // TODO:check expr like 'a > 10 is null', 'a > 10' should can be filter by dict. - for (auto& ctx : _slot_id_to_filter_conjuncts->at(slot_id)) { - const auto& root_expr = ctx->root(); - if (root_expr->node_type() == TExprNodeType::FUNCTION_CALL) { + std::function visit_function_call = [&](const VExpr* expr) { + if (expr->node_type() == TExprNodeType::FUNCTION_CALL) { std::string is_null_str; - std::string function_name = root_expr->fn().name.function_name; + std::string function_name = expr->fn().name.function_name; if (function_name.compare("is_null_pred") == 0 || function_name.compare("is_not_null_pred") == 0) { return false; } + } else { + for (auto& child : expr->children()) { + if (!visit_function_call(child.get())) { + return false; + } + } + } + return true; + }; + for (auto& ctx : _slot_id_to_filter_conjuncts->at(slot_id)) { + if (!visit_function_call(ctx->root().get())) { + return false; } } return true; } + // This function is copied from // https://github.com/apache/impala/blob/master/be/src/exec/parquet/hdfs-parquet-scanner.cc#L1717 bool RowGroupReader::is_dictionary_encoded(const tparquet::ColumnMetaData& column_metadata) { diff --git a/regression-test/data/external_table_p2/hive/test_external_catalog_hive.out b/regression-test/data/external_table_p2/hive/test_external_catalog_hive.out index ae29339cec..0cb7db4732 100644 --- a/regression-test/data/external_table_p2/hive/test_external_catalog_hive.out +++ b/regression-test/data/external_table_p2/hive/test_external_catalog_hive.out @@ -120,3 +120,9 @@ Z6n2t4XA2n7CXTECJ,PE,iBbsCh0RE1Dd2A,z48 \N 2073732 2 13846443 596483.00 21.00 29163.75 0.10 0.08 R F 1994-12-06 1995-01-01 DELIVER IN PERSON FOB dolphins nag furiously q \N 2479044 4 9763795 13805.00 40.00 74332.40 0.05 0.05 R F 1994-11-16 1995-01-01 COLLECT COD RAIL equests hinder qu +-- !null_expr_dict_filter_orc -- +4844 4363 + +-- !null_expr_dict_filter_parquet -- +4844 4363 + diff --git a/regression-test/suites/external_table_p2/hive/test_external_catalog_hive.groovy b/regression-test/suites/external_table_p2/hive/test_external_catalog_hive.groovy index d993327d40..4ca7c8d417 100644 --- a/regression-test/suites/external_table_p2/hive/test_external_catalog_hive.groovy +++ b/regression-test/suites/external_table_p2/hive/test_external_catalog_hive.groovy @@ -93,6 +93,11 @@ suite("test_external_catalog_hive", "p2,external,hive,external_remote,external_r qt_not_single_slot_filter_conjuncts_orc """ select * from multi_catalog.lineitem_string_date_orc where l_commitdate < l_receiptdate and l_receiptdate = '1995-01-01' order by l_orderkey, l_partkey, l_suppkey, l_linenumber limit 10; """ qt_not_single_slot_filter_conjuncts_parquet """ select * from multi_catalog.lineitem_string_date_orc where l_commitdate < l_receiptdate and l_receiptdate = '1995-01-01' order by l_orderkey, l_partkey, l_suppkey, l_linenumber limit 10; """ + // test null expr with dict filter issue + qt_null_expr_dict_filter_orc """ select count(*), count(distinct user_no) from multi_catalog.dict_fitler_test_orc WHERE partitions in ('2023-08-21') and actual_intf_type = 'type1' and (REUSE_FLAG<> 'y' or REUSE_FLAG is null); """ + qt_null_expr_dict_filter_parquet """ select count(*), count(distinct user_no) from multi_catalog.dict_fitler_test_parquet WHERE partitions in ('2023-08-21') and actual_intf_type = 'type1' and (REUSE_FLAG<> 'y' or REUSE_FLAG is null); """ + + // test remember last used database after switch / rename catalog sql """switch ${catalog_name};"""