branch-2.1:[fix](parquet/orc) Disable string dictionary filtering when the predicate expression is neither a binary predicate nor an IN predicate (#50749) (#51266)

cherry-pick: #50749
This commit is contained in:
Socrates
2025-05-27 18:12:56 +08:00
committed by GitHub
parent c838678b18
commit dd89a78d30
4 changed files with 217 additions and 85 deletions

View File

@ -2068,29 +2068,17 @@ bool OrcReader::_can_filter_by_dict(int slot_id) {
return false;
}
std::function<bool(const VExpr* expr)> visit_function_call = [&](const VExpr* expr) {
// TODO: The current implementation of dictionary filtering does not take into account
// the implementation of NULL values because the dictionary itself does not contain
// NULL value encoding. As a result, many NULL-related functions or expressions
// cannot work properly, such as is null, is not null, coalesce, etc.
// Here we first disable dictionary filtering when predicate expr is not slot.
// Implementation of NULL value dictionary filtering will be carried out later.
if (expr->node_type() != TExprNodeType::SLOT_REF) {
return false;
}
for (auto& child : expr->children()) {
if (!visit_function_call(child.get())) {
return false;
}
}
return true;
};
for (auto& ctx : _slot_id_to_filter_conjuncts->at(slot_id)) {
if (!visit_function_call(ctx->root().get())) {
return false;
}
}
return true;
// TODO: The current implementation of dictionary filtering does not take into account
// the implementation of NULL values because the dictionary itself does not contain
// NULL value encoding. As a result, many NULL-related functions or expressions
// cannot work properly, such as is null, is not null, coalesce, etc.
// Here we only allow dictionary filtering when every conjunct is an IN_PRED or
// BINARY_PRED whose first child is a SLOT_REF.
// Implementation of NULL value dictionary filtering will be carried out later.
return std::ranges::all_of(_slot_id_to_filter_conjuncts->at(slot_id), [&](const auto& ctx) {
return (ctx->root()->node_type() == TExprNodeType::IN_PRED ||
ctx->root()->node_type() == TExprNodeType::BINARY_PRED) &&
ctx->root()->children()[0]->node_type() == TExprNodeType::SLOT_REF;
});
}
Status OrcReader::on_string_dicts_loaded(

View File

@ -199,37 +199,25 @@ bool RowGroupReader::_can_filter_by_dict(int slot_id,
return false;
}
if (_slot_id_to_filter_conjuncts->find(slot_id) == _slot_id_to_filter_conjuncts->end()) {
return false;
}
if (!is_dictionary_encoded(column_metadata)) {
return false;
}
std::function<bool(const VExpr* expr)> visit_function_call = [&](const VExpr* expr) {
// TODO: The current implementation of dictionary filtering does not take into account
// the implementation of NULL values because the dictionary itself does not contain
// NULL value encoding. As a result, many NULL-related functions or expressions
// cannot work properly, such as is null, is not null, coalesce, etc.
// Here we first disable dictionary filtering when predicate is not slot.
// Implementation of NULL value dictionary filtering will be carried out later.
if (expr->node_type() != TExprNodeType::SLOT_REF) {
return false;
}
for (auto& child : expr->children()) {
if (!visit_function_call(child.get())) {
return false;
}
}
return true;
};
for (auto& ctx : _slot_id_to_filter_conjuncts->at(slot_id)) {
if (!visit_function_call(ctx->root().get())) {
return false;
}
if (_slot_id_to_filter_conjuncts->find(slot_id) == _slot_id_to_filter_conjuncts->end()) {
return false;
}
return true;
// TODO: The current implementation of dictionary filtering does not take into account
// the implementation of NULL values because the dictionary itself does not contain
// NULL value encoding. As a result, many NULL-related functions or expressions
// cannot work properly, such as is null, is not null, coalesce, etc.
// Here we only allow dictionary filtering when every conjunct is an IN_PRED or
// BINARY_PRED whose first child is a SLOT_REF.
// Implementation of NULL value dictionary filtering will be carried out later.
return std::ranges::all_of(_slot_id_to_filter_conjuncts->at(slot_id), [&](const auto& ctx) {
return (ctx->root()->node_type() == TExprNodeType::IN_PRED ||
ctx->root()->node_type() == TExprNodeType::BINARY_PRED) &&
ctx->root()->children()[0]->node_type() == TExprNodeType::SLOT_REF;
});
}
// This function is copied from

View File

@ -59,66 +59,36 @@ null
-- !q15 --
5
-- !q01 --
-- !q16 --
3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos
5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly
-- !q02 --
-- !q17 --
1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among
2 78002 O 46929.18 1996-12-01 1-URGENT Clerk#000000880 0 foxes. pending accounts at the pending, silent asymptot
4 136777 O 32151.78 1995-10-11 \N Clerk#000000124 0 sits. slyly regular warthogs cajole. regular, regular theodolites acro
-- !q03 --
-- !q18 --
1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among
2 78002 O 46929.18 1996-12-01 1-URGENT Clerk#000000880 0 foxes. pending accounts at the pending, silent asymptot
3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos
4 136777 O 32151.78 1995-10-11 \N Clerk#000000124 0 sits. slyly regular warthogs cajole. regular, regular theodolites acro
5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly
-- !q04 --
-- !q19 --
4 136777 O 32151.78 1995-10-11 \N Clerk#000000124 0 sits. slyly regular warthogs cajole. regular, regular theodolites acro
-- !q05 --
-- !q20 --
1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among
2 78002 O 46929.18 1996-12-01 1-URGENT Clerk#000000880 0 foxes. pending accounts at the pending, silent asymptot
3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos
5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly
-- !q06 --
-- !q21 --
1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among
3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos
5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly
-- !q07 --
1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among
3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos
5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly
-- !q08 --
-- !q09 --
2 78002 O 46929.18 1996-12-01 1-URGENT Clerk#000000880 0 foxes. pending accounts at the pending, silent asymptot
3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos
5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly
-- !q10 --
null
-- !q11 --
null
-- !q12 --
null
-- !q13 --
null
-- !q14 --
null
-- !q15 --
5
-- !q01 --
3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos
5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly
@ -179,6 +149,36 @@ null
-- !q15 --
5
-- !q16 --
3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos
5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly
-- !q17 --
1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among
2 78002 O 46929.18 1996-12-01 1-URGENT Clerk#000000880 0 foxes. pending accounts at the pending, silent asymptot
4 136777 O 32151.78 1995-10-11 \N Clerk#000000124 0 sits. slyly regular warthogs cajole. regular, regular theodolites acro
-- !q18 --
1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among
2 78002 O 46929.18 1996-12-01 1-URGENT Clerk#000000880 0 foxes. pending accounts at the pending, silent asymptot
3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos
4 136777 O 32151.78 1995-10-11 \N Clerk#000000124 0 sits. slyly regular warthogs cajole. regular, regular theodolites acro
5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly
-- !q19 --
4 136777 O 32151.78 1995-10-11 \N Clerk#000000124 0 sits. slyly regular warthogs cajole. regular, regular theodolites acro
-- !q20 --
1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among
2 78002 O 46929.18 1996-12-01 1-URGENT Clerk#000000880 0 foxes. pending accounts at the pending, silent asymptot
3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos
5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly
-- !q21 --
1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among
3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos
5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly
-- !q01 --
3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos
5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly
@ -239,3 +239,123 @@ null
-- !q15 --
5
-- !q16 --
3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos
5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly
-- !q17 --
1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among
2 78002 O 46929.18 1996-12-01 1-URGENT Clerk#000000880 0 foxes. pending accounts at the pending, silent asymptot
4 136777 O 32151.78 1995-10-11 \N Clerk#000000124 0 sits. slyly regular warthogs cajole. regular, regular theodolites acro
-- !q18 --
1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among
2 78002 O 46929.18 1996-12-01 1-URGENT Clerk#000000880 0 foxes. pending accounts at the pending, silent asymptot
3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos
4 136777 O 32151.78 1995-10-11 \N Clerk#000000124 0 sits. slyly regular warthogs cajole. regular, regular theodolites acro
5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly
-- !q19 --
4 136777 O 32151.78 1995-10-11 \N Clerk#000000124 0 sits. slyly regular warthogs cajole. regular, regular theodolites acro
-- !q20 --
1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among
2 78002 O 46929.18 1996-12-01 1-URGENT Clerk#000000880 0 foxes. pending accounts at the pending, silent asymptot
3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos
5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly
-- !q21 --
1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among
3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos
5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly
-- !q01 --
3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos
5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly
-- !q02 --
1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among
2 78002 O 46929.18 1996-12-01 1-URGENT Clerk#000000880 0 foxes. pending accounts at the pending, silent asymptot
4 136777 O 32151.78 1995-10-11 \N Clerk#000000124 0 sits. slyly regular warthogs cajole. regular, regular theodolites acro
-- !q03 --
1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among
2 78002 O 46929.18 1996-12-01 1-URGENT Clerk#000000880 0 foxes. pending accounts at the pending, silent asymptot
3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos
4 136777 O 32151.78 1995-10-11 \N Clerk#000000124 0 sits. slyly regular warthogs cajole. regular, regular theodolites acro
5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly
-- !q04 --
4 136777 O 32151.78 1995-10-11 \N Clerk#000000124 0 sits. slyly regular warthogs cajole. regular, regular theodolites acro
-- !q05 --
1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among
2 78002 O 46929.18 1996-12-01 1-URGENT Clerk#000000880 0 foxes. pending accounts at the pending, silent asymptot
3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos
5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly
-- !q06 --
1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among
3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos
5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly
-- !q07 --
1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among
3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos
5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly
-- !q08 --
-- !q09 --
2 78002 O 46929.18 1996-12-01 1-URGENT Clerk#000000880 0 foxes. pending accounts at the pending, silent asymptot
3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos
5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly
-- !q10 --
null
-- !q11 --
null
-- !q12 --
null
-- !q13 --
null
-- !q14 --
null
-- !q15 --
5
-- !q16 --
3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos
5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly
-- !q17 --
1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among
2 78002 O 46929.18 1996-12-01 1-URGENT Clerk#000000880 0 foxes. pending accounts at the pending, silent asymptot
4 136777 O 32151.78 1995-10-11 \N Clerk#000000124 0 sits. slyly regular warthogs cajole. regular, regular theodolites acro
-- !q18 --
1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among
2 78002 O 46929.18 1996-12-01 1-URGENT Clerk#000000880 0 foxes. pending accounts at the pending, silent asymptot
3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos
4 136777 O 32151.78 1995-10-11 \N Clerk#000000124 0 sits. slyly regular warthogs cajole. regular, regular theodolites acro
5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly
-- !q19 --
4 136777 O 32151.78 1995-10-11 \N Clerk#000000124 0 sits. slyly regular warthogs cajole. regular, regular theodolites acro
-- !q20 --
1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among
2 78002 O 46929.18 1996-12-01 1-URGENT Clerk#000000880 0 foxes. pending accounts at the pending, silent asymptot
3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos
5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly
-- !q21 --
1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among
3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos
5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly

View File

@ -62,6 +62,24 @@ suite("test_string_dict_filter", "p0,external,hive,external_docker,external_dock
qt_q15 """
select count(o_orderpriority) from ( select (case when o_orderpriority = 'x' then '1' when o_orderpriority = 'y' then '2' else '0' end) as o_orderpriority from test_string_dict_filter_parquet ) as A where o_orderpriority = '0';
"""
qt_q16 """
select * from test_string_dict_filter_parquet where cast(o_orderstatus as string) = 'F';
"""
qt_q17 """
select * from test_string_dict_filter_parquet where cast(o_orderstatus as string) = 'O';
"""
qt_q18 """
select * from test_string_dict_filter_parquet where cast(o_orderstatus as string) in ('O', 'F');
"""
qt_q19 """
select * from test_string_dict_filter_parquet where cast(o_orderpriority as string) is null;
"""
qt_q20 """
select * from test_string_dict_filter_parquet where cast(o_orderpriority as string) is not null;
"""
qt_q21 """
select * from test_string_dict_filter_parquet where cast(o_orderpriority as string) in ('5-LOW', NULL);
"""
}
def q_orc = {
qt_q01 """
@ -109,6 +127,24 @@ suite("test_string_dict_filter", "p0,external,hive,external_docker,external_dock
qt_q15 """
select count(o_orderpriority) from ( select (case when o_orderpriority = 'x' then '1' when o_orderpriority = 'y' then '2' else '0' end) as o_orderpriority from test_string_dict_filter_orc ) as A where o_orderpriority = '0';
"""
qt_q16 """
select * from test_string_dict_filter_orc where cast(o_orderstatus as string) = 'F';
"""
qt_q17 """
select * from test_string_dict_filter_orc where cast(o_orderstatus as string) = 'O';
"""
qt_q18 """
select * from test_string_dict_filter_orc where cast(o_orderstatus as string) in ('O', 'F');
"""
qt_q19 """
select * from test_string_dict_filter_orc where cast(o_orderpriority as string) is null;
"""
qt_q20 """
select * from test_string_dict_filter_orc where cast(o_orderpriority as string) is not null;
"""
qt_q21 """
select * from test_string_dict_filter_orc where cast(o_orderpriority as string) in ('5-LOW', NULL);
"""
}
String enabled = context.config.otherConfigs.get("enableHiveTest")
if (enabled == null || !enabled.equalsIgnoreCase("true")) {