[fix](nereids)exists subquery should handle top level scarlar agg correctly (#29135)

This commit is contained in:
starocean999
2023-12-29 09:45:20 +08:00
committed by GitHub
parent 61677d1d4b
commit d2dc12bed5
3 changed files with 193 additions and 22 deletions

View File

@ -325,7 +325,10 @@ public class SubqueryToApply implements AnalysisRuleFactory {
LogicalPlan tmpPlan = childPlan;
for (int i = 0; i < subqueryExprs.size(); ++i) {
SubqueryExpr subqueryExpr = subqueryExprs.get(i);
if (nonMarkJoinExistsWithAgg(subqueryExpr, subqueryToMarkJoinSlot)) {
if (subqueryExpr instanceof Exists && hasTopLevelScalarAgg(subqueryExpr.getQueryPlan())) {
// because top level scalar agg always returns a value or null(for empty input)
// so Exists and Not Exists conjunct are always evaluated to True and false literals respectively
// we don't create apply node for it
continue;
}
@ -338,18 +341,11 @@ public class SubqueryToApply implements AnalysisRuleFactory {
return tmpPlan;
}
private boolean nonMarkJoinExistsWithAgg(SubqueryExpr exists,
Map<SubqueryExpr, Optional<MarkJoinSlotReference>> subqueryToMarkJoinSlot) {
return exists instanceof Exists
&& !subqueryToMarkJoinSlot.get(exists).isPresent()
&& hasTopLevelAggWithoutGroupBy(exists.getQueryPlan());
}
private static boolean hasTopLevelAggWithoutGroupBy(Plan plan) {
private static boolean hasTopLevelScalarAgg(Plan plan) {
if (plan instanceof LogicalAggregate) {
return ((LogicalAggregate) plan).getGroupByExpressions().isEmpty();
} else if (plan instanceof LogicalProject || plan instanceof LogicalSort) {
return hasTopLevelAggWithoutGroupBy(plan.child(0));
return hasTopLevelScalarAgg(plan.child(0));
}
return false;
}
@ -427,19 +423,30 @@ public class SubqueryToApply implements AnalysisRuleFactory {
// The result set when NULL is specified in the subquery and still evaluates to TRUE by using EXISTS
// When the number of rows returned is empty, agg will return null, so if there is more agg,
// it will always consider the returned result to be true
boolean needCreateMarkJoinSlot = isMarkJoin || shouldOutputMarkJoinSlot;
MarkJoinSlotReference markJoinSlotReference = null;
if (hasTopLevelAggWithoutGroupBy(exists.getQueryPlan()) && needCreateMarkJoinSlot) {
markJoinSlotReference =
new MarkJoinSlotReference(statementContext.generateColumnName(), true);
} else if (needCreateMarkJoinSlot) {
markJoinSlotReference =
new MarkJoinSlotReference(statementContext.generateColumnName());
if (hasTopLevelScalarAgg(exists.getQueryPlan())) {
/*
top level scalar agg and always return a value or null for empty input
so Exists and Not Exists conjunct are always evaluated to True and False literals respectively
SELECT *
FROM t1
WHERE EXISTS (
SELECT SUM(a)
FROM t2
WHERE t1.a = t2.b and t1.a = 1;
);
*/
return exists.isNot() ? BooleanLiteral.FALSE : BooleanLiteral.TRUE;
} else {
boolean needCreateMarkJoinSlot = isMarkJoin || shouldOutputMarkJoinSlot;
if (needCreateMarkJoinSlot) {
MarkJoinSlotReference markJoinSlotReference =
new MarkJoinSlotReference(statementContext.generateColumnName());
context.setSubqueryToMarkJoinSlot(exists, Optional.of(markJoinSlotReference));
return markJoinSlotReference;
} else {
return BooleanLiteral.TRUE;
}
}
if (needCreateMarkJoinSlot) {
context.setSubqueryToMarkJoinSlot(exists, Optional.of(markJoinSlotReference));
}
return needCreateMarkJoinSlot ? markJoinSlotReference : BooleanLiteral.TRUE;
}
@Override

View File

@ -489,3 +489,84 @@ true
-- !doris_7643 --
3 3
-- !select_exists1 --
\N \N 0
\N \N 100
-- !select_exists2 --
-- !select_exists3 --
\N \N 0
\N \N 100
-- !select_exists4 --
\N \N 100
-- !select_exists5 --
true
true
-- !select_exists6 --
false
false
-- !select_exists7 --
true
true
-- !select_exists8 --
false
true
-- !select_exists9 --
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
-- !select_exists10 --
-- !select_exists11 --
\N \N 0
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
-- !select_exists12 --
\N \N 0

View File

@ -679,4 +679,87 @@ suite ("sub_query_correlated") {
// order_qt_doris_6937_2 """
// select * from sub_query_correlated_subquery1 where sub_query_correlated_subquery1.k1 not in (select sub_query_correlated_subquery3.k3 from sub_query_correlated_subquery3 where sub_query_correlated_subquery3.v2 > sub_query_correlated_subquery1.k2) or k1 < 10 order by k1, k2;
// """
sql """drop table if exists table_21_undef_partitions2_keys3;"""
sql """drop table if exists table_1_undef_partitions2_keys3;"""
sql """create table table_21_undef_partitions2_keys3 (
`col_int_undef_signed` int ,
`col_varchar_10__undef_signed` varchar(10) ,
`pk` int
) engine=olap
distributed by hash(pk) buckets 10
properties('replication_num' = '1');"""
sql """create table table_1_undef_partitions2_keys3 (
`col_int_undef_signed` int ,
`col_varchar_10__undef_signed` varchar(10) ,
`pk` int
) engine=olap
distributed by hash(pk) buckets 10
properties('replication_num' = '1');"""
sql """insert into table_21_undef_partitions2_keys3(pk,col_int_undef_signed,col_varchar_10__undef_signed) values (0,null,null),(1,6,'c'),(2,7,'m'),(3,null,null),(4,null,'b'),(5,4,null),(6,3,null),(7,0,'z'),(8,null,"me"),(9,6,null),(10,0,""),(11,null,'e'),(12,null,"up"),(13,null,""),(14,7,'s'),(15,null,""),(16,3,null),(17,null,""),(18,null,""),(19,4,""),(20,7,null);"""
sql """insert into table_1_undef_partitions2_keys3(pk,col_int_undef_signed,col_varchar_10__undef_signed) values (0,null,null),(100,null,null);"""
qt_select_exists1 """SELECT *
FROM table_1_undef_partitions2_keys3 AS t1
WHERE EXISTS (
SELECT SUM(`pk`)
FROM table_21_undef_partitions2_keys3 AS t2
WHERE t1.pk = t2.pk and t1.pk = 9
) order by t1.pk;"""
qt_select_exists2 """SELECT *
FROM table_1_undef_partitions2_keys3 AS t1
WHERE not EXISTS (
SELECT SUM(`pk`)
FROM table_21_undef_partitions2_keys3 AS t2
WHERE t1.pk = t2.pk and t1.pk = 9
) order by t1.pk;"""
qt_select_exists3 """SELECT *
FROM table_1_undef_partitions2_keys3 AS t1
WHERE EXISTS (
SELECT SUM(`pk`)
FROM table_21_undef_partitions2_keys3 AS t2
WHERE t1.pk = 9
) or t1.pk = 100 order by t1.pk;"""
qt_select_exists4 """SELECT *
FROM table_1_undef_partitions2_keys3 AS t1
WHERE not EXISTS (
SELECT SUM(`pk`)
FROM table_21_undef_partitions2_keys3 AS t2
WHERE t1.pk = 9
) or t1.pk = 100 order by t1.pk;"""
qt_select_exists5 """select EXISTS (
SELECT SUM(`pk`)
FROM table_21_undef_partitions2_keys3 AS t2
WHERE t1.pk = t2.pk and t1.pk = 9
) from table_1_undef_partitions2_keys3 AS t1;"""
qt_select_exists6 """select not EXISTS (
SELECT SUM(`pk`)
FROM table_21_undef_partitions2_keys3 AS t2
WHERE t1.pk = t2.pk and t1.pk = 9
) from table_1_undef_partitions2_keys3 AS t1 order by t1.pk;"""
qt_select_exists7 """select EXISTS (
SELECT SUM(`pk`)
FROM table_21_undef_partitions2_keys3 AS t2
WHERE t1.pk = t2.pk and t1.pk = 9
) or t1.pk = 100 from table_1_undef_partitions2_keys3 AS t1 order by t1.pk;"""
qt_select_exists8 """select EXISTS (
SELECT SUM(`pk`)
FROM table_21_undef_partitions2_keys3 AS t2
WHERE t1.pk = t2.pk and t1.pk = 9
) and t1.pk = 100 from table_1_undef_partitions2_keys3 AS t1 order by t1.pk;"""
qt_select_exists9 """select t1.* from table_1_undef_partitions2_keys3 AS t1 join table_21_undef_partitions2_keys3 AS t2
on t1.pk = t2.pk and not exists ( SELECT SUM(`pk`)
FROM table_21_undef_partitions2_keys3 AS t2
WHERE t1.pk = t2.pk and t1.pk = 9 ) or t1.pk = 100 order by t1.pk;"""
qt_select_exists10 """select t1.* from table_1_undef_partitions2_keys3 AS t1 join table_21_undef_partitions2_keys3 AS t2
on t1.pk = t2.pk and not exists ( SELECT SUM(`pk`)
FROM table_21_undef_partitions2_keys3 AS t2
WHERE t1.pk = t2.pk and t1.pk = 9 ) or t1.pk > 100 order by t1.pk;"""
qt_select_exists11 """select t1.* from table_1_undef_partitions2_keys3 AS t1 join table_21_undef_partitions2_keys3 AS t2
on t1.pk = t2.pk and exists ( SELECT SUM(`pk`)
FROM table_21_undef_partitions2_keys3 AS t2
WHERE t1.pk = t2.pk and t1.pk = 9 ) or t1.pk = 100 order by t1.pk;"""
qt_select_exists12 """select t1.* from table_1_undef_partitions2_keys3 AS t1 join table_21_undef_partitions2_keys3 AS t2
on t1.pk = t2.pk and exists ( SELECT SUM(`pk`)
FROM table_21_undef_partitions2_keys3 AS t2
WHERE t1.pk = t2.pk and t1.pk = 9 ) or t1.pk > 100 order by t1.pk;"""
}