From 06f0c10c8b49b422abb9ae3cf42fea860289c705 Mon Sep 17 00:00:00 2001 From: starocean999 <40539150+starocean999@users.noreply.github.com> Date: Fri, 17 Nov 2023 12:31:42 +0800 Subject: [PATCH] [fix](nereids) count in correlated subquery should not output null value (#27064) Consider sql: SELECT * FROM t1 WHERE t1.a <= (SELECT COUNT(t2.a) FROM t2 WHERE (t1.b = t2.b)); When unnesting a correlated subquery, we create a left join node. Assume outer query is left table and subquery is right one. If there is no match, the row from right table is filled with nulls. But COUNT function is always not nullable. So wrap COUNT with Nvl to ensure its result is 0 instead of null to get the correct result. --- .../expression/rules/FunctionBinder.java | 19 ++++++++++++++++++- .../nereids_p0/subquery/test_subquery.groovy | 5 +++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FunctionBinder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FunctionBinder.java index ac6a83f9cf..30e36c273f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FunctionBinder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FunctionBinder.java @@ -49,8 +49,11 @@ import org.apache.doris.nereids.trees.expressions.TimestampArithmetic; import org.apache.doris.nereids.trees.expressions.WhenClause; import org.apache.doris.nereids.trees.expressions.functions.BoundFunction; import org.apache.doris.nereids.trees.expressions.functions.FunctionBuilder; +import org.apache.doris.nereids.trees.expressions.functions.agg.Count; import org.apache.doris.nereids.trees.expressions.functions.scalar.Lambda; +import org.apache.doris.nereids.trees.expressions.functions.scalar.Nvl; import org.apache.doris.nereids.trees.expressions.functions.udf.AliasUdfBuilder; +import org.apache.doris.nereids.trees.expressions.literal.BigIntLiteral; import 
org.apache.doris.nereids.trees.expressions.typecoercion.ImplicitCastInputTypes; import org.apache.doris.nereids.types.ArrayType; import org.apache.doris.nereids.types.BigIntType; @@ -167,7 +170,21 @@ public class FunctionBinder extends AbstractExpressionRewriteRule { // we do type coercion in build function in alias function, so it's ok to return directly. return builder.build(functionName, arguments); } else { - return TypeCoercionUtils.processBoundFunction((BoundFunction) builder.build(functionName, arguments)); + Expression boundFunction = TypeCoercionUtils + .processBoundFunction((BoundFunction) builder.build(functionName, arguments)); + if (boundFunction instanceof Count + && context.cascadesContext.getOuterScope().isPresent() + && !context.cascadesContext.getOuterScope().get().getCorrelatedSlots() + .isEmpty()) { + // consider sql: SELECT * FROM t1 WHERE t1.a <= (SELECT COUNT(t2.a) FROM t2 WHERE (t1.b = t2.b)); + // when unnest correlated subquery, we create a left join node. + // outer query is left table and subquery is right one + // if there is no match, the row from right table is filled with nulls + // but COUNT function is always not nullable. 
+ // so wrap COUNT with Nvl to ensure its result is 0 instead of null to get the correct result + boundFunction = new Nvl(boundFunction, new BigIntLiteral(0)); + } + return boundFunction; } } diff --git a/regression-test/suites/nereids_p0/subquery/test_subquery.groovy b/regression-test/suites/nereids_p0/subquery/test_subquery.groovy index 01e347031f..7170fd20d5 100644 --- a/regression-test/suites/nereids_p0/subquery/test_subquery.groovy +++ b/regression-test/suites/nereids_p0/subquery/test_subquery.groovy @@ -118,6 +118,11 @@ suite("test_subquery") { contains("VAGGREGATE") } + explain { + sql """SELECT * FROM table_1000_undef_undef t1 WHERE t1.pk <= (SELECT COUNT(t2.pk) FROM table_1000_undef_undef2 t2 WHERE (t1.col_bigint_undef_signed = t2.col_bigint_undef_signed)); """ + contains("ifnull") + } + sql """DROP TABLE IF EXISTS table_1000_undef_undef""" sql """DROP TABLE IF EXISTS table_1000_undef_undef2"""