From 6bf51150f3178e113f7ce7814364a43ffacb3b04 Mon Sep 17 00:00:00 2001 From: starocean999 <40539150+starocean999@users.noreply.github.com> Date: Mon, 24 Apr 2023 13:58:57 +0800 Subject: [PATCH] [fix](nereids) remove unnecessary project above scan node (#18920) 1. remove unnecessary project node above scan node. 2. fix in subquery may be recognized as scalar subquery bug 3. fix some Quantile related functions' return type bug --- .../org/apache/doris/nereids/DorisParser.g4 | 2 +- .../translator/PhysicalPlanTranslator.java | 54 +++++++++----- .../functions/agg/QuantileUnion.java | 4 +- .../functions/scalar/QuantilePercent.java | 4 +- .../functions/scalar/ToQuantileState.java | 4 +- .../data/nereids_p0/aggregate/aggregate.out | 3 + .../nereids_p0/aggregate/aggregate.groovy | 72 ++++++++++++++++++- 7 files changed, 118 insertions(+), 25 deletions(-) diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 index a3df7fa713..175da2ecec 100644 --- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 +++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 @@ -281,8 +281,8 @@ booleanExpression predicate : NOT? kind=BETWEEN lower=valueExpression AND upper=valueExpression | NOT? kind=(LIKE | REGEXP | RLIKE) pattern=valueExpression - | NOT? kind=IN LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN | NOT? kind=IN LEFT_PAREN query RIGHT_PAREN + | NOT? kind=IN LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN | IS NOT? kind=NULL ; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java index 7961fddd57..f27abc0f69 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java @@ -1454,24 +1454,44 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor slots = context.getTupleDesc(inputPlanNode.getTupleIds().get(0)).getSlots(); - SlotDescriptor lastSlot = slots.get(slots.size() - 1); - if (lastSlot.getColumn() != null && lastSlot.getColumn().getName().equals(Column.ROWID_COL)) { - inputPlanNode.getProjectList().add(new SlotRef(lastSlot)); - injectRowIdColumnSlot(tupleDescriptor); - requiredSlotIdSet.add(lastSlot.getId()); - } - } - if (inputPlanNode instanceof ScanNode) { - updateChildSlotsMaterialization(inputPlanNode, requiredSlotIdSet, requiredByProjectSlotIdSet, context); - return inputFragment; + TupleDescriptor tupleDescriptor = null; + if (requiredByProjectSlotIdSet.size() != requiredSlotIdSet.size() + || execExprList.stream().collect(Collectors.toSet()).size() != execExprList.size() + || execExprList.stream().anyMatch(expr -> !(expr instanceof SlotRef))) { + tupleDescriptor = generateTupleDesc(slotList, null, context); + inputPlanNode.setProjectList(execExprList); + inputPlanNode.setOutputTupleDesc(tupleDescriptor); + } else { + for (int i = 0; i < slotList.size(); ++i) { + context.addExprIdSlotRefPair(slotList.get(i).getExprId(), + (SlotRef) execExprList.get(i)); + } + } + + // TODO: this is a temporary scheme to support two phase read when has project. + // we need to refactor all topn opt into rbo stage. + if (inputPlanNode instanceof OlapScanNode) { + ArrayList slots = + context.getTupleDesc(inputPlanNode.getTupleIds().get(0)).getSlots(); + SlotDescriptor lastSlot = slots.get(slots.size() - 1); + if (lastSlot.getColumn() != null + && lastSlot.getColumn().getName().equals(Column.ROWID_COL)) { + if (tupleDescriptor != null) { + injectRowIdColumnSlot(tupleDescriptor); + SlotRef slotRef = new SlotRef(lastSlot); + inputPlanNode.getProjectList().add(slotRef); + requiredByProjectSlotIdSet.add(lastSlot.getId()); + } + requiredSlotIdSet.add(lastSlot.getId()); + } + } + updateChildSlotsMaterialization(inputPlanNode, requiredSlotIdSet, + requiredByProjectSlotIdSet, context); + } else { + TupleDescriptor tupleDescriptor = generateTupleDesc(slotList, null, context); + inputPlanNode.setProjectList(execExprList); + inputPlanNode.setOutputTupleDesc(tupleDescriptor); } return inputFragment; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/QuantileUnion.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/QuantileUnion.java index 92f6638019..fba37528fd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/QuantileUnion.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/QuantileUnion.java @@ -20,8 +20,8 @@ package org.apache.doris.nereids.trees.expressions.functions.agg; import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.AlwaysNotNullable; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; -import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable; import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.DataType; @@ -36,7 +36,7 @@ import java.util.List; * AggregateFunction 'quantile_union'. This class is generated by GenerateFunction. */ public class QuantileUnion extends AggregateFunction - implements UnaryExpression, ExplicitlyCastableSignature, PropagateNullable { + implements UnaryExpression, ExplicitlyCastableSignature, AlwaysNotNullable { public static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(QuantileStateType.INSTANCE).args(QuantileStateType.INSTANCE) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/QuantilePercent.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/QuantilePercent.java index 8068c1dcb1..3505fd3775 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/QuantilePercent.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/QuantilePercent.java @@ -19,8 +19,8 @@ package org.apache.doris.nereids.trees.expressions.functions.scalar; import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.AlwaysNotNullable; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; -import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable; import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.DoubleType; @@ -36,7 +36,7 @@ import java.util.List; * ScalarFunction 'quantile_percent'. This class is generated by GenerateFunction. */ public class QuantilePercent extends ScalarFunction - implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullable { + implements BinaryExpression, ExplicitlyCastableSignature, AlwaysNotNullable { public static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(DoubleType.INSTANCE).args(QuantileStateType.INSTANCE, FloatType.INSTANCE) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ToQuantileState.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ToQuantileState.java index e9b2a032f7..cdd286372b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ToQuantileState.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ToQuantileState.java @@ -20,8 +20,8 @@ package org.apache.doris.nereids.trees.expressions.functions.scalar; import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.AlwaysNotNullable; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; -import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable; import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.FloatType; @@ -37,7 +37,7 @@ import java.util.List; * ScalarFunction 'to_quantile_state'. This class is generated by GenerateFunction. */ public class ToQuantileState extends ScalarFunction - implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullable { + implements BinaryExpression, ExplicitlyCastableSignature, AlwaysNotNullable { public static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(QuantileStateType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT, FloatType.INSTANCE) diff --git a/regression-test/data/nereids_p0/aggregate/aggregate.out b/regression-test/data/nereids_p0/aggregate/aggregate.out index 2b1ca1c99b..83b67e9ae8 100644 --- a/regression-test/data/nereids_p0/aggregate/aggregate.out +++ b/regression-test/data/nereids_p0/aggregate/aggregate.out @@ -374,3 +374,6 @@ TESTING AGAIN -- !aggregate -- 9 9 10 8 7 7 7 2 +-- !select_quantile_percent -- +5000.0 + diff --git a/regression-test/suites/nereids_p0/aggregate/aggregate.groovy b/regression-test/suites/nereids_p0/aggregate/aggregate.groovy index cd58e700ab..976a7bfae3 100644 --- a/regression-test/suites/nereids_p0/aggregate/aggregate.groovy +++ b/regression-test/suites/nereids_p0/aggregate/aggregate.groovy @@ -173,8 +173,77 @@ suite("aggregate") { qt_aggregate """ select any(c_bigint), any(c_double),any(c_string), any(c_date), any(c_timestamp),any_value(c_date_1), any(c_timestamp_1), any_value(c_timestamp_2), any(c_timestamp_3) , any(c_boolean), any(c_short_decimal), any(c_long_decimal)from ${tableName2} """ + // init query case data + def dbName = "test_agg_db" + sql "DROP DATABASE IF EXISTS ${dbName}" + sql "CREATE DATABASE ${dbName}" + sql "USE $dbName" + sql """ + CREATE TABLE IF NOT EXISTS `baseall` ( + `k0` boolean null comment "", + `k1` tinyint(4) null comment "", + `k2` smallint(6) null comment "", + `k3` int(11) null comment "", + `k4` bigint(20) null comment "", + `k5` decimal(9, 3) null comment "", + `k6` char(5) null comment "", + `k10` date null comment "", + `k11` datetime null comment "", + `k7` varchar(20) null comment "", + `k8` double max null comment "", + `k9` float sum null comment "", + `k12` string replace null comment "", + `k13` largeint(40) replace null comment "" + ) engine=olap + DISTRIBUTED BY HASH(`k1`) BUCKETS 5 properties("replication_num" = "1") + """ + sql """ + CREATE TABLE IF NOT EXISTS `test` ( + `k0` boolean null comment "", + `k1` tinyint(4) null comment "", + `k2` smallint(6) null comment "", + `k3` int(11) null comment "", + `k4` bigint(20) null comment "", + `k5` decimal(9, 3) null comment "", + `k6` char(5) null comment "", + `k10` date null comment "", + `k11` datetime null comment "", + `k7` varchar(20) null comment "", + `k8` double max null comment "", + `k9` float sum null comment "", + `k12` string replace_if_not_null null comment "", + `k13` largeint(40) replace null comment "" + ) engine=olap + DISTRIBUTED BY HASH(`k1`) BUCKETS 5 properties("replication_num" = "1") + """ + sql """ + CREATE TABLE IF NOT EXISTS `bigtable` ( + `k0` boolean null comment "", + `k1` tinyint(4) null comment "", + `k2` smallint(6) null comment "", + `k3` int(11) null comment "", + `k4` bigint(20) null comment "", + `k5` decimal(9, 3) null comment "", + `k6` char(5) null comment "", + `k10` date null comment "", + `k11` datetime null comment "", + `k7` varchar(20) null comment "", + `k8` double max null comment "", + `k9` float sum null comment "", + `k12` string replace null comment "", + `k13` largeint(40) replace null comment "" + ) engine=olap + DISTRIBUTED BY HASH(`k1`) BUCKETS 5 properties("replication_num" = "1") + """ + streamLoad { + table "baseall" + db dbName + set 'column_separator', ',' + file "../baseall.txt" + } + sql "insert into ${dbName}.test select * from ${dbName}.baseall where k1 <= 3" + sql "insert into ${dbName}.bigtable select * from ${dbName}.baseall" - sql "use test_query_db" List fields = ["k1", "k2", "k3", "k4", "k5", "k6", "k10", "k11", "k7", "k8", "k9"] // test_query_normal_aggression String k1 = fields[1] @@ -314,4 +383,5 @@ suite("aggregate") { qt_aggregate """ select avg(distinct c_bigint), avg(distinct c_double) from regression_test_nereids_p0_aggregate.${tableName} """ qt_aggregate """ select count(distinct c_bigint),count(distinct c_double),count(distinct c_string),count(distinct c_date_1),count(distinct c_timestamp_1),count(distinct c_timestamp_2),count(distinct c_timestamp_3),count(distinct c_boolean) from regression_test_nereids_p0_aggregate.${tableName} """ + qt_select_quantile_percent """ select QUANTILE_PERCENT(QUANTILE_UNION(TO_QUANTILE_STATE(c_bigint,2048)),0.5) from regression_test_nereids_p0_aggregate.${tableName}; """ }