From 88c5e64c4ae26408e4e6bb84a2d28012238b790c Mon Sep 17 00:00:00 2001 From: starocean999 <40539150+starocean999@users.noreply.github.com> Date: Mon, 3 Apr 2023 22:32:43 +0800 Subject: [PATCH] [fix](nereids) fix bug of SelectMaterializedIndexWithAggregate rule (#18265) 1. create a project node to adjust the output column position when a mv is selected in olap scan node 2. pass SlotReference's column info when call Alias's toSlot() method 3. should compare plan's logical properties when compare two plans after rewrite --- .../nereids/jobs/batch/NereidsRewriter.java | 13 ++++-- ...lectMaterializedIndexWithoutAggregate.java | 45 ++++++++++++++++--- .../nereids/trees/expressions/Alias.java | 5 ++- .../nereids/trees/plans/AbstractPlan.java | 10 +++++ .../grouping_sets/test_grouping_sets1.out | 2 +- .../test_dup_mv_schema_change.out | 15 +++++++ .../grouping_sets/test_grouping_sets1.groovy | 2 +- .../test_dup_mv_schema_change.groovy | 5 +++ 8 files changed, 85 insertions(+), 12 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/batch/NereidsRewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/batch/NereidsRewriter.java index 93f5f3faff..33827d0b16 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/batch/NereidsRewriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/batch/NereidsRewriter.java @@ -62,6 +62,7 @@ import org.apache.doris.nereids.rules.rewrite.logical.NormalizeSort; import org.apache.doris.nereids.rules.rewrite.logical.PruneOlapScanPartition; import org.apache.doris.nereids.rules.rewrite.logical.PruneOlapScanTablet; import org.apache.doris.nereids.rules.rewrite.logical.PushFilterInsideJoin; +import org.apache.doris.nereids.rules.rewrite.logical.PushdownFilterThroughProject; import org.apache.doris.nereids.rules.rewrite.logical.PushdownLimit; import org.apache.doris.nereids.rules.rewrite.logical.ReorderJoin; import org.apache.doris.nereids.rules.rewrite.logical.SemiJoinAggTranspose; @@ -227,18 +228,24 @@ public class NereidsRewriter extends BatchRewriteJob { ), // TODO: I think these rules should be implementation rules, and generate alternative physical plans. - topic("Table/MV/Physical optimization", + topic("Table/Physical optimization", topDown( // TODO: the logical plan should not contains any phase information, // we should refactor like AggregateStrategies, e.g. LimitStrategies, // generate one PhysicalLimit if current distribution is gather or two // PhysicalLimits with gather exchange new SplitLimit(), + new PruneOlapScanPartition() + ) + ), + topic("MV optimization", + topDown( new SelectMaterializedIndexWithAggregate(), new SelectMaterializedIndexWithoutAggregate(), - new PruneOlapScanTablet(), - new PruneOlapScanPartition() + new PushdownFilterThroughProject(), + new MergeProjects(), + new PruneOlapScanTablet() ) ), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/mv/SelectMaterializedIndexWithoutAggregate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/mv/SelectMaterializedIndexWithoutAggregate.java index 9d004ca5f7..41bf87c09d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/mv/SelectMaterializedIndexWithoutAggregate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/mv/SelectMaterializedIndexWithoutAggregate.java @@ -24,15 +24,20 @@ import org.apache.doris.catalog.OlapTable; import org.apache.doris.nereids.rules.Rule; import org.apache.doris.nereids.rules.RuleType; import org.apache.doris.nereids.rules.rewrite.RewriteRuleFactory; +import org.apache.doris.nereids.trees.expressions.Alias; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.SlotReference; import org.apache.doris.nereids.trees.plans.PreAggStatus; import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; +import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Lists; import java.util.List; import java.util.Set; @@ -111,7 +116,7 @@ public class SelectMaterializedIndexWithoutAggregate extends AbstractSelectMater * @param predicatesSupplier Supplier to get pushdown predicates. * @return Result scan node. */ - private LogicalOlapScan select( + private LogicalPlan select( LogicalOlapScan scan, Supplier> requiredScanOutputSupplier, Supplier> predicatesSupplier) { @@ -132,13 +137,17 @@ public class SelectMaterializedIndexWithoutAggregate extends AbstractSelectMater } if (scan.getTable().isDupKeysOrMergeOnWrite()) { // Set pre-aggregation to `on` to keep consistency with legacy logic. - List candidate = scan.getTable().getVisibleIndex().stream() + List candidates = scan.getTable().getVisibleIndex().stream() .filter(index -> !indexHasAggregate(index, scan)) .filter(index -> containAllRequiredColumns(index, scan, requiredScanOutputSupplier.get())) .collect(Collectors.toList()); - return scan.withMaterializedIndexSelected(PreAggStatus.on(), - selectBestIndex(candidate, scan, predicatesSupplier.get())); + long bestIndex = selectBestIndex(candidates, scan, predicatesSupplier.get()); + if (bestIndex == baseIndexId) { + return scan.withMaterializedIndexSelected(PreAggStatus.on(), bestIndex); + } else { + return createProjectForMv(scan.withMaterializedIndexSelected(PreAggStatus.on(), bestIndex)); + } } else { final PreAggStatus preAggStatus; if (preAggEnabledByHint(scan)) { @@ -163,8 +172,8 @@ public class SelectMaterializedIndexWithoutAggregate extends AbstractSelectMater // `candidates` only have base index. return scan.withMaterializedIndexSelected(preAggStatus, baseIndexId); } else { - return scan.withMaterializedIndexSelected(preAggStatus, - selectBestIndex(candidates, scan, predicatesSupplier.get())); + return createProjectForMv(scan.withMaterializedIndexSelected(preAggStatus, + selectBestIndex(candidates, scan, predicatesSupplier.get()))); } } } @@ -174,4 +183,28 @@ public class SelectMaterializedIndexWithoutAggregate extends AbstractSelectMater .stream() .anyMatch(Column::isAggregated); } + + private LogicalProject createProjectForMv(LogicalOlapScan scan) { + Preconditions.checkArgument(scan.getSelectedIndexId() != scan.getTable().getBaseIndexId()); + List mvSlots = scan.getOutputByMvIndex(scan.getSelectedIndexId()); + List baseSlots = scan.getOutputByMvIndex(scan.getTable().getBaseIndexId()); + List aliases = Lists.newArrayList(); + List baseColumnNames = mvSlots.stream() + .map(slot -> org.apache.doris.analysis.CreateMaterializedViewStmt.mvColumnBreaker(slot.getName())) + .collect(Collectors.toList()); + boolean isMvName = org.apache.doris.analysis.CreateMaterializedViewStmt.isMVColumn(mvSlots.get(0).getName()); + for (int i = 0; i < baseColumnNames.size(); ++i) { + for (Slot slot : baseSlots) { + if (((SlotReference) slot).getColumn().get().getName() + .equals(baseColumnNames.get(i))) { + aliases.add( + new Alias(slot.getExprId(), isMvName ? mvSlots.get(i) : slot, baseColumnNames.get(i))); + break; + } + } + } + return new LogicalProject(aliases, + isMvName ? scan.withOutput(scan.getOutputByMvIndex(scan.getSelectedIndexId())) + : scan); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Alias.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Alias.java index 4ca9fca500..05e0a513e8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Alias.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Alias.java @@ -63,7 +63,10 @@ public class Alias extends NamedExpression implements UnaryExpression { @Override public Slot toSlot() throws UnboundException { - return new SlotReference(exprId, name, child().getDataType(), child().nullable(), qualifier); + return new SlotReference(exprId, name, child().getDataType(), child().nullable(), qualifier, + child() instanceof SlotReference + ? ((SlotReference) child()).getColumn().orElse(null) + : null); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/AbstractPlan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/AbstractPlan.java index 09d6581894..aee916224f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/AbstractPlan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/AbstractPlan.java @@ -29,6 +29,7 @@ import org.apache.doris.nereids.metrics.event.CounterEvent; import org.apache.doris.nereids.properties.LogicalProperties; import org.apache.doris.nereids.properties.UnboundLogicalProperties; import org.apache.doris.nereids.trees.AbstractTreeNode; +import org.apache.doris.nereids.trees.TreeNode; import org.apache.doris.nereids.trees.expressions.ExprId; import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.util.MutableState; @@ -194,4 +195,13 @@ public abstract class AbstractPlan extends AbstractTreeNode implements Pla : ""; return groupId; } + + @Override + public boolean deepEquals(TreeNode o) { + AbstractPlan that = (AbstractPlan) o; + if (Objects.equals(getLogicalProperties(), that.getLogicalProperties())) { + return super.deepEquals(o); + } + return false; + } } diff --git a/regression-test/data/query_p0/grouping_sets/test_grouping_sets1.out b/regression-test/data/query_p0/grouping_sets/test_grouping_sets1.out index 31973c7bb9..e5e0f7ee9c 100644 --- a/regression-test/data/query_p0/grouping_sets/test_grouping_sets1.out +++ b/regression-test/data/query_p0/grouping_sets/test_grouping_sets1.out @@ -30,7 +30,7 @@ a \N a -1 0 1 0 1 1 1 \N \N all -1 1 1 1 1 3 2 -- !sql_grouping_nullable -- +\N empty \N empty 2019-05-04 2019-05-04 2019-05-04 2019-05-04 2019-05-05 2019-05-05 2019-05-05 2019-05-05 -\N empty \N empty diff --git a/regression-test/data/schema_change_p0/test_dup_mv_schema_change.out b/regression-test/data/schema_change_p0/test_dup_mv_schema_change.out index b3c7185e3d..b3b5e1cdfa 100644 --- a/regression-test/data/schema_change_p0/test_dup_mv_schema_change.out +++ b/regression-test/data/schema_change_p0/test_dup_mv_schema_change.out @@ -26,3 +26,18 @@ 2 2017-10-01 Beijing 10 2020-01-03T00:00 2020-01-03T00:00 2020-01-03T00:00 1 32 20 1 2 2017-10-01 Beijing 10 2020-01-02T00:00 2020-01-02T00:00 2020-01-02T00:00 1 31 21 1 +-- !sql -- +1 +1 +2 +2 +3 +3 +4 +5 +5 +5 +5 +5 +5 + diff --git a/regression-test/suites/query_p0/grouping_sets/test_grouping_sets1.groovy b/regression-test/suites/query_p0/grouping_sets/test_grouping_sets1.groovy index e3808f55f0..1f12de6628 100644 --- a/regression-test/suites/query_p0/grouping_sets/test_grouping_sets1.groovy +++ b/regression-test/suites/query_p0/grouping_sets/test_grouping_sets1.groovy @@ -191,6 +191,6 @@ suite("test_grouping_sets1") { ) idt_765 group by GROUPING SETS((idt_765.entry_date),()) - ) t_1 on t_0.dim_207 = t_1.dim_207; + ) t_1 on t_0.dim_207 = t_1.dim_207 order by publish_date; """ } diff --git a/regression-test/suites/schema_change_p0/test_dup_mv_schema_change.groovy b/regression-test/suites/schema_change_p0/test_dup_mv_schema_change.groovy index e4e171e86d..c14b7a378d 100644 --- a/regression-test/suites/schema_change_p0/test_dup_mv_schema_change.groovy +++ b/regression-test/suites/schema_change_p0/test_dup_mv_schema_change.groovy @@ -245,6 +245,11 @@ suite ("test_dup_mv_schema_change") { qt_sc """ SELECT * FROM ${tableName} WHERE user_id=2 order by min_dwell_time""" + sql "set enable_nereids_planner=true" + sql "set enable_fallback_to_original_planner=false" + + qt_sql """ SELECT user_id from ${tableName} order by user_id; """ + } finally { //try_sql("DROP TABLE IF EXISTS ${tableName}") }