From 844b7c8cba1ac4747b4d2c1071ad9a8c0ddac819 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B0=A2=E5=81=A5?= Date: Mon, 30 Oct 2023 17:21:42 +0800 Subject: [PATCH] [enhancement](Nereids): check stats unreliable when deriving stats (#26103) check stats unreliable when deriving stats --- .../org/apache/doris/nereids/PlanContext.java | 10 +++++- .../doris/nereids/cost/CostModelV1.java | 31 ++----------------- .../org/apache/doris/nereids/memo/Group.java | 10 +++++- .../doris/nereids/stats/StatsCalculator.java | 4 +++ .../plans/physical/AbstractPhysicalJoin.java | 8 +++++ .../plans/physical/PhysicalHashJoin.java | 7 ----- 6 files changed, 33 insertions(+), 37 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/PlanContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/PlanContext.java index 847d18afdc..07a2cf4eed 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/PlanContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/PlanContext.java @@ -33,9 +33,10 @@ import java.util.List; public class PlanContext { private final List childrenStats; - private Statistics planStats; + private final Statistics planStats; private final int arity; private boolean isBroadcastJoin = false; + private final boolean isStatsReliable; /** * Constructor for PlanContext. @@ -43,15 +44,18 @@ public class PlanContext { public PlanContext(GroupExpression groupExpression) { this.arity = groupExpression.arity(); this.planStats = groupExpression.getOwnerGroup().getStatistics(); + this.isStatsReliable = groupExpression.getOwnerGroup().isStatsReliable(); this.childrenStats = new ArrayList<>(groupExpression.arity()); for (int i = 0; i < groupExpression.arity(); i++) { childrenStats.add(groupExpression.childStatistics(i)); } } + // This is used in GraphSimplifier public PlanContext(Statistics planStats, List childrenStats) { this.planStats = planStats; this.childrenStats = childrenStats; + this.isStatsReliable = false; this.arity = this.childrenStats.size(); } @@ -71,6 +75,10 @@ public class PlanContext { return planStats; } + public boolean isStatsReliable() { + return isStatsReliable; + } + /** * Get child statistics. */ diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java index a2364eb025..8c2b467920 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java @@ -22,7 +22,6 @@ import org.apache.doris.nereids.properties.DistributionSpec; import org.apache.doris.nereids.properties.DistributionSpecGather; import org.apache.doris.nereids.properties.DistributionSpecHash; import org.apache.doris.nereids.properties.DistributionSpecReplicated; -import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.physical.PhysicalAssertNumRows; import org.apache.doris.nereids.trees.plans.physical.PhysicalDeferMaterializeOlapScan; @@ -303,7 +302,7 @@ class CostModelV1 extends PlanVisitor { } // TODO: since the outputs rows may expand a lot, penalty on it will cause bc never be chosen. // will refine this in next generation cost model. - if (isStatsUnknown(physicalHashJoin, buildStats, probeStats)) { + if (!context.isStatsReliable()) { // forbid broadcast join when stats is unknown return CostV1.of(rightRowCount * buildSideFactor + 1 / leftRowCount, rightRowCount, @@ -315,7 +314,7 @@ class CostModelV1 extends PlanVisitor { 0 ); } - if (isStatsUnknown(physicalHashJoin, buildStats, probeStats)) { + if (!context.isStatsReliable()) { return CostV1.of(rightRowCount + 1 / leftRowCount, rightRowCount, 0); @@ -326,30 +325,6 @@ class CostModelV1 extends PlanVisitor { ); } - private boolean isStatsUnknown(PhysicalHashJoin join, - Statistics build, Statistics probe) { - for (Slot slot : join.getConditionSlot()) { - if ((build.columnStatistics().containsKey(slot) && !build.columnStatistics().get(slot).isUnKnown) - || (probe.columnStatistics().containsKey(slot) && !probe.columnStatistics().get(slot).isUnKnown)) { - continue; - } - return true; - } - return false; - } - - private boolean isStatsUnknown(PhysicalNestedLoopJoin join, - Statistics build, Statistics probe) { - for (Slot slot : join.getConditionSlot()) { - if ((build.columnStatistics().containsKey(slot) && !build.columnStatistics().get(slot).isUnKnown) - || (probe.columnStatistics().containsKey(slot) && !probe.columnStatistics().get(slot).isUnKnown)) { - continue; - } - return true; - } - return false; - } - @Override public Cost visitPhysicalNestedLoopJoin( PhysicalNestedLoopJoin nestedLoopJoin, @@ -358,7 +333,7 @@ class CostModelV1 extends PlanVisitor { Preconditions.checkState(context.arity() == 2); Statistics leftStatistics = context.getChildStatistics(0); Statistics rightStatistics = context.getChildStatistics(1); - if (isStatsUnknown(nestedLoopJoin, leftStatistics, rightStatistics)) { + if (!context.isStatsReliable()) { return CostV1.of(rightStatistics.getRowCount() + 1 / leftStatistics.getRowCount(), rightStatistics.getRowCount(), 0); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Group.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Group.java index 750de4fa8f..8009156e13 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Group.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Group.java @@ -59,7 +59,7 @@ public class Group { private final List logicalExpressions = Lists.newArrayList(); private final List physicalExpressions = Lists.newArrayList(); private final List enforcers = Lists.newArrayList(); - + private boolean isStatsReliable = true; private LogicalProperties logicalProperties; // Map of cost lower bounds @@ -119,6 +119,14 @@ public class Group { return groupExpression; } + public void setStatsReliable(boolean statsReliable) { + this.isStatsReliable = statsReliable; + } + + public boolean isStatsReliable() { + return isStatsReliable; + } + public void addLogicalExpression(GroupExpression groupExpression) { groupExpression.setOwnerGroup(this); logicalExpressions.add(groupExpression); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java index 6ee5c642b9..0fad8ea1d1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -221,8 +221,12 @@ public class StatsCalculator extends DefaultPlanVisitor { Plan plan = groupExpression.getPlan(); Statistics newStats = plan.accept(this, null); newStats.enforceValid(); + // We ensure that the rowCount remains unchanged in order to make the cost of each plan comparable. if (groupExpression.getOwnerGroup().getStatistics() == null) { + boolean isReliable = groupExpression.getPlan().getExpressions().stream() + .noneMatch(e -> newStats.isInputSlotsUnknown(e.getInputSlots())); + groupExpression.getOwnerGroup().setStatsReliable(isReliable); groupExpression.getOwnerGroup().setStatistics(newStats); groupExpression.setEstOutputRowCount(newStats.getRowCount()); } else { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/AbstractPhysicalJoin.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/AbstractPhysicalJoin.java index acd2e38139..c6bdd7b6b2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/AbstractPhysicalJoin.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/AbstractPhysicalJoin.java @@ -35,6 +35,7 @@ import org.apache.doris.statistics.Statistics; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList.Builder; +import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; import org.json.JSONObject; @@ -42,7 +43,9 @@ import java.util.Collection; import java.util.List; import java.util.Objects; import java.util.Optional; +import java.util.Set; import java.util.stream.Collectors; +import java.util.stream.Stream; /** * Abstract class for all physical join node. @@ -217,6 +220,11 @@ public abstract class AbstractPhysicalJoin< .build(); } + public Set getConditionSlot() { + return Stream.concat(hashJoinConjuncts.stream(), otherJoinConjuncts.stream()) + .flatMap(expr -> expr.getInputSlots().stream()).collect(ImmutableSet.toImmutableSet()); + } + @Override public String toString() { List args = Lists.newArrayList("type", joinType, diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashJoin.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashJoin.java index a6bd8cfdd6..b60afd6730 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashJoin.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashJoin.java @@ -43,7 +43,6 @@ import org.apache.doris.statistics.Statistics; import org.apache.doris.thrift.TRuntimeFilterType; import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; import com.google.common.collect.Sets; @@ -52,7 +51,6 @@ import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; -import java.util.stream.Stream; /** * Physical hash join plan. @@ -246,11 +244,6 @@ public class PhysicalHashJoin< return pushedDown; } - public Set getConditionSlot() { - return Stream.concat(hashJoinConjuncts.stream(), otherJoinConjuncts.stream()) - .flatMap(expr -> expr.getInputSlots().stream()).collect(ImmutableSet.toImmutableSet()); - } - @Override public String shapeInfo() { StringBuilder builder = new StringBuilder();