[opt](nereids) use 2 phase agg above union all (#26245)
forbid one phase agg for pattern: agg-unionAll one phase agg plan: agg-union-hashDistribute-children two phase agg plan: agg(global) - hashDistribute-agg(local)-union-randomDistribute the key point is the cost of randomDistribute is much lower than the hashDistribute, and hence two-phase agg wins.
This commit is contained in:
@ -56,6 +56,7 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> {
|
||||
// the penalty factor is no more than BROADCAST_JOIN_SKEW_PENALTY_LIMIT
|
||||
static final double BROADCAST_JOIN_SKEW_RATIO = 30.0;
|
||||
static final double BROADCAST_JOIN_SKEW_PENALTY_LIMIT = 2.0;
|
||||
static final double RANDOM_SHUFFLE_TO_HASH_SHUFFLE_FACTOR = 0.1;
|
||||
private final int beNumber;
|
||||
|
||||
public CostModelV1(ConnectContext connectContext) {
|
||||
@ -217,10 +218,11 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> {
|
||||
}
|
||||
|
||||
// any
|
||||
// cost of randome shuffle is lower than hash shuffle.
|
||||
return CostV1.of(context.getSessionVariable(),
|
||||
intputRowCount,
|
||||
0,
|
||||
0);
|
||||
0,
|
||||
intputRowCount * childStatistics.dataSizeFactor() * RANDOM_SHUFFLE_TO_HASH_SHUFFLE_FACTOR / beNumber);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@ -42,6 +42,7 @@ import org.apache.doris.nereids.trees.plans.physical.PhysicalNestedLoopJoin;
|
||||
import org.apache.doris.nereids.trees.plans.physical.PhysicalPartitionTopN;
|
||||
import org.apache.doris.nereids.trees.plans.physical.PhysicalProject;
|
||||
import org.apache.doris.nereids.trees.plans.physical.PhysicalSetOperation;
|
||||
import org.apache.doris.nereids.trees.plans.physical.PhysicalUnion;
|
||||
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
|
||||
import org.apache.doris.nereids.util.JoinUtils;
|
||||
import org.apache.doris.qe.ConnectContext;
|
||||
@ -117,6 +118,15 @@ public class ChildrenPropertiesRegulator extends PlanVisitor<Boolean, Void> {
|
||||
&& children.get(0).getPlan() instanceof PhysicalDistribute) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// agg(group by x)-union all(A, B)
|
||||
// no matter x.ndv is high or not, it is not worthwhile to shuffle A and B by x
|
||||
// and hence we forbid one phase agg
|
||||
if (agg.getAggMode() == AggMode.INPUT_TO_RESULT
|
||||
&& children.get(0).getPlan() instanceof PhysicalUnion
|
||||
&& !((PhysicalUnion) children.get(0).getPlan()).isDistinct()) {
|
||||
return false;
|
||||
}
|
||||
// forbid multi distinct opt that bad than multi-stage version when multi-stage can be executed in one fragment
|
||||
if (agg.getAggMode() == AggMode.INPUT_TO_BUFFER || agg.getAggMode() == AggMode.INPUT_TO_RESULT) {
|
||||
List<MultiDistinction> multiDistinctions = agg.getOutputExpressions().stream()
|
||||
|
||||
@ -194,4 +194,8 @@ public abstract class PhysicalSetOperation extends AbstractPhysicalPlan implemen
|
||||
.map(NamedExpression::toSlot)
|
||||
.collect(ImmutableList.toImmutableList());
|
||||
}
|
||||
|
||||
public boolean isDistinct() {
|
||||
return qualifier == Qualifier.DISTINCT;
|
||||
}
|
||||
}
|
||||
|
||||
@ -40,7 +40,7 @@ import java.util.Set;
|
||||
public class ColumnStatistic {
|
||||
|
||||
public static final double STATS_ERROR = 0.1D;
|
||||
|
||||
public static final double ALMOST_UNIQUE_FACTOR = 0.9;
|
||||
public static final StatsType NDV = StatsType.NDV;
|
||||
public static final StatsType AVG_SIZE = StatsType.AVG_SIZE;
|
||||
public static final StatsType MAX_SIZE = StatsType.MAX_SIZE;
|
||||
@ -211,7 +211,7 @@ public class ColumnStatistic {
|
||||
}
|
||||
|
||||
public static boolean isAlmostUnique(double ndv, double rowCount) {
|
||||
return rowCount * 0.9 < ndv && ndv < rowCount * 1.1;
|
||||
return rowCount * ALMOST_UNIQUE_FACTOR < ndv;
|
||||
}
|
||||
|
||||
public ColumnStatistic updateByLimit(long limit, double rowCount) {
|
||||
|
||||
Reference in New Issue
Block a user