[enhancement](nereids) Refactor statistics (#17637)

1. Support more expression types
2. Support derive with histogram
3. Use StatisticRange to abstract the logic
4. Use Statistics rather than StatsDeriveResult
This commit is contained in:
AKIRA
2023-03-14 14:10:55 +09:00
committed by GitHub
parent be3a7e69cd
commit f1dde20315
78 changed files with 2151 additions and 1532 deletions

View File

@ -111,7 +111,7 @@ public class ShowColumnStatsStmt extends ShowStmt {
public ShowResultSet constructResultSet(List<Pair<String, ColumnStatistic>> columnStatistics) {
List<List<String>> result = Lists.newArrayList();
columnStatistics.forEach(p -> {
if (p.second == ColumnStatistic.DEFAULT) {
if (p.second == ColumnStatistic.UNKNOWN) {
return;
}
List<String> row = Lists.newArrayList();

View File

@ -19,7 +19,7 @@ package org.apache.doris.nereids;
import org.apache.doris.nereids.memo.GroupExpression;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import java.util.ArrayList;
import java.util.Arrays;
@ -32,8 +32,8 @@ import java.util.List;
* Inspired by GPORCA-CExpressionHandle.
*/
public class PlanContext {
private List<StatsDeriveResult> childrenStats = new ArrayList<>();
private StatsDeriveResult planStats = new StatsDeriveResult(0);
private List<Statistics> childrenStats = new ArrayList<>();
private Statistics planStats;
private int arity = 0;
/**
@ -51,7 +51,7 @@ public class PlanContext {
}
}
public PlanContext(StatsDeriveResult planStats, StatsDeriveResult... childrenStats) {
public PlanContext(Statistics planStats, Statistics... childrenStats) {
this.planStats = planStats;
this.childrenStats = Arrays.asList(childrenStats);
this.arity = this.childrenStats.size();
@ -61,14 +61,14 @@ public class PlanContext {
return arity;
}
public StatsDeriveResult getStatisticsWithCheck() {
public Statistics getStatisticsWithCheck() {
return planStats;
}
/**
* Get child statistics.
*/
public StatsDeriveResult getChildStatistics(int index) {
public Statistics getChildStatistics(int index) {
return childrenStats.get(index);
}
}

View File

@ -40,7 +40,7 @@ import org.apache.doris.nereids.trees.plans.physical.PhysicalStorageLayerAggrega
import org.apache.doris.nereids.trees.plans.physical.PhysicalTopN;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import com.google.common.base.Preconditions;
@ -71,12 +71,12 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> {
@Override
public Cost visitPhysicalOlapScan(PhysicalOlapScan physicalOlapScan, PlanContext context) {
StatsDeriveResult statistics = context.getStatisticsWithCheck();
Statistics statistics = context.getStatisticsWithCheck();
return CostV1.ofCpu(statistics.getRowCount());
}
public Cost visitPhysicalSchemaScan(PhysicalSchemaScan physicalSchemaScan, PlanContext context) {
StatsDeriveResult statistics = context.getStatisticsWithCheck();
Statistics statistics = context.getStatisticsWithCheck();
return CostV1.ofCpu(statistics.getRowCount());
}
@ -91,7 +91,7 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> {
@Override
public Cost visitPhysicalFileScan(PhysicalFileScan physicalFileScan, PlanContext context) {
StatsDeriveResult statistics = context.getStatisticsWithCheck();
Statistics statistics = context.getStatisticsWithCheck();
return CostV1.ofCpu(statistics.getRowCount());
}
@ -102,13 +102,13 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> {
@Override
public Cost visitPhysicalJdbcScan(PhysicalJdbcScan physicalJdbcScan, PlanContext context) {
StatsDeriveResult statistics = context.getStatisticsWithCheck();
Statistics statistics = context.getStatisticsWithCheck();
return CostV1.ofCpu(statistics.getRowCount());
}
@Override
public Cost visitPhysicalEsScan(PhysicalEsScan physicalEsScan, PlanContext context) {
StatsDeriveResult statistics = context.getStatisticsWithCheck();
Statistics statistics = context.getStatisticsWithCheck();
return CostV1.ofCpu(statistics.getRowCount());
}
@ -116,11 +116,11 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> {
public Cost visitPhysicalQuickSort(
PhysicalQuickSort<? extends Plan> physicalQuickSort, PlanContext context) {
// TODO: consider two-phase sort and enforcer.
StatsDeriveResult statistics = context.getStatisticsWithCheck();
StatsDeriveResult childStatistics = context.getChildStatistics(0);
Statistics statistics = context.getStatisticsWithCheck();
Statistics childStatistics = context.getChildStatistics(0);
if (physicalQuickSort.getSortPhase().isGather()) {
// Now we do more like two-phase sort, so penalise one-phase sort
statistics.updateRowCount(statistics.getRowCount() * 100);
statistics = statistics.withRowCount(statistics.getRowCount() * 100);
}
return CostV1.of(
childStatistics.getRowCount(),
@ -131,11 +131,11 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> {
@Override
public Cost visitPhysicalTopN(PhysicalTopN<? extends Plan> topN, PlanContext context) {
// TODO: consider two-phase sort and enforcer.
StatsDeriveResult statistics = context.getStatisticsWithCheck();
StatsDeriveResult childStatistics = context.getChildStatistics(0);
Statistics statistics = context.getStatisticsWithCheck();
Statistics childStatistics = context.getChildStatistics(0);
if (topN.getSortPhase().isGather()) {
// Now we do more like two-phase sort, so penalise one-phase sort
statistics.updateRowCount(statistics.getRowCount() * 100);
statistics = statistics.withRowCount(statistics.getRowCount() * 100);
}
return CostV1.of(
childStatistics.getRowCount(),
@ -146,7 +146,7 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> {
@Override
public Cost visitPhysicalDistribute(
PhysicalDistribute<? extends Plan> distribute, PlanContext context) {
StatsDeriveResult childStatistics = context.getChildStatistics(0);
Statistics childStatistics = context.getChildStatistics(0);
DistributionSpec spec = distribute.getDistributionSpec();
// shuffle
if (spec instanceof DistributionSpecHash) {
@ -196,8 +196,8 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> {
PhysicalHashAggregate<? extends Plan> aggregate, PlanContext context) {
// TODO: stage.....
StatsDeriveResult statistics = context.getStatisticsWithCheck();
StatsDeriveResult inputStatistics = context.getChildStatistics(0);
Statistics statistics = context.getStatisticsWithCheck();
Statistics inputStatistics = context.getChildStatistics(0);
return CostV1.of(inputStatistics.getRowCount(), statistics.getRowCount(), 0);
}
@ -205,11 +205,11 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> {
public Cost visitPhysicalHashJoin(
PhysicalHashJoin<? extends Plan, ? extends Plan> physicalHashJoin, PlanContext context) {
Preconditions.checkState(context.arity() == 2);
StatsDeriveResult outputStats = context.getStatisticsWithCheck();
Statistics outputStats = context.getStatisticsWithCheck();
double outputRowCount = outputStats.getRowCount();
StatsDeriveResult probeStats = context.getChildStatistics(0);
StatsDeriveResult buildStats = context.getChildStatistics(1);
Statistics probeStats = context.getChildStatistics(0);
Statistics buildStats = context.getChildStatistics(1);
double leftRowCount = probeStats.getRowCount();
double rightRowCount = buildStats.getRowCount();
@ -227,7 +227,6 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> {
//penalty for right deep tree
penalty += rightRowCount;
}
if (physicalHashJoin.getJoinType().isCrossJoin()) {
return CostV1.of(leftRowCount + rightRowCount + outputRowCount,
0,
@ -248,8 +247,8 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> {
// TODO: copy from physicalHashJoin, should update according to physical nested loop join properties.
Preconditions.checkState(context.arity() == 2);
StatsDeriveResult leftStatistics = context.getChildStatistics(0);
StatsDeriveResult rightStatistics = context.getChildStatistics(1);
Statistics leftStatistics = context.getChildStatistics(0);
Statistics rightStatistics = context.getChildStatistics(1);
return CostV1.of(
leftStatistics.getRowCount() * rightStatistics.getRowCount(),
@ -269,7 +268,7 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> {
@Override
public Cost visitPhysicalGenerate(PhysicalGenerate<? extends Plan> generate, PlanContext context) {
StatsDeriveResult statistics = context.getStatisticsWithCheck();
Statistics statistics = context.getStatisticsWithCheck();
return CostV1.of(
statistics.getRowCount(),
statistics.getRowCount(),

View File

@ -28,7 +28,7 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalJoin;
import org.apache.doris.nereids.trees.plans.physical.PhysicalHashJoin;
import org.apache.doris.nereids.trees.plans.physical.PhysicalNestedLoopJoin;
import org.apache.doris.nereids.util.JoinUtils;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import com.google.common.base.Preconditions;
@ -62,7 +62,7 @@ public class GraphSimplifier {
// It cached the plan stats in simplification. we don't store it in hyper graph,
// because it's just used for simulating join. In fact, the graph simplifier
// just generate the partial order of join operator.
private final HashMap<Long, StatsDeriveResult> cacheStats = new HashMap<>();
private final HashMap<Long, Statistics> cacheStats = new HashMap<>();
private final HashMap<Long, Cost> cacheCost = new HashMap<>();
private final Stack<SimplificationStep> appliedSteps = new Stack<>();
@ -311,8 +311,8 @@ public class GraphSimplifier {
long right1 = edge1.getRight();
long left2 = edge2.getLeft();
long right2 = edge2.getRight();
Pair<StatsDeriveResult, Edge> edge1Before2;
Pair<StatsDeriveResult, Edge> edge2Before1;
Pair<Statistics, Edge> edge1Before2;
Pair<Statistics, Edge> edge2Before1;
List<Long> superBitset = new ArrayList<>();
if (tryGetSuperset(left1, left2, superBitset)) {
// (common Join1 right1) Join2 right2
@ -342,15 +342,15 @@ public class GraphSimplifier {
return Optional.of(simplificationStep);
}
Pair<StatsDeriveResult, Edge> threeLeftJoin(long bitmap1, Edge edge1, long bitmap2, Edge edge2, long bitmap3) {
Pair<Statistics, Edge> threeLeftJoin(long bitmap1, Edge edge1, long bitmap2, Edge edge2, long bitmap3) {
// (plan1 edge1 plan2) edge2 plan3
// The join may have redundant table, e.g., t1,t2 join t3 join t2,t4
// Therefore, the cost is not accurate
Preconditions.checkArgument(
cacheStats.containsKey(bitmap1) && cacheStats.containsKey(bitmap2) && cacheStats.containsKey(bitmap3));
StatsDeriveResult leftStats = JoinEstimation.estimate(cacheStats.get(bitmap1), cacheStats.get(bitmap2),
Statistics leftStats = JoinEstimation.estimate(cacheStats.get(bitmap1), cacheStats.get(bitmap2),
edge1.getJoin());
StatsDeriveResult joinStats = JoinEstimation.estimate(leftStats, cacheStats.get(bitmap3), edge2.getJoin());
Statistics joinStats = JoinEstimation.estimate(leftStats, cacheStats.get(bitmap3), edge2.getJoin());
Edge edge = new Edge(edge2.getJoin(), -1);
long newLeft = LongBitmap.newBitmapUnion(bitmap1, bitmap2);
// To avoid overlapping the left and the right, the newLeft is calculated, Note the
@ -363,13 +363,13 @@ public class GraphSimplifier {
return Pair.of(joinStats, edge);
}
Pair<StatsDeriveResult, Edge> threeRightJoin(long bitmap1, Edge edge1, long bitmap2, Edge edge2, long bitmap3) {
Pair<Statistics, Edge> threeRightJoin(long bitmap1, Edge edge1, long bitmap2, Edge edge2, long bitmap3) {
Preconditions.checkArgument(
cacheStats.containsKey(bitmap1) && cacheStats.containsKey(bitmap2) && cacheStats.containsKey(bitmap3));
// plan1 edge1 (plan2 edge2 plan3)
StatsDeriveResult rightStats = JoinEstimation.estimate(cacheStats.get(bitmap2), cacheStats.get(bitmap3),
Statistics rightStats = JoinEstimation.estimate(cacheStats.get(bitmap2), cacheStats.get(bitmap3),
edge2.getJoin());
StatsDeriveResult joinStats = JoinEstimation.estimate(cacheStats.get(bitmap1), rightStats, edge1.getJoin());
Statistics joinStats = JoinEstimation.estimate(cacheStats.get(bitmap1), rightStats, edge1.getJoin());
Edge edge = new Edge(edge1.getJoin(), -1);
long newRight = LongBitmap.newBitmapUnion(bitmap2, bitmap3);
@ -381,8 +381,8 @@ public class GraphSimplifier {
return Pair.of(joinStats, edge);
}
private SimplificationStep orderJoin(Pair<StatsDeriveResult, Edge> edge1Before2,
Pair<StatsDeriveResult, Edge> edge2Before1, int edgeIndex1, int edgeIndex2) {
private SimplificationStep orderJoin(Pair<Statistics, Edge> edge1Before2,
Pair<Statistics, Edge> edge2Before1, int edgeIndex1, int edgeIndex2) {
Cost cost1Before2 = calCost(edge1Before2.second, edge1Before2.first,
cacheStats.get(edge1Before2.second.getLeft()),
cacheStats.get(edge1Before2.second.getRight()));
@ -423,8 +423,8 @@ public class GraphSimplifier {
return false;
}
private Cost calCost(Edge edge, StatsDeriveResult stats,
StatsDeriveResult leftStats, StatsDeriveResult rightStats) {
private Cost calCost(Edge edge, Statistics stats,
Statistics leftStats, Statistics rightStats) {
LogicalJoin join = edge.getJoin();
PlanContext planContext = new PlanContext(stats, leftStats, rightStats);
Cost cost = Cost.zero();

View File

@ -29,7 +29,7 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
import org.apache.doris.nereids.trees.plans.physical.PhysicalDistribute;
import org.apache.doris.nereids.util.TreeStringUtils;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
@ -65,7 +65,7 @@ public class Group {
private boolean isExplored = false;
private StatsDeriveResult statistics;
private Statistics statistics;
/**
* Constructor for Group.
@ -247,11 +247,11 @@ public class Group {
lowestCostPlans.putAll(needReplaceBestExpressions);
}
public StatsDeriveResult getStatistics() {
public Statistics getStatistics() {
return statistics;
}
public void setStatistics(StatsDeriveResult statistics) {
public void setStatistics(Statistics statistics) {
this.statistics = statistics;
}

View File

@ -30,7 +30,7 @@ import org.apache.doris.nereids.trees.expressions.StatementScopeIdGenerator;
import org.apache.doris.nereids.trees.plans.ObjectId;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
@ -297,8 +297,8 @@ public class GroupExpression {
return Objects.hash(children, plan);
}
public StatsDeriveResult childStatistics(int idx) {
return new StatsDeriveResult(child(idx).getStatistics());
public Statistics childStatistics(int idx) {
return new Statistics(child(idx).getStatistics());
}
public void setEstOutputRowCount(long estOutputRowCount) {

View File

@ -40,7 +40,7 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
import org.apache.doris.nereids.trees.plans.physical.PhysicalPlan;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
@ -695,10 +695,10 @@ public class Memo {
for (Group group : groups.values()) {
builder.append("\n\n").append(group);
builder.append(" stats=").append(group.getStatistics()).append("\n");
StatsDeriveResult stats = group.getStatistics();
Statistics stats = group.getStatistics();
if (stats != null && !group.getLogicalExpressions().isEmpty()
&& group.getLogicalExpressions().get(0).getPlan() instanceof LogicalOlapScan) {
for (Entry e : stats.getSlotIdToColumnStats().entrySet()) {
for (Entry e : stats.columnStatistics().entrySet()) {
builder.append(" ").append(e.getKey()).append(":").append(e.getValue()).append("\n");
}
}

View File

@ -19,26 +19,26 @@ package org.apache.doris.nereids.metrics.event;
import org.apache.doris.nereids.memo.GroupExpression;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
/**
* stats state event
*/
public class StatsStateEvent extends StateEvent {
private final StatsDeriveResult statsDeriveResult;
private final Statistics statistics;
private StatsStateEvent(GroupExpression groupExpression, StatsDeriveResult statsDeriveResult) {
private StatsStateEvent(GroupExpression groupExpression, Statistics statistics) {
super(groupExpression);
this.statsDeriveResult = statsDeriveResult;
this.statistics = statistics;
}
public static StatsStateEvent of(GroupExpression groupExpression, StatsDeriveResult statsDeriveResult) {
public static StatsStateEvent of(GroupExpression groupExpression, Statistics statistics) {
return checkConnectContext(StatsStateEvent.class)
? new StatsStateEvent(groupExpression, statsDeriveResult) : null;
? new StatsStateEvent(groupExpression, statistics) : null;
}
@Override
public String toString() {
return Utils.toSqlString("StatsStateEvent", "statsDeriveResult", statsDeriveResult);
return Utils.toSqlString("StatsStateEvent", "Statistics", statistics);
}
}

View File

@ -35,7 +35,7 @@ import org.apache.doris.nereids.trees.plans.physical.PhysicalQuickSort;
import org.apache.doris.nereids.trees.plans.physical.PhysicalRelation;
import org.apache.doris.nereids.trees.plans.physical.PhysicalTopN;
import org.apache.doris.statistics.ColumnStatistic;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import java.util.List;
import java.util.Set;
@ -173,8 +173,8 @@ public class RuntimeFilterPruner extends PlanPostProcessor {
* @return true if runtime-filter is effective
*/
private boolean isEffectiveRuntimeFilter(EqualTo equalTo, PhysicalHashJoin join) {
StatsDeriveResult leftStats = ((AbstractPlan) join.child(0)).getStats();
StatsDeriveResult rightStats = ((AbstractPlan) join.child(1)).getStats();
Statistics leftStats = ((AbstractPlan) join.child(0)).getStats();
Statistics rightStats = ((AbstractPlan) join.child(1)).getStats();
Set<Slot> leftSlots = equalTo.child(0).getInputSlots();
if (leftSlots.size() > 1) {
return false;
@ -185,12 +185,12 @@ public class RuntimeFilterPruner extends PlanPostProcessor {
}
Slot leftSlot = leftSlots.iterator().next();
Slot rightSlot = rightSlots.iterator().next();
ColumnStatistic probeColumnStat = leftStats.getColumnStatsBySlot(leftSlot);
ColumnStatistic buildColumnStat = rightStats.getColumnStatsBySlot(rightSlot);
ColumnStatistic probeColumnStat = leftStats.findColumnStatistics(leftSlot);
ColumnStatistic buildColumnStat = rightStats.findColumnStatistics(rightSlot);
//TODO remove these code when we ensure left child if from probe side
if (probeColumnStat == null || buildColumnStat == null) {
probeColumnStat = leftStats.getColumnStatsBySlot(rightSlot);
buildColumnStat = rightStats.getColumnStatsBySlot(leftSlot);
probeColumnStat = leftStats.findColumnStatistics(rightSlot);
buildColumnStat = rightStats.findColumnStatistics(leftSlot);
if (probeColumnStat == null || buildColumnStat == null) {
return false;
}

View File

@ -23,10 +23,13 @@ import org.apache.doris.nereids.trees.expressions.Alias;
import org.apache.doris.nereids.trees.expressions.BinaryArithmetic;
import org.apache.doris.nereids.trees.expressions.CaseWhen;
import org.apache.doris.nereids.trees.expressions.Cast;
import org.apache.doris.nereids.trees.expressions.ComparisonPredicate;
import org.apache.doris.nereids.trees.expressions.CompoundPredicate;
import org.apache.doris.nereids.trees.expressions.Divide;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.IntegralDivide;
import org.apache.doris.nereids.trees.expressions.MarkJoinSlotReference;
import org.apache.doris.nereids.trees.expressions.Mod;
import org.apache.doris.nereids.trees.expressions.Multiply;
import org.apache.doris.nereids.trees.expressions.SlotReference;
import org.apache.doris.nereids.trees.expressions.Subtract;
@ -38,41 +41,95 @@ import org.apache.doris.nereids.trees.expressions.functions.agg.Count;
import org.apache.doris.nereids.trees.expressions.functions.agg.Max;
import org.apache.doris.nereids.trees.expressions.functions.agg.Min;
import org.apache.doris.nereids.trees.expressions.functions.agg.Sum;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Abs;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Acos;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Ascii;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Asin;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Atan;
import org.apache.doris.nereids.trees.expressions.functions.scalar.DayOfMonth;
import org.apache.doris.nereids.trees.expressions.functions.scalar.DayOfWeek;
import org.apache.doris.nereids.trees.expressions.functions.scalar.DayOfYear;
import org.apache.doris.nereids.trees.expressions.functions.scalar.DaysAdd;
import org.apache.doris.nereids.trees.expressions.functions.scalar.DaysDiff;
import org.apache.doris.nereids.trees.expressions.functions.scalar.DaysSub;
import org.apache.doris.nereids.trees.expressions.functions.scalar.FromDays;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Hour;
import org.apache.doris.nereids.trees.expressions.functions.scalar.HoursDiff;
import org.apache.doris.nereids.trees.expressions.functions.scalar.HoursSub;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Least;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Minute;
import org.apache.doris.nereids.trees.expressions.functions.scalar.MinutesAdd;
import org.apache.doris.nereids.trees.expressions.functions.scalar.MinutesDiff;
import org.apache.doris.nereids.trees.expressions.functions.scalar.MinutesSub;
import org.apache.doris.nereids.trees.expressions.functions.scalar.MonthsAdd;
import org.apache.doris.nereids.trees.expressions.functions.scalar.MonthsDiff;
import org.apache.doris.nereids.trees.expressions.functions.scalar.MonthsSub;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Negative;
import org.apache.doris.nereids.trees.expressions.functions.scalar.NullIf;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Quarter;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Radians;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Random;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Second;
import org.apache.doris.nereids.trees.expressions.functions.scalar.SecondsAdd;
import org.apache.doris.nereids.trees.expressions.functions.scalar.SecondsDiff;
import org.apache.doris.nereids.trees.expressions.functions.scalar.SecondsSub;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Sqrt;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Substring;
import org.apache.doris.nereids.trees.expressions.functions.scalar.ToDate;
import org.apache.doris.nereids.trees.expressions.functions.scalar.ToDays;
import org.apache.doris.nereids.trees.expressions.functions.scalar.WeekOfYear;
import org.apache.doris.nereids.trees.expressions.functions.scalar.WeeksDiff;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Year;
import org.apache.doris.nereids.trees.expressions.functions.scalar.YearsAdd;
import org.apache.doris.nereids.trees.expressions.functions.scalar.YearsDiff;
import org.apache.doris.nereids.trees.expressions.functions.scalar.YearsSub;
import org.apache.doris.nereids.trees.expressions.literal.Literal;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.DataType;
import org.apache.doris.statistics.ColumnStatistic;
import org.apache.doris.statistics.ColumnStatisticBuilder;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import com.google.common.base.Preconditions;
import org.apache.commons.collections.CollectionUtils;
import java.time.Instant;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.util.List;
/**
* Used to estimate for expressions that not producing boolean value.
*/
public class ExpressionEstimation extends ExpressionVisitor<ColumnStatistic, StatsDeriveResult> {
public class ExpressionEstimation extends ExpressionVisitor<ColumnStatistic, Statistics> {
private static ExpressionEstimation INSTANCE = new ExpressionEstimation();
public static final long DAYS_FROM_0_TO_1970 = 719528;
public static final long DAYS_FROM_0_TO_9999 = 3652424;
private static final ExpressionEstimation INSTANCE = new ExpressionEstimation();
/**
* returned columnStat is newly created or a copy of stats
*/
public static ColumnStatistic estimate(Expression expression, StatsDeriveResult stats) {
return INSTANCE.visit(expression, stats);
public static ColumnStatistic estimate(Expression expression, Statistics stats) {
ColumnStatistic columnStatistic = expression.accept(INSTANCE, stats);
if (columnStatistic == null) {
return ColumnStatistic.UNKNOWN;
}
return columnStatistic;
}
@Override
public ColumnStatistic visit(Expression expr, StatsDeriveResult context) {
return expr.accept(this, context);
public ColumnStatistic visit(Expression expr, Statistics context) {
List<Expression> childrenExpr = expr.children();
if (CollectionUtils.isEmpty(childrenExpr)) {
return ColumnStatistic.UNKNOWN;
}
return expr.child(0).accept(this, context);
}
//TODO: case-when need to re-implemented
@Override
public ColumnStatistic visitCaseWhen(CaseWhen caseWhen, StatsDeriveResult context) {
public ColumnStatistic visitCaseWhen(CaseWhen caseWhen, Statistics context) {
ColumnStatisticBuilder columnStat = new ColumnStatisticBuilder();
columnStat.setNdv(caseWhen.getWhenClauses().size() + 1);
columnStat.setMinValue(0);
@ -82,14 +139,14 @@ public class ExpressionEstimation extends ExpressionVisitor<ColumnStatistic, Sta
return columnStat.build();
}
public ColumnStatistic visitCast(Cast cast, StatsDeriveResult context) {
public ColumnStatistic visitCast(Cast cast, Statistics context) {
return cast.child().accept(this, context);
}
@Override
public ColumnStatistic visitLiteral(Literal literal, StatsDeriveResult context) {
public ColumnStatistic visitLiteral(Literal literal, Statistics context) {
if (ColumnStatistic.MAX_MIN_UNSUPPORTED_TYPE.contains(literal.getDataType().toCatalogDataType())) {
return ColumnStatistic.DEFAULT;
return ColumnStatistic.UNKNOWN;
}
double literalVal = literal.getDouble();
ColumnStatisticBuilder columnStatBuilder = new ColumnStatisticBuilder();
@ -102,14 +159,12 @@ public class ExpressionEstimation extends ExpressionVisitor<ColumnStatistic, Sta
}
@Override
public ColumnStatistic visitSlotReference(SlotReference slotReference, StatsDeriveResult context) {
ColumnStatistic columnStat = context.getColumnStatsBySlot(slotReference);
Preconditions.checkState(columnStat != null);
return columnStat.copy();
public ColumnStatistic visitSlotReference(SlotReference slotReference, Statistics context) {
return context.findColumnStatistics(slotReference);
}
@Override
public ColumnStatistic visitBinaryArithmetic(BinaryArithmetic binaryArithmetic, StatsDeriveResult context) {
public ColumnStatistic visitBinaryArithmetic(BinaryArithmetic binaryArithmetic, Statistics context) {
ColumnStatistic leftColStats = binaryArithmetic.left().accept(this, context);
ColumnStatistic rightColStats = binaryArithmetic.right().accept(this, context);
double leftNdv = leftColStats.ndv;
@ -124,7 +179,8 @@ public class ExpressionEstimation extends ExpressionVisitor<ColumnStatistic, Sta
double rightMax = rightColStats.maxValue;
double leftMin = leftColStats.minValue;
double rightMin = rightColStats.minValue;
double dataSize = binaryArithmetic.getDataType().width() * rowCount;
int exprResultTypeWidth = binaryArithmetic.getDataType().width();
double dataSize = exprResultTypeWidth * rowCount;
if (binaryArithmetic instanceof Add) {
return new ColumnStatisticBuilder().setCount(rowCount).setNdv(ndv).setAvgSizeByte(leftColStats.avgSizeByte)
.setNumNulls(numNulls).setDataSize(dataSize).setMinValue(leftMin + rightMin)
@ -166,9 +222,21 @@ public class ExpressionEstimation extends ExpressionVisitor<ColumnStatistic, Sta
leftMax / noneZeroDivisor(rightMax));
return new ColumnStatisticBuilder().setCount(rowCount).setNdv(ndv).setAvgSizeByte(leftColStats.avgSizeByte)
.setNumNulls(numNulls).setDataSize(binaryArithmetic.getDataType().width()).setMinValue(min)
.setMaxValue(max).setSelectivity(1.0).setMaxExpr(null).setMinExpr(null).build();
.setMaxValue(max).setSelectivity(1.0).build();
}
return ColumnStatistic.DEFAULT;
if (binaryArithmetic instanceof Mod) {
double min = -Math.max(Math.abs(rightMin), Math.abs(rightMax));
double max = -min;
return new ColumnStatisticBuilder().setCount(rowCount).setNdv(ndv)
.setAvgSizeByte(exprResultTypeWidth)
.setDataSize(dataSize)
.setNumNulls(numNulls)
.setMaxValue(max)
.setMinValue(min)
.build();
}
return ColumnStatistic.UNKNOWN;
}
private double noneZeroDivisor(double d) {
@ -176,11 +244,11 @@ public class ExpressionEstimation extends ExpressionVisitor<ColumnStatistic, Sta
}
@Override
public ColumnStatistic visitMin(Min min, StatsDeriveResult context) {
public ColumnStatistic visitMin(Min min, Statistics context) {
Expression child = min.child();
ColumnStatistic columnStat = child.accept(this, context);
if (columnStat == ColumnStatistic.DEFAULT) {
return ColumnStatistic.DEFAULT;
if (columnStat == ColumnStatistic.UNKNOWN) {
return ColumnStatistic.UNKNOWN;
}
/*
we keep columnStat.min and columnStat.max, but set ndv=1.
@ -194,11 +262,11 @@ public class ExpressionEstimation extends ExpressionVisitor<ColumnStatistic, Sta
}
@Override
public ColumnStatistic visitMax(Max max, StatsDeriveResult context) {
public ColumnStatistic visitMax(Max max, Statistics context) {
Expression child = max.child();
ColumnStatistic columnStat = child.accept(this, context);
if (columnStat == ColumnStatistic.DEFAULT) {
return ColumnStatistic.DEFAULT;
if (columnStat == ColumnStatistic.UNKNOWN) {
return ColumnStatistic.UNKNOWN;
}
/*
we keep columnStat.min and columnStat.max, but set ndv=1.
@ -211,82 +279,108 @@ public class ExpressionEstimation extends ExpressionVisitor<ColumnStatistic, Sta
}
@Override
public ColumnStatistic visitCount(Count count, StatsDeriveResult context) {
if (count.isStar()) {
return ColumnStatistic.DEFAULT;
}
Expression child = count.child(0);
ColumnStatistic columnStat = child.accept(this, context);
if (columnStat == ColumnStatistic.DEFAULT) {
return ColumnStatistic.DEFAULT;
}
double expectedValue = context.getRowCount() - columnStat.numNulls;
double width = (double) count.getDataType().width();
return new ColumnStatisticBuilder().setCount(1D).setNdv(1D).setAvgSizeByte(width).setNumNulls(0)
.setDataSize(width).setMinValue(expectedValue).setMaxValue(expectedValue).setSelectivity(1.0)
public ColumnStatistic visitCount(Count count, Statistics context) {
double width = count.getDataType().width();
return new ColumnStatisticBuilder().setCount(1D).setAvgSizeByte(width).setNumNulls(0)
.setDataSize(width).setMinValue(0).setMaxValue(context.getRowCount()).setSelectivity(1.0)
.setMaxExpr(null).setMinExpr(null).build();
}
// TODO: return a proper estimated stat after supports histogram
@Override
public ColumnStatistic visitSum(Sum sum, StatsDeriveResult context) {
return sum.child().accept(this, context);
public ColumnStatistic visitSum(Sum sum, Statistics context) {
ColumnStatistic columnStatistic = sum.child().accept(this, context);
return new ColumnStatisticBuilder(columnStatistic)
.setMaxValue(columnStatistic.maxValue * context.getRowCount() / columnStatistic.ndv)
.setMinValue(columnStatistic.minValue * context.getRowCount() / columnStatistic.ndv)
.setAvgSizeByte(sum.getDataType().width())
.setDataSize(sum.getDataType().width() * context.getRowCount()).build();
}
// TODO: return a proper estimated stat after supports histogram
@Override
public ColumnStatistic visitAvg(Avg avg, StatsDeriveResult context) {
public ColumnStatistic visitAvg(Avg avg, Statistics context) {
return avg.child().accept(this, context);
}
@Override
public ColumnStatistic visitYear(Year year, StatsDeriveResult context) {
public ColumnStatistic visitYear(Year year, Statistics context) {
ColumnStatistic childStat = year.child().accept(this, context);
long minYear = LocalDateTime.MIN.getYear();
long maxYear = LocalDateTime.MAX.getYear();
return new ColumnStatisticBuilder().setCount(childStat.count).setNdv(childStat.ndv).setAvgSizeByte(4)
.setNumNulls(childStat.numNulls).setDataSize(maxYear - minYear + 1).setMinValue(minYear)
long minYear = 1970;
long maxYear = 2038;
return new ColumnStatisticBuilder()
.setCount(childStat.count)
.setNdv(maxYear - minYear + 1)
.setAvgSizeByte(4)
.setNumNulls(childStat.numNulls)
.setDataSize(4 * childStat.count)
.setMinValue(minYear)
.setMaxValue(maxYear).setSelectivity(1.0).setMinExpr(null).build();
}
@Override
public ColumnStatistic visitWeekOfYear(WeekOfYear weekOfYear, StatsDeriveResult context) {
public ColumnStatistic visitWeekOfYear(WeekOfYear weekOfYear, Statistics context) {
ColumnStatistic childStat = weekOfYear.child().accept(this, context);
double width = weekOfYear.getDataType().width();
return new ColumnStatisticBuilder().setCount(52).setNdv(2).setAvgSizeByte(width).setNumNulls(childStat.numNulls)
.setDataSize(1).setMinValue(1).setMaxValue(52).setSelectivity(1.0).setMinExpr(null)
return new ColumnStatisticBuilder(childStat)
.setNdv(54)
.setAvgSizeByte(width)
.setNumNulls(childStat.numNulls)
.setDataSize(1).setMinValue(1).setMaxValue(53).setSelectivity(1.0).setMinExpr(null)
.build();
}
// TODO: find a proper way to predicate stat of substring
@Override
public ColumnStatistic visitSubstring(Substring substring, StatsDeriveResult context) {
public ColumnStatistic visitSubstring(Substring substring, Statistics context) {
return substring.child(0).accept(this, context);
}
@Override
public ColumnStatistic visitAlias(Alias alias, StatsDeriveResult context) {
public ColumnStatistic visitAlias(Alias alias, Statistics context) {
return alias.child().accept(this, context);
}
@Override
public ColumnStatistic visitVirtualReference(VirtualSlotReference virtualSlotReference, StatsDeriveResult context) {
return ColumnStatistic.DEFAULT;
public ColumnStatistic visitVirtualReference(VirtualSlotReference virtualSlotReference, Statistics context) {
return ColumnStatistic.UNKNOWN;
}
@Override
public ColumnStatistic visitBoundFunction(BoundFunction boundFunction, StatsDeriveResult context) {
return ColumnStatistic.DEFAULT;
public ColumnStatistic visitBoundFunction(BoundFunction boundFunction, Statistics context) {
return ColumnStatistic.UNKNOWN;
}
@Override
public ColumnStatistic visitAggregateExpression(AggregateExpression aggregateExpression,
StatsDeriveResult context) {
Statistics context) {
return aggregateExpression.child().accept(this, context);
}
@Override
public ColumnStatistic visitTimestampArithmetic(TimestampArithmetic arithmetic, StatsDeriveResult context) {
public ColumnStatistic visitComparisonPredicate(ComparisonPredicate cp, Statistics context) {
    // A comparison yields a boolean, so the result column has at most 2 distinct values.
    // Nulls from either operand may propagate, so keep the worse of the two null counts.
    ColumnStatistic leftStats = cp.left().accept(this, context);
    ColumnStatistic rightStats = cp.right().accept(this, context);
    double propagatedNulls = StatsMathUtil.maxNonNaN(leftStats.numNulls, rightStats.numNulls);
    ColumnStatisticBuilder builder = new ColumnStatisticBuilder(leftStats);
    builder.setNumNulls(propagatedNulls);
    builder.setHistogram(null);
    builder.setNdv(2);
    return builder.build();
}
@Override
public ColumnStatistic visitCompoundPredicate(CompoundPredicate compoundPredicate, Statistics context) {
    // AND/OR produce a boolean (ndv = 2). The null count is the maximum over all
    // children (floored at 1), since a null in any operand may make the result null.
    List<Expression> children = compoundPredicate.children();
    ColumnStatistic firstStats = children.get(0).accept(this, context);
    double worstNulls = StatsMathUtil.maxNonNaN(firstStats.numNulls, 1);
    for (Expression child : children.subList(1, children.size())) {
        ColumnStatistic childStats = child.accept(this, context);
        worstNulls = StatsMathUtil.maxNonNaN(worstNulls, childStats.numNulls);
    }
    return new ColumnStatisticBuilder(firstStats)
            .setNumNulls(worstNulls)
            .setNdv(2)
            .setHistogram(null)
            .build();
}
@Override
public ColumnStatistic visitTimestampArithmetic(TimestampArithmetic arithmetic, Statistics context) {
ColumnStatistic colStat = arithmetic.child(0).accept(this, context);
ColumnStatisticBuilder builder = new ColumnStatisticBuilder(colStat);
builder.setMinValue(Double.MIN_VALUE);
@ -297,7 +391,398 @@ public class ExpressionEstimation extends ExpressionVisitor<ColumnStatistic, Sta
@Override
public ColumnStatistic visitMarkJoinReference(
MarkJoinSlotReference markJoinSlotReference, StatsDeriveResult context) {
return ColumnStatistic.DEFAULT;
MarkJoinSlotReference markJoinSlotReference, Statistics context) {
return ColumnStatistic.UNKNOWN;
}
public ColumnStatistic visitNullIf(NullIf nullIf, Statistics context) {
    // nullif(a, b) returns a or NULL; estimate with the first argument's statistics.
    return nullIf.left().accept(this, context);
}
@Override
public ColumnStatistic visitLeast(Least least, Statistics context) {
    // Rough estimate: assume least(...) is distributed like its first argument.
    return least.child(0).accept(this, context);
}
@Override
public ColumnStatistic visitAscii(Ascii ascii, Statistics context) {
    // ascii() returns the code of the first character, always within [0, 127],
    // so the range and ndv are fixed; only the null count comes from the child.
    ColumnStatistic childStats = ascii.child().accept(this, context);
    double width = ascii.getDataType().width();
    ColumnStatisticBuilder builder = new ColumnStatisticBuilder();
    builder.setMinValue(0);
    builder.setMaxValue(127);
    builder.setNdv(128);
    builder.setNumNulls(childStats.numNulls);
    builder.setAvgSizeByte(width);
    builder.setDataSize(width * context.getRowCount());
    return builder.build();
}
@Override
public ColumnStatistic visitQuarter(Quarter quarter, Statistics context) {
    // quarter() maps a date to one of four quarters: range [1, 4], ndv 4.
    ColumnStatistic childStats = quarter.child().accept(this, context);
    double width = quarter.getDataType().width();
    ColumnStatisticBuilder builder = new ColumnStatisticBuilder();
    builder.setMinValue(1);
    builder.setMaxValue(4);
    builder.setNdv(4);
    builder.setNumNulls(childStats.numNulls);
    builder.setAvgSizeByte(width);
    builder.setDataSize(width * context.getRowCount());
    return builder.build();
}
@Override
public ColumnStatistic visitDayOfMonth(DayOfMonth dayOfMonth, Statistics context) {
    /*
     * dayofmonth() returns the day within the month, i.e. values in [1, 31] with at
     * most 31 distinct values. The original chained setMaxValue(1) followed by
     * setMaxValue(31), leaving the minimum inherited from the child; the first call
     * was clearly intended to be setMinValue(1).
     */
    DataType returnType = dayOfMonth.getDataType();
    ColumnStatistic childColumnStats = dayOfMonth.child().accept(this, context);
    return new ColumnStatisticBuilder(childColumnStats).setNdv(31)
            .setAvgSizeByte(returnType.width())
            .setDataSize(returnType.width() * context.getRowCount())
            .setMinValue(1)
            .setMaxValue(31).build();
}
@Override
public ColumnStatistic visitDayOfWeek(DayOfWeek dayOfWeek, Statistics context) {
    /*
     * dayofweek() returns values in [1, 7], so ndv is 7. Also derive avg size and
     * data size from the return type, consistent with the sibling date-part
     * estimators (visitHour, visitMinute, visitSecond, visitDayOfYear, ...), which
     * the original omitted here.
     */
    ColumnStatistic childColumnStats = dayOfWeek.child().accept(this, context);
    return new ColumnStatisticBuilder(childColumnStats)
            .setNdv(7)
            .setMinValue(1)
            .setMaxValue(7)
            .setAvgSizeByte(dayOfWeek.getDataType().width())
            .setDataSize(dayOfWeek.getDataType().width() * context.getRowCount())
            .build();
}
@Override
public ColumnStatistic visitDayOfYear(DayOfYear dayOfYear, Statistics context) {
    // dayofyear() produces values in [1, 366] (366 covers leap years).
    ColumnStatistic childStats = dayOfYear.child().accept(this, context);
    double width = dayOfYear.getDataType().width();
    ColumnStatisticBuilder builder = new ColumnStatisticBuilder(childStats);
    builder.setMinValue(1);
    builder.setMaxValue(366);
    builder.setNdv(366);
    builder.setAvgSizeByte(width);
    builder.setDataSize(width * context.getRowCount());
    return builder.build();
}
@Override
public ColumnStatistic visitHour(Hour hour, Statistics context) {
    // hour() is always within [0, 23], so the range and ndv are fixed.
    ColumnStatistic childStats = hour.child().accept(this, context);
    double width = hour.getDataType().width();
    return new ColumnStatisticBuilder(childStats)
            .setMinValue(0)
            .setMaxValue(23)
            .setNdv(24)
            .setAvgSizeByte(width)
            .setDataSize(width * context.getRowCount())
            .build();
}
@Override
public ColumnStatistic visitMinute(Minute minute, Statistics context) {
    // minute() is always within [0, 59], so the range and ndv are fixed.
    ColumnStatistic childStats = minute.child().accept(this, context);
    double width = minute.getDataType().width();
    return new ColumnStatisticBuilder(childStats)
            .setMinValue(0)
            .setMaxValue(59)
            .setNdv(60)
            .setAvgSizeByte(width)
            .setDataSize(width * context.getRowCount())
            .build();
}
@Override
public ColumnStatistic visitSecond(Second second, Statistics context) {
    // second() is always within [0, 59], same shape as visitMinute.
    ColumnStatistic childStats = second.child().accept(this, context);
    double width = second.getDataType().width();
    return new ColumnStatisticBuilder(childStats)
            .setMinValue(0)
            .setMaxValue(59)
            .setNdv(60)
            .setAvgSizeByte(width)
            .setDataSize(width * context.getRowCount())
            .build();
}
@Override
public ColumnStatistic visitToDate(ToDate toDate, Statistics context) {
    // to_date() truncates a datetime to its date part. Keep the child's stats,
    // adjust the size fields to the return type, and truncate min/max to day start.
    ColumnStatistic childColumnStats = toDate.child().accept(this, context);
    ColumnStatisticBuilder columnStatisticBuilder = new ColumnStatisticBuilder(childColumnStats)
            .setAvgSizeByte(toDate.getDataType().width())
            .setDataSize(toDate.getDataType().width() * context.getRowCount());
    if (childColumnStats.minOrMaxIsInf()) {
        // Unbounded child range: there is no finite endpoint to truncate.
        return columnStatisticBuilder.build();
    }
    // NOTE(review): assumes min/max encode epoch seconds interpreted in the system
    // default time zone (see getDatetimeFromLong) — confirm against how column
    // min/max are stored for datetime columns.
    double minValue = getDatetimeFromLong((long) childColumnStats.minValue).toLocalDate()
            .atStartOfDay(ZoneId.systemDefault()).toEpochSecond();
    double maxValue = getDatetimeFromLong((long) childColumnStats.maxValue).toLocalDate()
            .atStartOfDay(ZoneId.systemDefault()).toEpochSecond();
    return columnStatisticBuilder.setMaxValue(maxValue)
            .setMinValue(minValue)
            .build();
}
/** Converts an epoch-second value to a {@link LocalDateTime} in the system default zone. */
private LocalDateTime getDatetimeFromLong(long dateTime) {
    Instant instant = Instant.ofEpochSecond(dateTime);
    return LocalDateTime.ofInstant(instant, ZoneId.systemDefault());
}
@Override
public ColumnStatistic visitToDays(ToDays toDays, Statistics context) {
    // to_days() converts a date to the number of days since year 0. Keep the child's
    // stats, adjust size fields, and map min/max from epoch seconds to day counts.
    ColumnStatistic childColumnStats = toDays.child().accept(this, context);
    ColumnStatisticBuilder columnStatisticBuilder = new ColumnStatisticBuilder(childColumnStats)
            .setAvgSizeByte(toDays.getDataType().width())
            .setDataSize(toDays.getDataType().width() * context.getRowCount());
    if (childColumnStats.minOrMaxIsInf()) {
        // Unbounded child range: nothing finite to convert.
        return columnStatisticBuilder.build();
    }
    // Epoch-day + DAYS_FROM_0_TO_1970 rebases the count from 1970-01-01 to year 0,
    // matching to_days() semantics.
    double minValue = getDatetimeFromLong((long) childColumnStats.minValue).toLocalDate().toEpochDay()
            + (double) DAYS_FROM_0_TO_1970;
    double maxValue = getDatetimeFromLong((long) childColumnStats.maxValue).toLocalDate().toEpochDay()
            + (double) DAYS_FROM_0_TO_1970;
    return columnStatisticBuilder.setMaxValue(maxValue)
            .setMinValue(minValue)
            .build();
}
@Override
public ColumnStatistic visitFromDays(FromDays fromDays, Statistics context) {
    /*
     * from_days(n) interprets n as a day count since year 0 and returns a date.
     * Convert the child's min/max day counts to epoch seconds at day start, clamping
     * to the representable window [1970-01-01, 9999-12-31]. The original duplicated
     * the identical clamp/convert logic for min and max; it is factored into one
     * private helper below.
     */
    ColumnStatistic childColumnStats = fromDays.child().accept(this, context);
    double minValue = daysFromYearZeroToEpochSecond(childColumnStats.minValue);
    double maxValue = daysFromYearZeroToEpochSecond(childColumnStats.maxValue);
    return new ColumnStatisticBuilder(childColumnStats)
            .setMinValue(minValue)
            .setMaxValue(maxValue)
            .setAvgSizeByte(fromDays.getDataType().width())
            .setDataSize(fromDays.getDataType().width() * context.getRowCount()).build();
}

/**
 * Converts a "days since year 0" value to epoch seconds at the start of that day,
 * clamped to the window [1970-01-01, 9999-12-31].
 */
private double daysFromYearZeroToEpochSecond(double daysFromYearZero) {
    long epochDay;
    if (daysFromYearZero < DAYS_FROM_0_TO_1970) {
        // Before the epoch: clamp to 1970-01-01.
        epochDay = 0;
    } else if (daysFromYearZero > DAYS_FROM_0_TO_9999) {
        // Beyond the supported date range: clamp to 9999-12-31.
        epochDay = (long) (DAYS_FROM_0_TO_9999 - DAYS_FROM_0_TO_1970);
    } else {
        epochDay = (long) (daysFromYearZero - DAYS_FROM_0_TO_1970);
    }
    return LocalDate.ofEpochDay(epochDay).atStartOfDay(ZoneId.systemDefault()).toEpochSecond();
}
@Override
public ColumnStatistic visitAbs(Abs abs, Statistics context) {
    /*
     * abs() maps the child's range [min, max] onto non-negative values. The maximum
     * is the larger absolute endpoint. If 0 lies within the child's range the
     * minimum is 0; otherwise it is the smaller absolute endpoint. The original
     * returned Math.min(minValue, maxValue) for a strictly negative range, which is
     * a negative number and impossible as a minimum of abs(); taking absolute values
     * of the endpoints first fixes that (e.g. [-10, -2] now yields [2, 10]).
     */
    ColumnStatistic childColumnStats = abs.child().accept(this, context);
    double max = Math.max(Math.abs(childColumnStats.minValue), Math.abs(childColumnStats.maxValue));
    double min;
    if (childColumnStats.minValue <= 0 && childColumnStats.maxValue >= 0) {
        // 0 is inside the child's range, so abs() can reach 0.
        min = 0;
    } else {
        min = Math.min(Math.abs(childColumnStats.minValue), Math.abs(childColumnStats.maxValue));
    }
    return new ColumnStatisticBuilder(childColumnStats)
            .setMinValue(min)
            .setMaxValue(max)
            .setNdv(max - min + 1)
            .setAvgSizeByte(abs.getDataType().width())
            .setDataSize(abs.getDataType().width() * context.getRowCount()).build();
}
@Override
public ColumnStatistic visitAcos(Acos acos, Statistics context) {
    // acos() returns values in [0, PI] regardless of the input range.
    ColumnStatistic childStats = acos.child().accept(this, context);
    double width = acos.getDataType().width();
    return new ColumnStatisticBuilder(childStats)
            .setMinValue(0)
            .setMaxValue(Math.PI)
            .setAvgSizeByte(width)
            .setDataSize(width * context.getRowCount())
            .build();
}
@Override
public ColumnStatistic visitAsin(Asin asin, Statistics context) {
    // asin() returns values in [-PI/2, PI/2] regardless of the input range.
    ColumnStatistic childStats = asin.child().accept(this, context);
    double width = asin.getDataType().width();
    return new ColumnStatisticBuilder(childStats)
            .setMinValue(-Math.PI / 2)
            .setMaxValue(Math.PI / 2)
            .setAvgSizeByte(width)
            .setDataSize(width * context.getRowCount())
            .build();
}
@Override
public ColumnStatistic visitAtan(Atan atan, Statistics context) {
    // atan() is bounded by (-PI/2, PI/2); use that as the estimated range.
    ColumnStatistic childStats = atan.child().accept(this, context);
    double width = atan.getDataType().width();
    return new ColumnStatisticBuilder(childStats)
            .setMinValue(-Math.PI / 2)
            .setMaxValue(Math.PI / 2)
            .setAvgSizeByte(width)
            .setDataSize(width * context.getRowCount())
            .build();
}
@Override
public ColumnStatistic visitSqrt(Sqrt sqrt, Statistics context) {
    // sqrt() is non-negative; the upper bound is sqrt of the child's maximum.
    ColumnStatistic childStats = sqrt.child().accept(this, context);
    double width = sqrt.getDataType().width();
    return new ColumnStatisticBuilder(childStats)
            .setMinValue(0)
            .setMaxValue(Math.sqrt(childStats.maxValue))
            .setAvgSizeByte(width)
            .setDataSize(width * context.getRowCount())
            .build();
}
@Override
public ColumnStatistic visitRadians(Radians radians, Statistics context) {
    // radians() is monotonic, so min/max map directly through Math.toRadians.
    ColumnStatistic childStats = radians.child().accept(this, context);
    double width = radians.getDataType().width();
    return new ColumnStatisticBuilder(childStats)
            .setMinValue(Math.toRadians(childStats.minValue))
            .setMaxValue(Math.toRadians(childStats.maxValue))
            .setAvgSizeByte(width)
            .setDataSize(width * context.getRowCount())
            .build();
}
@Override
public ColumnStatistic visitRandom(Random random, Statistics context) {
    // random() is uniform on [0, 1) and never null; no histogram is kept.
    double width = random.getDataType().width();
    ColumnStatisticBuilder builder = new ColumnStatisticBuilder();
    builder.setMinValue(0);
    builder.setMaxValue(1);
    builder.setNumNulls(0);
    builder.setHistogram(null);
    builder.setAvgSizeByte(width);
    builder.setDataSize(width * context.getRowCount());
    return builder.build();
}
@Override
public ColumnStatistic visitNegative(Negative negative, Statistics context) {
    // Negation flips the child's range [min, max] to [-max, -min].
    ColumnStatistic childStats = negative.child(0).accept(this, context);
    double negatedLow = -childStats.maxValue;
    double negatedHigh = -childStats.minValue;
    double width = negative.getDataType().width();
    return new ColumnStatisticBuilder(childStats)
            .setMinValue(Math.min(negatedLow, negatedHigh))
            .setMaxValue(Math.max(negatedLow, negatedHigh))
            .setAvgSizeByte(width)
            .setDataSize(width * context.getRowCount())
            .build();
}
// Visitors for date +/- interval expressions. Each simply dispatches to the shared
// dateAdd/dateSub helpers; the estimation logic does not depend on the time unit
// because min/max are stored in a common numeric encoding.
// NOTE(review): there is a visitMinutesAdd/visitSecondsAdd but no visitHoursAdd —
// presumably HoursAdd falls back to the generic visitor; confirm this is intended.
@Override
public ColumnStatistic visitYearsAdd(YearsAdd yearsAdd, Statistics context) {
    return dateAdd(yearsAdd, context);
}
@Override
public ColumnStatistic visitMonthsAdd(MonthsAdd monthsAdd, Statistics context) {
    return dateAdd(monthsAdd, context);
}
@Override
public ColumnStatistic visitDaysAdd(DaysAdd daysAdd, Statistics context) {
    return dateAdd(daysAdd, context);
}
@Override
public ColumnStatistic visitMinutesAdd(MinutesAdd minutesAdd, Statistics context) {
    return dateAdd(minutesAdd, context);
}
@Override
public ColumnStatistic visitSecondsAdd(SecondsAdd secondsAdd, Statistics context) {
    return dateAdd(secondsAdd, context);
}
@Override
public ColumnStatistic visitYearsSub(YearsSub yearsSub, Statistics context) {
    return dateSub(yearsSub, context);
}
@Override
public ColumnStatistic visitMonthsSub(MonthsSub monthsSub, Statistics context) {
    return dateSub(monthsSub, context);
}
@Override
public ColumnStatistic visitDaysSub(DaysSub daysSub, Statistics context) {
    return dateSub(daysSub, context);
}
@Override
public ColumnStatistic visitHoursSub(HoursSub hoursSub, Statistics context) {
    return dateSub(hoursSub, context);
}
@Override
public ColumnStatistic visitMinutesSub(MinutesSub minutesSub, Statistics context) {
    return dateSub(minutesSub, context);
}
@Override
public ColumnStatistic visitSecondsSub(SecondsSub secondsSub, Statistics context) {
    return dateSub(secondsSub, context);
}
/**
 * Estimates statistics for a date-plus-interval expression. Interval addition of
 * the two ranges: [lMin + rMin, lMax + rMax]; other stats come from the left child.
 */
private ColumnStatistic dateAdd(Expression date, Statistics context) {
    ColumnStatistic datePart = date.child(0).accept(this, context);
    ColumnStatistic intervalPart = date.child(1).accept(this, context);
    double width = date.getDataType().width();
    return new ColumnStatisticBuilder(datePart)
            .setMinValue(datePart.minValue + intervalPart.minValue)
            .setMaxValue(datePart.maxValue + intervalPart.maxValue)
            .setAvgSizeByte(width)
            .setDataSize(width * context.getRowCount()).build();
}
/**
 * Estimates statistics for a date-minus-interval expression.
 * Interval arithmetic for subtraction: the smallest result is leftMin - rightMax and
 * the largest is leftMax - rightMin (the same pairing dateDiff uses). The original
 * paired min-min and max-max, which can yield min > max whenever the interval range
 * is wider than a single point.
 */
private ColumnStatistic dateSub(Expression date, Statistics context) {
    ColumnStatistic leftChild = date.child(0).accept(this, context);
    ColumnStatistic rightChild = date.child(1).accept(this, context);
    return new ColumnStatisticBuilder(leftChild)
            .setMinValue(leftChild.minValue - rightChild.maxValue)
            .setMaxValue(leftChild.maxValue - rightChild.minValue)
            .setAvgSizeByte(date.getDataType().width())
            .setDataSize(date.getDataType().width() * context.getRowCount()).build();
}
/**
 * Estimates statistics for a date-diff expression measured in units of
 * {@code interval} seconds. Interval arithmetic for a difference: the smallest
 * result is (leftMin - rightMax) and the largest is (leftMax - rightMin).
 */
private ColumnStatistic dateDiff(double interval, Expression date, Statistics context) {
    ColumnStatistic minuend = date.child(0).accept(this, context);
    ColumnStatistic subtrahend = date.child(1).accept(this, context);
    double width = date.getDataType().width();
    return new ColumnStatisticBuilder(minuend)
            .setMinValue((minuend.minValue - subtrahend.maxValue) / interval)
            .setMaxValue((minuend.maxValue - subtrahend.minValue) / interval)
            .setAvgSizeByte(width)
            .setDataSize(width * context.getRowCount()).build();
}
// Visitors for *_diff expressions. Each passes the unit length in seconds to the
// shared dateDiff helper. Years are approximated as 365 days and months as 31 days;
// these are deliberate rough estimates, not calendar-exact values.
@Override
public ColumnStatistic visitYearsDiff(YearsDiff yearsDiff, Statistics context) {
    return dateDiff(3600 * 24 * 365, yearsDiff, context);
}
@Override
public ColumnStatistic visitMonthsDiff(MonthsDiff monthsDiff, Statistics context) {
    return dateDiff(3600 * 24 * 31, monthsDiff, context);
}
@Override
public ColumnStatistic visitWeeksDiff(WeeksDiff weeksDiff, Statistics context) {
    return dateDiff(3600 * 24 * 7, weeksDiff, context);
}
@Override
public ColumnStatistic visitDaysDiff(DaysDiff daysDiff, Statistics context) {
    return dateDiff(3600 * 24, daysDiff, context);
}
@Override
public ColumnStatistic visitHoursDiff(HoursDiff hoursDiff, Statistics context) {
    return dateDiff(3600, hoursDiff, context);
}
@Override
public ColumnStatistic visitMinutesDiff(MinutesDiff minutesDiff, Statistics context) {
    return dateDiff(60, minutesDiff, context);
}
@Override
public ColumnStatistic visitSecondsDiff(SecondsDiff secondsDiff, Statistics context) {
    return dateDiff(1, secondsDiff, context);
}
}

View File

@ -17,7 +17,6 @@
package org.apache.doris.nereids.stats;
import org.apache.doris.common.Id;
import org.apache.doris.nereids.stats.FilterEstimation.EstimationContext;
import org.apache.doris.nereids.trees.expressions.And;
import org.apache.doris.nereids.trees.expressions.ComparisonPredicate;
@ -32,16 +31,21 @@ import org.apache.doris.nereids.trees.expressions.LessThanEqual;
import org.apache.doris.nereids.trees.expressions.Not;
import org.apache.doris.nereids.trees.expressions.NullSafeEqual;
import org.apache.doris.nereids.trees.expressions.Or;
import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.expressions.SlotReference;
import org.apache.doris.nereids.trees.expressions.literal.Literal;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.coercion.NumericType;
import org.apache.doris.statistics.Bucket;
import org.apache.doris.statistics.ColumnStatistic;
import org.apache.doris.statistics.ColumnStatisticBuilder;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Histogram;
import org.apache.doris.statistics.HistogramBuilder;
import org.apache.doris.statistics.StatisticRange;
import org.apache.doris.statistics.Statistics;
import com.google.common.base.Preconditions;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
@ -49,51 +53,41 @@ import java.util.Map;
* Calculate selectivity of expression that produces boolean value.
* TODO: Should consider the distribution of data.
*/
public class FilterEstimation extends ExpressionVisitor<StatsDeriveResult, EstimationContext> {
public static final double DEFAULT_INEQUALITY_COMPARISON_SELECTIVITY = 0.8;
public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationContext> {
public static final double DEFAULT_INEQUALITY_COEFFICIENT = 0.5;
public static final double DEFAULT_EQUALITY_COMPARISON_SELECTIVITY = 0.1;
private final StatsDeriveResult inputStats;
public FilterEstimation(StatsDeriveResult inputStats) {
Preconditions.checkNotNull(inputStats);
this.inputStats = inputStats;
}
/**
* This method will update the stats according to the selectivity.
*/
public StatsDeriveResult estimate(Expression expression) {
public Statistics estimate(Expression expression, Statistics statistics) {
// For a comparison predicate, only when it's left side is a slot and right side is a literal, we would
// consider is a valid predicate.
return calculate(expression);
}
private StatsDeriveResult calculate(Expression expression) {
return expression.accept(this, null);
return expression.accept(this, new EstimationContext(false, statistics));
}
@Override
public StatsDeriveResult visit(Expression expr, EstimationContext context) {
return inputStats.withSelectivity(DEFAULT_INEQUALITY_COMPARISON_SELECTIVITY);
public Statistics visit(Expression expr, EstimationContext context) {
return context.statistics.withSel(DEFAULT_INEQUALITY_COEFFICIENT);
}
@Override
public StatsDeriveResult visitCompoundPredicate(CompoundPredicate predicate, EstimationContext context) {
public Statistics visitCompoundPredicate(CompoundPredicate predicate, EstimationContext context) {
Expression leftExpr = predicate.child(0);
Expression rightExpr = predicate.child(1);
StatsDeriveResult leftStats = leftExpr.accept(this, null);
Statistics leftStats = leftExpr.accept(this, context);
Statistics andStats = rightExpr.accept(new FilterEstimation(),
new EstimationContext(context.isNot, leftStats));
if (predicate instanceof And) {
return rightExpr.accept(new FilterEstimation(leftStats), null);
return andStats;
} else if (predicate instanceof Or) {
StatsDeriveResult rightStats = rightExpr.accept(this, null);
StatsDeriveResult andStats = rightExpr.accept(new FilterEstimation(leftStats), null);
Statistics rightStats = rightExpr.accept(this, context);
double rowCount = leftStats.getRowCount() + rightStats.getRowCount() - andStats.getRowCount();
StatsDeriveResult orStats = inputStats.updateRowCount(rowCount);
for (Map.Entry<Id, ColumnStatistic> entry : leftStats.getSlotIdToColumnStats().entrySet()) {
Statistics orStats = context.statistics.withRowCount(rowCount);
for (Map.Entry<Expression, ColumnStatistic> entry : leftStats.columnStatistics().entrySet()) {
ColumnStatistic leftColStats = entry.getValue();
ColumnStatistic rightColStats = rightStats.getColumnStatsBySlotId(entry.getKey());
ColumnStatistic rightColStats = rightStats.findColumnStatistics(entry.getKey());
ColumnStatisticBuilder estimatedColStatsBuilder = new ColumnStatisticBuilder(leftColStats);
if (leftColStats.minValue <= rightColStats.minValue) {
estimatedColStatsBuilder.setMinValue(leftColStats.minValue);
@ -113,244 +107,117 @@ public class FilterEstimation extends ExpressionVisitor<StatsDeriveResult, Estim
}
return orStats;
}
throw new RuntimeException(String.format("Unexpected predicate type: %s", predicate.toSql()));
return context.statistics;
}
@Override
public StatsDeriveResult visitComparisonPredicate(ComparisonPredicate cp, EstimationContext context) {
boolean isNot = (context != null) && context.isNot;
public Statistics visitComparisonPredicate(ComparisonPredicate cp, EstimationContext context) {
Expression left = cp.left();
Expression right = cp.right();
ColumnStatistic statsForLeft = ExpressionEstimation.estimate(left, inputStats);
ColumnStatistic statsForRight = ExpressionEstimation.estimate(right, inputStats);
ColumnStatisticBuilder leftBuilder = new ColumnStatisticBuilder(statsForLeft);
double selectivity;
ColumnStatistic statsForLeft = ExpressionEstimation.estimate(left, context.statistics);
ColumnStatistic statsForRight = ExpressionEstimation.estimate(right, context.statistics);
if (!(left instanceof Literal) && !(right instanceof Literal)) {
selectivity = calculateWhenBothChildIsColumn(cp, statsForLeft, statsForRight);
return calculateWhenBothColumn(cp, context, statsForLeft, statsForRight);
} else {
// For literal, it's max min is same value.
selectivity = updateLeftStatsWhenRightChildIsLiteral(cp,
leftBuilder,
statsForRight.maxValue,
isNot);
return calculateWhenLiteralRight(cp,
statsForLeft,
statsForRight,
context);
}
StatsDeriveResult outputStats = inputStats.withSelectivity(selectivity);
//assumptions
// 1. func(A) and A have the same stats.
// 2. multiple input slots, like A + B > 10, (A+B)
if (left.getInputSlots().size() == 1) {
Slot leftSlot = left.getInputSlots().iterator().next();
outputStats.addColumnStats(leftSlot.getExprId(), leftBuilder.build());
}
return outputStats;
}
private double updateLessThan(ColumnStatisticBuilder statsForLeft, double val,
double min, double max, double ndv) {
double selectivity = 1.0;
if (val <= min) {
statsForLeft.setMaxValue(val);
statsForLeft.setMinValue(0);
statsForLeft.setNdv(0);
selectivity = 0.0;
} else if (val > max) {
selectivity = 1.0;
} else if (val == max) {
selectivity = 1.0 - 1.0 / ndv;
} else {
statsForLeft.setMaxValue(val);
selectivity = (val - min) / (max - min);
statsForLeft.setNdv(selectivity * statsForLeft.getNdv());
private Statistics updateLessThanLiteral(Expression leftExpr, ColumnStatistic statsForLeft,
double val, EstimationContext context) {
if (statsForLeft.histogram != null) {
return estimateLessThanLiteralWithHistogram(leftExpr, statsForLeft, val, context);
}
return selectivity;
return estimateBinaryComparisonFilter(leftExpr,
statsForLeft,
new StatisticRange(Double.NEGATIVE_INFINITY, val, statsForLeft.ndv), context);
}
private double updateLessThanEqual(ColumnStatisticBuilder statsForLeft, double val,
double min, double max, double ndv) {
double selectivity = 1.0;
if (val < min) {
statsForLeft.setMaxValue(val);
statsForLeft.setMinValue(val);
selectivity = 0.0;
} else if (val == min) {
statsForLeft.setMaxValue(val);
selectivity = 1.0 / ndv;
} else if (val >= max) {
selectivity = 1.0;
} else {
statsForLeft.setMaxValue(val);
selectivity = (val - min) / (max - min);
statsForLeft.setNdv(selectivity * statsForLeft.getNdv());
private Statistics updateGreaterThanLiteral(Expression leftExpr, ColumnStatistic statsForLeft,
double val, EstimationContext context) {
if (statsForLeft.histogram != null) {
return estimateGreaterThanLiteralWithHistogram(leftExpr, statsForLeft, val, context);
}
return selectivity;
StatisticRange rightRange = new StatisticRange(val, Double.POSITIVE_INFINITY,
statsForLeft.ndv);
return estimateBinaryComparisonFilter(leftExpr, statsForLeft, rightRange, context);
}
private double updateGreaterThan(ColumnStatisticBuilder statsForLeft, double val,
double min, double max, double ndv) {
double selectivity = 1.0;
if (val >= max) {
statsForLeft.setMaxValue(val);
statsForLeft.setMinValue(val);
statsForLeft.setNdv(0);
selectivity = 0.0;
} else if (val == min) {
selectivity = 1.0 - 1.0 / ndv;
} else if (val < min) {
selectivity = 1.0;
} else {
statsForLeft.setMinValue(val);
selectivity = (max - val) / (max - min);
statsForLeft.setNdv(selectivity * statsForLeft.getNdv());
private Statistics calculateWhenLiteralRight(ComparisonPredicate cp,
ColumnStatistic statsForLeft, ColumnStatistic statsForRight, EstimationContext context) {
if (statsForLeft == ColumnStatistic.UNKNOWN) {
return context.statistics.withSel(DEFAULT_INEQUALITY_COEFFICIENT);
}
return selectivity;
}
private double updateGreaterThanEqual(ColumnStatisticBuilder statsForLeft, double val,
double min, double max, double ndv) {
double selectivity = 1.0;
if (val > max) {
statsForLeft.setMinValue(val);
statsForLeft.setMaxValue(val);
selectivity = 0.0;
} else if (val == max) {
statsForLeft.setMinValue(val);
statsForLeft.setMaxValue(val);
selectivity = 1.0 / ndv;
} else if (val <= min) {
selectivity = 1.0;
} else {
statsForLeft.setMinValue(val);
selectivity = (max - val) / (max - min);
statsForLeft.setNdv(selectivity * statsForLeft.getNdv());
Expression rightExpr = cp.child(1);
if (!(rightExpr.getDataType() instanceof NumericType)) {
return context.statistics.withSel(DEFAULT_INEQUALITY_COEFFICIENT);
}
return selectivity;
}
private double updateLeftStatsWhenRightChildIsLiteral(ComparisonPredicate cp,
ColumnStatisticBuilder statsForLeft, double val, boolean isNot) {
double selectivity = 1.0;
double ndv = statsForLeft.getNdv();
double max = statsForLeft.getMaxValue();
double min = statsForLeft.getMinValue();
double selectivity;
double ndv = statsForLeft.ndv;
double val = statsForRight.maxValue;
if (cp instanceof EqualTo || cp instanceof NullSafeEqual) {
if (!isNot) {
if (statsForLeft.isUnknown()) {
selectivity = DEFAULT_EQUALITY_COMPARISON_SELECTIVITY;
} else {
statsForLeft.setMaxValue(val);
statsForLeft.setMinValue(val);
if (val > max || val < min) {
statsForLeft.setNdv(0);
statsForLeft.setSelectivity(0);
selectivity = 0.0;
} else {
statsForLeft.setNdv(1);
selectivity = 1.0 / ndv;
}
}
if (statsForLeft == ColumnStatistic.UNKNOWN) {
selectivity = DEFAULT_EQUALITY_COMPARISON_SELECTIVITY;
} else {
if (statsForLeft.isUnknown()) {
selectivity = 1 - DEFAULT_EQUALITY_COMPARISON_SELECTIVITY;
if (val > statsForLeft.maxValue || val < statsForLeft.minValue) {
selectivity = 0.0;
} else {
if (val <= max && val >= min) {
selectivity = 1 - DEFAULT_EQUALITY_COMPARISON_SELECTIVITY;
}
selectivity = StatsMathUtil.minNonNaN(1.0, 1.0 / ndv);
}
}
} else if (statsForLeft.isUnknown()) {
selectivity = DEFAULT_INEQUALITY_COMPARISON_SELECTIVITY;
if (context.isNot) {
selectivity = 1 - selectivity;
}
if (statsForLeft.histogram != null) {
return estimateEqualToWithHistogram(cp.left(), statsForLeft, val, context);
}
return context.statistics.withSel(selectivity);
} else {
if (cp instanceof LessThan) {
if (isNot) {
selectivity = updateGreaterThanEqual(statsForLeft, val, min, max, ndv);
if (cp instanceof LessThan || cp instanceof LessThanEqual) {
if (context.isNot) {
return updateGreaterThanLiteral(cp.left(), statsForLeft, val, context);
} else {
selectivity = updateLessThan(statsForLeft, val, min, max, ndv);
return updateLessThanLiteral(cp.left(), statsForLeft, val, context);
}
} else if (cp instanceof LessThanEqual) {
if (isNot) {
selectivity = updateGreaterThan(statsForLeft, val, min, max, ndv);
} else if (cp instanceof GreaterThan || cp instanceof GreaterThanEqual) {
if (context.isNot) {
return updateLessThanLiteral(cp.left(), statsForLeft, val, context);
} else {
selectivity = updateLessThanEqual(statsForLeft, val, min, max, ndv);
}
} else if (cp instanceof GreaterThan) {
if (isNot) {
selectivity = updateLessThanEqual(statsForLeft, val, min, max, ndv);
} else {
selectivity = updateGreaterThan(statsForLeft, val, min, max, ndv);
}
} else if (cp instanceof GreaterThanEqual) {
if (isNot) {
selectivity = updateLessThan(statsForLeft, val, min, max, ndv);
} else {
selectivity = updateGreaterThanEqual(statsForLeft, val, min, max, ndv);
return updateGreaterThanLiteral(cp.left(), statsForLeft, val, context);
}
} else {
throw new RuntimeException(String.format("Unexpected expression : %s", cp.toSql()));
}
}
return selectivity;
}
private double calculateWhenBothChildIsColumn(ComparisonPredicate cp,
private Statistics calculateWhenBothColumn(ComparisonPredicate cp, EstimationContext context,
ColumnStatistic statsForLeft, ColumnStatistic statsForRight) {
double leftMin = statsForLeft.minValue;
double rightMin = statsForRight.minValue;
double leftMax = statsForLeft.maxValue;
double rightMax = statsForRight.maxValue;
Expression left = cp.left();
Expression right = cp.right();
if (cp instanceof EqualTo || cp instanceof NullSafeEqual) {
if (!statsForLeft.hasIntersect(statsForRight)) {
return 0.0;
}
return DEFAULT_EQUALITY_COMPARISON_SELECTIVITY;
return estimateColumnEqualToColumn(left, statsForLeft, right, statsForRight, context);
}
if (cp instanceof GreaterThan) {
if (leftMax <= rightMin) {
return 0.0;
} else if (leftMin >= rightMax) {
return 1.0;
} else {
return DEFAULT_INEQUALITY_COMPARISON_SELECTIVITY;
}
if (cp instanceof GreaterThan || cp instanceof GreaterThanEqual) {
return estimateColumnLessThanColumn(right, statsForRight, left, statsForLeft, context);
}
if (cp instanceof GreaterThanEqual) {
if (leftMax < rightMin) {
return 0.0;
} else if (leftMin > rightMax) {
return 1.0;
} else {
return DEFAULT_INEQUALITY_COMPARISON_SELECTIVITY;
}
if (cp instanceof LessThan || cp instanceof LessThanEqual) {
return estimateColumnLessThanColumn(left, statsForLeft, right, statsForRight, context);
}
if (cp instanceof LessThan) {
if (leftMin >= rightMax) {
return 0.0;
} else if (leftMax <= rightMin) {
return 1.0;
} else {
return DEFAULT_INEQUALITY_COMPARISON_SELECTIVITY;
}
}
if (cp instanceof LessThanEqual) {
if (leftMin > rightMax) {
return 0.0;
} else if (leftMax < rightMin) {
return 1.0;
} else {
return DEFAULT_INEQUALITY_COMPARISON_SELECTIVITY;
}
}
throw new RuntimeException(String.format("Unexpected expression : %s", cp.toSql()));
return context.statistics;
}
@Override
public StatsDeriveResult visitInPredicate(InPredicate inPredicate, EstimationContext context) {
public Statistics visitInPredicate(InPredicate inPredicate, EstimationContext context) {
boolean isNotIn = context != null && context.isNot;
Expression compareExpr = inPredicate.getCompareExpr();
ColumnStatistic compareExprStats = ExpressionEstimation.estimate(compareExpr, inputStats);
ColumnStatistic compareExprStats = ExpressionEstimation.estimate(compareExpr, context.statistics);
if (compareExprStats.isUnKnown) {
return inputStats.withSelectivity(DEFAULT_INEQUALITY_COMPARISON_SELECTIVITY);
return context.statistics.withSel(DEFAULT_INEQUALITY_COEFFICIENT);
}
List<Expression> options = inPredicate.getOptions();
double maxOption = 0;
@ -376,7 +243,7 @@ public class FilterEstimation extends ExpressionVisitor<StatsDeriveResult, Estim
ColumnStatisticBuilder compareExprStatsBuilder = new ColumnStatisticBuilder(compareExprStats);
if (isNotIn) {
for (Expression option : options) {
ColumnStatistic optionStats = ExpressionEstimation.estimate(option, inputStats);
ColumnStatistic optionStats = ExpressionEstimation.estimate(option, context.statistics);
double validOptionNdv = compareExprStats.ndvIntersection(optionStats);
if (validOptionNdv > 0.0) {
validInOptCount += validOptionNdv;
@ -386,7 +253,7 @@ public class FilterEstimation extends ExpressionVisitor<StatsDeriveResult, Estim
columnSelectivity = compareExprStats.ndv == 0 ? 0 : Math.max(1, validInOptCount) / compareExprStats.ndv;
} else {
for (Expression option : options) {
ColumnStatistic optionStats = ExpressionEstimation.estimate(option, inputStats);
ColumnStatistic optionStats = ExpressionEstimation.estimate(option, context.statistics);
double validOptionNdv = compareExprStats.ndvIntersection(optionStats);
if (validOptionNdv > 0.0) {
validInOptCount += validOptionNdv;
@ -401,40 +268,204 @@ public class FilterEstimation extends ExpressionVisitor<StatsDeriveResult, Estim
} else {
double outputRange = maxOption - minOption;
double originRange = Math.max(1, compareExprStats.maxValue - compareExprStats.minValue);
double orginDensity = compareExprStats.ndv / originRange;
double outputDensity = validInOptCount / outputRange;
columnSelectivity = Math.min(1, outputDensity / orginDensity);
double orginDensity = StatsMathUtil.minNonNaN(1,
compareExprStats.ndv / StatsMathUtil.nonZeroDivisor(originRange));
double outputDensity = StatsMathUtil.minNonNaN(1,
validInOptCount / StatsMathUtil.nonZeroDivisor(outputRange));
columnSelectivity = StatsMathUtil.minNonNaN(1, outputDensity
/ StatsMathUtil.nonZeroDivisor(orginDensity));
}
compareExprStatsBuilder.setMaxValue(maxOption);
compareExprStatsBuilder.setMinValue(minOption);
}
selectivity = Math.min(1.0, validInOptCount / compareExprStats.ndv);
selectivity = StatsMathUtil.minNonNaN(1.0, validInOptCount / compareExprStats.ndv);
compareExprStatsBuilder.setSelectivity(compareExprStats.selectivity * columnSelectivity);
compareExprStatsBuilder.setNdv(validInOptCount);
StatsDeriveResult estimated = new StatsDeriveResult(inputStats);
Statistics estimated = new Statistics(context.statistics);
estimated = estimated.withSelectivity(selectivity);
estimated = estimated.withSel(selectivity);
if (compareExpr instanceof SlotReference) {
estimated.addColumnStats(((SlotReference) compareExpr).getExprId(),
estimated.addColumnStats(compareExpr,
compareExprStatsBuilder.build());
}
return estimated;
}
@Override
public StatsDeriveResult visitNot(Not not, EstimationContext none) {
public Statistics visitNot(Not not, EstimationContext none) {
Preconditions.checkState(!(not.child() instanceof Not),
"Consecutive Not statement should be merged previously");
EstimationContext context = new EstimationContext();
context.isNot = true;
EstimationContext context = new EstimationContext(true, none.statistics);
return not.child().accept(this, context);
}
// Per-visit state threaded through FilterEstimation's expression visitor:
// whether the current predicate sits under a NOT, and the input Statistics
// that the filter is being applied to.
static class EstimationContext {
// true when the enclosing predicate is negated (set by visitNot)
private boolean isNot;
// input statistics the current filter narrows; null when built via the no-arg ctor
private Statistics statistics;
// No-arg constructor leaves isNot = false and statistics = null;
// NOTE(review): visitors that read context.statistics must not receive this form — verify callers.
public EstimationContext() {
}
public EstimationContext(boolean isNot, Statistics statistics) {
this.isNot = isNot;
this.statistics = statistics;
}
}
/**
 * Applies a binary-comparison filter on {@code leftExpr} against a literal range.
 * Selectivity is the fraction of the column's range overlapping {@code rightRange};
 * the column's stats are narrowed to the intersection of the two ranges.
 */
private Statistics estimateBinaryComparisonFilter(Expression leftExpr, ColumnStatistic leftStats,
        StatisticRange rightRange, EstimationContext context) {
    StatisticRange columnRange = new StatisticRange(leftStats.minValue, leftStats.maxValue, leftStats.ndv);
    // fraction of the column's range that satisfies the predicate
    double selectivity = columnRange.overlapPercentWith(rightRange);
    // narrow the column's min/max/ndv to the satisfied portion
    StatisticRange narrowed = columnRange.intersect(rightRange);
    ColumnStatistic narrowedColStats = new ColumnStatisticBuilder(leftStats)
            .setMinValue(narrowed.getLow())
            .setMaxValue(narrowed.getHigh())
            .setNdv(narrowed.getDistinctValues())
            .build();
    Statistics filtered = context.statistics.withSel(selectivity);
    filtered.addColumnStats(leftExpr, narrowedColStats);
    return filtered;
}
/**
 * Estimates a column-equals-column predicate.
 * Both columns are narrowed to the intersection of their ranges, and selectivity
 * is 1 / max(leftNdv, rightNdv) (zero-guarded), the classic equi-join heuristic.
 */
private Statistics estimateColumnEqualToColumn(Expression leftExpr, ColumnStatistic leftStats,
        Expression rightExpr, ColumnStatistic rightStats, EstimationContext context) {
    StatisticRange leftRange = StatisticRange.from(leftStats);
    StatisticRange rightRange = StatisticRange.from(rightStats);
    StatisticRange leftIntersectRight = leftRange.intersect(rightRange);
    StatisticRange rightIntersectLeft = rightRange.intersect(leftIntersectRight);
    ColumnStatisticBuilder leftBuilder = new ColumnStatisticBuilder(leftStats);
    leftBuilder.setNdv(leftIntersectRight.getDistinctValues());
    leftBuilder.setMinValue(leftIntersectRight.getLow());
    leftBuilder.setMaxValue(leftIntersectRight.getHigh());
    ColumnStatisticBuilder rightBuilder = new ColumnStatisticBuilder(rightStats);
    rightBuilder.setNdv(rightIntersectLeft.getDistinctValues());
    rightBuilder.setMinValue(rightIntersectLeft.getLow());
    // BUG FIX: the max value must be the intersected range's upper bound,
    // not its distinct-value count (was setMaxValue(getDistinctValues())).
    rightBuilder.setMaxValue(rightIntersectLeft.getHigh());
    double sel = 1 / StatsMathUtil.nonZeroDivisor(Math.max(leftStats.ndv, rightStats.ndv));
    Statistics updatedStatistics = context.statistics.withSel(sel);
    updatedStatistics.addColumnStats(leftExpr, leftBuilder.build());
    updatedStatistics.addColumnStats(rightExpr, rightBuilder.build());
    return updatedStatistics;
}
/**
 * Estimates a left-column &lt; right-column predicate.
 * Three cases: left range entirely below right (all non-null rows qualify),
 * no overlap with left above right (zero rows), and partial overlap, where
 * selectivity combines the always-less fraction, the overlapping fraction
 * scaled by DEFAULT_INEQUALITY_COEFFICIENT, and the right's always-greater fraction.
 */
private Statistics estimateColumnLessThanColumn(Expression leftExpr, ColumnStatistic leftStats,
        Expression rightExpr, ColumnStatistic rightStats, EstimationContext context) {
    StatisticRange leftRange = StatisticRange.from(leftStats);
    StatisticRange rightRange = StatisticRange.from(rightStats);
    Statistics statistics = null;
    // Left always less than Right: every row qualifies except nulls on either side.
    if (leftRange.getHigh() < rightRange.getLow()) {
        statistics =
                context.statistics.withRowCount(Math.min(context.statistics.getRowCount() - leftStats.numNulls,
                        context.statistics.getRowCount() - rightStats.numNulls));
        statistics.addColumnStats(leftExpr, new ColumnStatisticBuilder(leftStats).setNumNulls(0.0).build());
        statistics.addColumnStats(rightExpr, new ColumnStatisticBuilder(rightStats).setNumNulls(0.0).build());
        return statistics;
    }
    double leftOverlapPercent = leftRange.overlapPercentWith(rightRange);
    // Left always greater than right: nothing qualifies.
    if (leftOverlapPercent == 0) {
        return context.statistics.withRowCount(0.0);
    }
    // Portion of the left range strictly below the right range's low bound.
    StatisticRange leftAlwaysLessThanRightRange = new StatisticRange(leftStats.minValue,
            rightStats.minValue, Double.NaN);
    double leftAlwaysLessThanRightPercent = 0;
    if (leftRange.getLow() < rightRange.getLow()) {
        leftAlwaysLessThanRightPercent = leftRange.overlapPercentWith(leftAlwaysLessThanRightRange);
    }
    ColumnStatistic leftColumnStatistic = new ColumnStatisticBuilder(leftStats)
            .setMaxValue(Math.min(leftRange.getHigh(), rightRange.getHigh()))
            .setMinValue(leftRange.getLow())
            .setNdv(leftStats.ndv * (leftAlwaysLessThanRightPercent + leftOverlapPercent))
            .setNumNulls(0)
            .build();
    double rightOverlappingRangeFraction = rightRange.overlapPercentWith(leftRange);
    double rightAlwaysGreaterRangeFraction = 0;
    if (leftRange.getHigh() < rightRange.getHigh()) {
        rightAlwaysGreaterRangeFraction = rightRange.overlapPercentWith(new StatisticRange(leftRange.getHigh(),
                rightRange.getHigh(), Double.NaN));
    }
    ColumnStatistic rightColumnStatistic = new ColumnStatisticBuilder(rightStats)
            .setMinValue(Math.max(leftRange.getLow(), rightRange.getLow()))
            .setMaxValue(rightRange.getHigh())
            // BUG FIX: this fraction is a distinct-value estimate and must update
            // ndv (mirroring the left column above); it was written into avgSizeByte.
            .setNdv(rightStats.ndv * (rightAlwaysGreaterRangeFraction + rightOverlappingRangeFraction))
            .setNumNulls(0)
            .build();
    double sel = leftAlwaysLessThanRightPercent
            + leftOverlapPercent * rightOverlappingRangeFraction * DEFAULT_INEQUALITY_COEFFICIENT
            + leftOverlapPercent * rightAlwaysGreaterRangeFraction;
    return context.statistics.withSel(sel)
            .addColumnStats(leftExpr, leftColumnStatistic)
            .addColumnStats(rightExpr, rightColumnStatistic);
}
/**
 * Histogram-based estimate for {@code leftExpr < numVal}.
 * Finds the bucket containing numVal, computes selectivity as
 * (rows in all earlier buckets + the matched bucket's portion below numVal) / rowCount,
 * and rewrites the column's histogram to cover only values below numVal.
 * Falls back to selectivity 0 when numVal is outside every bucket.
 */
private Statistics estimateLessThanLiteralWithHistogram(Expression leftExpr, ColumnStatistic leftStats,
        double numVal, EstimationContext context) {
    Histogram leftHist = leftStats.histogram;
    for (int i = 0; i < leftHist.buckets.size(); i++) {
        Bucket bucket = leftHist.buckets.get(i);
        if (bucket.upper >= numVal && bucket.lower <= numVal) {
            double overlapPercentInBucket = StatsMathUtil.minNonNaN(1, (numVal - bucket.lower)
                    / (bucket.upper - bucket.lower));
            double overlapCountInBucket = overlapPercentInBucket * bucket.count;
            double sel = StatsMathUtil.minNonNaN(1, (bucket.preSum + overlapCountInBucket)
                    / StatsMathUtil.nonZeroDivisor(context.statistics.getRowCount()));
            // BUG FIX: copy into a fresh list instead of calling add() on the subList
            // view (which structurally modifies the shared histogram's bucket list),
            // and exclude bucket i so its kept portion is represented only once by
            // the partial bucket appended below (subList(0, i + 1) double-counted it).
            List<Bucket> updatedBucketList = new ArrayList<>(leftHist.buckets.subList(0, i));
            updatedBucketList.add(new Bucket(bucket.lower, numVal, overlapCountInBucket,
                    bucket.preSum, overlapPercentInBucket * bucket.ndv));
            ColumnStatistic columnStatistic = new ColumnStatisticBuilder(leftStats)
                    .setMaxValue(numVal)
                    .setHistogram(new HistogramBuilder(leftHist).setBuckets(updatedBucketList).build())
                    .build();
            return context.statistics.withSel(sel).addColumnStats(leftExpr, columnStatistic);
        }
    }
    return context.statistics.withSel(0);
}
/**
 * Histogram-based estimate for {@code leftExpr > numVal}.
 * Finds the bucket containing numVal, computes selectivity as
 * (total histogram rows - rows before the bucket - the bucket's portion below numVal) / rowCount,
 * and rewrites the column's histogram to cover only values above numVal.
 * Falls back to selectivity 0 when numVal is outside every bucket.
 */
private Statistics estimateGreaterThanLiteralWithHistogram(Expression leftExpr, ColumnStatistic leftStats,
        double numVal, EstimationContext context) {
    Histogram leftHist = leftStats.histogram;
    for (int i = 0; i < leftHist.buckets.size(); i++) {
        Bucket bucket = leftHist.buckets.get(i);
        if (bucket.upper >= numVal && bucket.lower <= numVal) {
            // fraction of the matched bucket lying ABOVE numVal
            double overlapPercentInBucket = StatsMathUtil.minNonNaN(1, ((bucket.upper - numVal)
                    / (bucket.upper - bucket.lower)));
            // rows of the matched bucket lying BELOW numVal (excluded by the filter)
            double overlapCountInBucket = (1 - overlapPercentInBucket) * bucket.count;
            double sel = StatsMathUtil.minNonNaN(1, (leftHist.size() - bucket.preSum - overlapCountInBucket)
                    / context.statistics.getRowCount());
            List<Bucket> updatedBucketList = new ArrayList<>();
            updatedBucketList.add(new Bucket(numVal, bucket.upper, overlapPercentInBucket * bucket.count,
                    0, overlapPercentInBucket * bucket.ndv));
            // BUG FIX: start the tail at i + 1 so the matched bucket is represented
            // only once by the partial bucket above (subList(i, …) re-added it in full).
            updatedBucketList.addAll(leftHist.buckets.subList(i + 1, leftHist.buckets.size()));
            ColumnStatistic columnStatistic = new ColumnStatisticBuilder(leftStats)
                    // BUG FIX: a greater-than filter raises the MIN bound to numVal;
                    // setMaxValue(numVal) was a copy-paste from the less-than path.
                    .setMinValue(numVal)
                    .setHistogram(new HistogramBuilder(leftHist).setBuckets(updatedBucketList).build())
                    .build();
            return context.statistics.withSel(sel).addColumnStats(leftExpr, columnStatistic);
        }
    }
    return context.statistics.withSel(0);
}
/**
 * Histogram-based estimate for {@code leftExpr = numVal}.
 * Selectivity for the bucket containing numVal is (bucket rows / bucket ndv) / total rows,
 * i.e. the average per-distinct-value frequency within that bucket; the column's
 * histogram is dropped since a single value remains. When buckets share a boundary
 * at numVal the last matching bucket wins (original scan order preserved).
 */
private Statistics estimateEqualToWithHistogram(Expression leftExpr, ColumnStatistic leftStats,
        double numVal, EstimationContext context) {
    Histogram histogram = leftStats.histogram;
    ColumnStatistic columnStatistic = new ColumnStatisticBuilder(leftStats)
            .setHistogram(null)
            .build();
    double sel = 0;
    for (int i = 0; i < histogram.buckets.size(); i++) {
        Bucket bucket = histogram.buckets.get(i);
        if (bucket.upper >= numVal && bucket.lower <= numVal) {
            // BUG FIX: guard the divisors; an empty bucket (ndv == 0) or empty
            // histogram previously produced an Infinity/NaN selectivity.
            if (bucket.ndv > 0 && histogram.size() > 0) {
                sel = (bucket.count / bucket.ndv) / histogram.size();
            }
        }
    }
    return context.statistics.withSel(sel).addColumnStats(leftExpr, columnStatistic);
}
}

View File

@ -17,131 +17,86 @@
package org.apache.doris.nereids.stats;
import org.apache.doris.common.CheckedMath;
import org.apache.doris.nereids.trees.expressions.EqualTo;
import org.apache.doris.common.Pair;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.SlotReference;
import org.apache.doris.nereids.trees.plans.JoinType;
import org.apache.doris.nereids.trees.plans.algebra.Join;
import org.apache.doris.statistics.ColumnStatistic;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import org.apache.doris.statistics.StatisticsBuilder;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.List;
import java.util.stream.Collectors;
/**
* Estimate hash join stats.
* TODO: Update other props in the ColumnStats properly.
*/
public class JoinEstimation {
private static final Logger LOG = LogManager.getLogger(JoinEstimation.class);
private static Statistics estimateInnerJoin(Statistics crossJoinStats, List<Expression> joinConditions) {
List<Pair<Expression, Double>> sortedJoinConditions = joinConditions.stream()
.map(expression -> Pair.of(expression, 0.0)).sorted((a, b) -> {
double selA = estimateJoinConditionSel(crossJoinStats, a.first);
a.second = selA;
double selB = estimateJoinConditionSel(crossJoinStats, b.first);
b.second = selB;
double sub = selA - selB;
if (sub > 0) {
return -1;
} else if (sub < 0) {
return 1;
} else {
return 0;
}
}).collect(Collectors.toList());
private static double estimateInnerJoin(StatsDeriveResult leftStats, StatsDeriveResult rightStats, Join join) {
Preconditions.checkArgument(join.getJoinType() == JoinType.INNER_JOIN);
double rowCount = Double.MAX_VALUE;
if (join.getHashJoinConjuncts().isEmpty()) {
rowCount = leftStats.getRowCount() * rightStats.getRowCount();
} else {
for (Expression equalTo : join.getHashJoinConjuncts()) {
double tmpRowCount = estimateEqualJoinCondition((EqualTo) equalTo, leftStats, rightStats);
rowCount = Math.min(rowCount, tmpRowCount);
}
double sel = 1.0;
for (int i = 0; i < sortedJoinConditions.size(); i++) {
sel *= Math.pow(sortedJoinConditions.get(i).second, 1 / Math.pow(2, i));
}
return rowCount;
return crossJoinStats.withSel(sel);
}
private static double estimateEqualJoinCondition(EqualTo equalto,
StatsDeriveResult leftStats, StatsDeriveResult rightStats) {
SlotReference eqRight = (SlotReference) equalto.child(1).getInputSlots().iterator().next();
ColumnStatistic rColumnStats = rightStats.getSlotIdToColumnStats().get(eqRight.getExprId());
SlotReference eqLeft = (SlotReference) equalto.child(0).getInputSlots().iterator().next();
if (rColumnStats == null) {
rColumnStats = rightStats.getSlotIdToColumnStats().get(eqLeft.getExprId());
}
if (rColumnStats == null) {
LOG.info("estimate inner join failed, column stats not found: %s", eqRight);
throw new RuntimeException("estimateInnerJoin cannot find columnStats: " + eqRight);
}
double rowCount = 0;
if (rColumnStats.isUnKnown || rColumnStats.ndv == 0) {
rowCount = Math.max(leftStats.getRowCount(), rightStats.getRowCount());
} else {
//TODO range is not considered
rowCount = (leftStats.getRowCount()
* rightStats.getRowCount()
* rColumnStats.selectivity
/ rColumnStats.ndv);
}
rowCount = Math.max(1, Math.ceil(rowCount));
return rowCount;
}
private static double estimateLeftSemiJoin(double leftCount, double rightCount) {
//TODO the estimation of semi and anti join is not proper, just for tpch q21
return leftCount - leftCount / Math.max(2, rightCount);
}
private static double estimateFullOuterJoin(StatsDeriveResult leftStats, StatsDeriveResult rightStats, Join join) {
//TODO: after we have histogram, re-design this logical
return leftStats.getRowCount() + rightStats.getRowCount();
private static double estimateJoinConditionSel(Statistics crossJoinStats, Expression joinCond) {
Statistics statistics = new FilterEstimation().estimate(joinCond, crossJoinStats);
return statistics.getRowCount() / crossJoinStats.getRowCount();
}
/**
* estimate join
*/
public static StatsDeriveResult estimate(StatsDeriveResult leftStats, StatsDeriveResult rightStats, Join join) {
public static Statistics estimate(Statistics leftStats, Statistics rightStats, Join join) {
JoinType joinType = join.getJoinType();
double rowCount = Double.MAX_VALUE;
if (joinType == JoinType.LEFT_SEMI_JOIN
|| joinType == JoinType.LEFT_ANTI_JOIN
|| joinType == JoinType.NULL_AWARE_LEFT_ANTI_JOIN) {
double rightCount = rightStats.getRowCount();
double leftCount = leftStats.getRowCount();
if (join.getHashJoinConjuncts().isEmpty()) {
rowCount = joinType == JoinType.LEFT_SEMI_JOIN ? leftCount : 0;
} else {
rowCount = estimateLeftSemiJoin(leftCount, rightCount);
}
} else if (joinType == JoinType.RIGHT_SEMI_JOIN || joinType == JoinType.RIGHT_ANTI_JOIN) {
double rightCount = rightStats.getRowCount();
double leftCount = leftStats.getRowCount();
if (join.getHashJoinConjuncts().isEmpty()) {
rowCount = joinType == JoinType.RIGHT_SEMI_JOIN ? rightCount : 0;
} else {
rowCount = estimateLeftSemiJoin(rightCount, leftCount);
}
Statistics crossJoinStats = new StatisticsBuilder()
.setRowCount(leftStats.getRowCount() * rightStats.getRowCount())
.putColumnStatistics(leftStats.columnStatistics())
.putColumnStatistics(rightStats.columnStatistics())
.build();
List<Expression> joinConditions = join.getHashJoinConjuncts();
Statistics innerJoinStats = estimateInnerJoin(crossJoinStats, joinConditions);
innerJoinStats.setWidth(leftStats.getWidth() + rightStats.getWidth());
innerJoinStats.setPenalty(0);
double rowCount;
if (joinType.isLeftSemiOrAntiJoin()) {
rowCount = Math.min(innerJoinStats.getRowCount(), leftStats.getRowCount());
return innerJoinStats.withRowCount(rowCount);
} else if (joinType.isRightSemiOrAntiJoin()) {
rowCount = Math.min(innerJoinStats.getRowCount(), rightStats.getRowCount());
return innerJoinStats.withRowCount(rowCount);
} else if (joinType == JoinType.INNER_JOIN) {
rowCount = estimateInnerJoin(leftStats, rightStats, join);
return innerJoinStats;
} else if (joinType == JoinType.LEFT_OUTER_JOIN) {
rowCount = leftStats.getRowCount();
rowCount = Math.max(leftStats.getRowCount(), innerJoinStats.getRowCount());
return innerJoinStats.withRowCount(rowCount);
} else if (joinType == JoinType.RIGHT_OUTER_JOIN) {
rowCount = rightStats.getRowCount();
rowCount = Math.max(rightStats.getRowCount(), innerJoinStats.getRowCount());
return innerJoinStats.withRowCount(rowCount);
} else if (joinType == JoinType.CROSS_JOIN) {
rowCount = CheckedMath.checkedMultiply(leftStats.getRowCount(),
rightStats.getRowCount());
return crossJoinStats;
} else if (joinType == JoinType.FULL_OUTER_JOIN) {
rowCount = estimateFullOuterJoin(leftStats, rightStats, join);
} else {
LOG.warn("join type is not supported: " + joinType);
throw new RuntimeException("joinType is not supported");
return innerJoinStats.withRowCount(leftStats.getRowCount()
+ rightStats.getRowCount() + innerJoinStats.getRowCount());
}
StatsDeriveResult statsDeriveResult = new StatsDeriveResult(rowCount,
rightStats.getWidth() + leftStats.getWidth(), 0, Maps.newHashMap());
if (joinType.isRemainLeftJoin()) {
statsDeriveResult.merge(leftStats);
}
if (joinType.isRemainRightJoin()) {
statsDeriveResult.merge(rightStats);
}
//TODO: consider other join conjuncts
return statsDeriveResult;
return crossJoinStats;
}
}

View File

@ -19,8 +19,8 @@ package org.apache.doris.nereids.stats;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.common.Id;
import org.apache.doris.common.Pair;
import org.apache.doris.nereids.memo.Group;
import org.apache.doris.nereids.memo.GroupExpression;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.NamedExpression;
@ -89,7 +89,9 @@ import org.apache.doris.nereids.trees.plans.physical.PhysicalWindow;
import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanVisitor;
import org.apache.doris.statistics.ColumnStatistic;
import org.apache.doris.statistics.ColumnStatisticBuilder;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.StatisticRange;
import org.apache.doris.statistics.Statistics;
import org.apache.doris.statistics.StatisticsBuilder;
import com.google.common.collect.Maps;
@ -103,7 +105,7 @@ import java.util.stream.Collectors;
/**
* Used to calculate the stats for each plan
*/
public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void> {
public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> {
private final GroupExpression groupExpression;
private StatsCalculator(GroupExpression groupExpression) {
@ -119,8 +121,8 @@ public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void>
}
private void estimate() {
StatsDeriveResult stats = groupExpression.getPlan().accept(this, null);
StatsDeriveResult originStats = groupExpression.getOwnerGroup().getStatistics();
Statistics stats = groupExpression.getPlan().accept(this, null);
Statistics originStats = groupExpression.getOwnerGroup().getStatistics();
/*
in an ideal cost model, every group expression in a group are equivalent, but in fact the cost are different.
we record the lowest expression cost as group cost to avoid missing this group.
@ -133,208 +135,208 @@ public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void>
}
@Override
public StatsDeriveResult visitLogicalEmptyRelation(LogicalEmptyRelation emptyRelation, Void context) {
public Statistics visitLogicalEmptyRelation(LogicalEmptyRelation emptyRelation, Void context) {
return computeEmptyRelation(emptyRelation);
}
@Override
public StatsDeriveResult visitLogicalLimit(LogicalLimit<? extends Plan> limit, Void context) {
public Statistics visitLogicalLimit(LogicalLimit<? extends Plan> limit, Void context) {
return computeLimit(limit);
}
@Override
public StatsDeriveResult visitPhysicalLimit(PhysicalLimit<? extends Plan> limit, Void context) {
public Statistics visitPhysicalLimit(PhysicalLimit<? extends Plan> limit, Void context) {
return computeLimit(limit);
}
@Override
public StatsDeriveResult visitLogicalOneRowRelation(LogicalOneRowRelation oneRowRelation, Void context) {
public Statistics visitLogicalOneRowRelation(LogicalOneRowRelation oneRowRelation, Void context) {
return computeOneRowRelation(oneRowRelation);
}
@Override
public StatsDeriveResult visitLogicalAggregate(LogicalAggregate<? extends Plan> aggregate, Void context) {
public Statistics visitLogicalAggregate(LogicalAggregate<? extends Plan> aggregate, Void context) {
return computeAggregate(aggregate);
}
@Override
public StatsDeriveResult visitLogicalRepeat(LogicalRepeat<? extends Plan> repeat, Void context) {
public Statistics visitLogicalRepeat(LogicalRepeat<? extends Plan> repeat, Void context) {
return computeRepeat(repeat);
}
@Override
public StatsDeriveResult visitLogicalFilter(LogicalFilter<? extends Plan> filter, Void context) {
public Statistics visitLogicalFilter(LogicalFilter<? extends Plan> filter, Void context) {
return computeFilter(filter);
}
@Override
public StatsDeriveResult visitLogicalOlapScan(LogicalOlapScan olapScan, Void context) {
public Statistics visitLogicalOlapScan(LogicalOlapScan olapScan, Void context) {
olapScan.getExpressions();
return computeScan(olapScan);
}
@Override
public StatsDeriveResult visitLogicalSchemaScan(LogicalSchemaScan schemaScan, Void context) {
public Statistics visitLogicalSchemaScan(LogicalSchemaScan schemaScan, Void context) {
return computeScan(schemaScan);
}
@Override
public StatsDeriveResult visitLogicalFileScan(LogicalFileScan fileScan, Void context) {
public Statistics visitLogicalFileScan(LogicalFileScan fileScan, Void context) {
fileScan.getExpressions();
return computeScan(fileScan);
}
@Override
public StatsDeriveResult visitLogicalTVFRelation(LogicalTVFRelation tvfRelation, Void context) {
public Statistics visitLogicalTVFRelation(LogicalTVFRelation tvfRelation, Void context) {
return tvfRelation.getFunction().computeStats(tvfRelation.getOutput());
}
@Override
public StatsDeriveResult visitLogicalJdbcScan(LogicalJdbcScan jdbcScan, Void context) {
public Statistics visitLogicalJdbcScan(LogicalJdbcScan jdbcScan, Void context) {
jdbcScan.getExpressions();
return computeScan(jdbcScan);
}
@Override
public StatsDeriveResult visitLogicalEsScan(LogicalEsScan esScan, Void context) {
public Statistics visitLogicalEsScan(LogicalEsScan esScan, Void context) {
esScan.getExpressions();
return computeScan(esScan);
}
@Override
public StatsDeriveResult visitLogicalProject(LogicalProject<? extends Plan> project, Void context) {
public Statistics visitLogicalProject(LogicalProject<? extends Plan> project, Void context) {
return computeProject(project);
}
@Override
public StatsDeriveResult visitLogicalSort(LogicalSort<? extends Plan> sort, Void context) {
public Statistics visitLogicalSort(LogicalSort<? extends Plan> sort, Void context) {
return groupExpression.childStatistics(0);
}
@Override
public StatsDeriveResult visitLogicalTopN(LogicalTopN<? extends Plan> topN, Void context) {
public Statistics visitLogicalTopN(LogicalTopN<? extends Plan> topN, Void context) {
return computeTopN(topN);
}
@Override
public StatsDeriveResult visitLogicalJoin(LogicalJoin<? extends Plan, ? extends Plan> join, Void context) {
public Statistics visitLogicalJoin(LogicalJoin<? extends Plan, ? extends Plan> join, Void context) {
return JoinEstimation.estimate(groupExpression.childStatistics(0),
groupExpression.childStatistics(1), join);
}
@Override
public StatsDeriveResult visitLogicalAssertNumRows(
public Statistics visitLogicalAssertNumRows(
LogicalAssertNumRows<? extends Plan> assertNumRows, Void context) {
return computeAssertNumRows(assertNumRows.getAssertNumRowsElement().getDesiredNumOfRows());
}
@Override
public StatsDeriveResult visitLogicalUnion(
public Statistics visitLogicalUnion(
LogicalUnion union, Void context) {
return computeUnion(union);
}
@Override
public StatsDeriveResult visitLogicalExcept(
public Statistics visitLogicalExcept(
LogicalExcept except, Void context) {
return computeExcept(except);
}
@Override
public StatsDeriveResult visitLogicalIntersect(
public Statistics visitLogicalIntersect(
LogicalIntersect intersect, Void context) {
return computeIntersect(intersect);
}
@Override
public StatsDeriveResult visitLogicalGenerate(LogicalGenerate<? extends Plan> generate, Void context) {
public Statistics visitLogicalGenerate(LogicalGenerate<? extends Plan> generate, Void context) {
return computeGenerate(generate);
}
public StatsDeriveResult visitLogicalWindow(LogicalWindow<? extends Plan> window, Void context) {
public Statistics visitLogicalWindow(LogicalWindow<? extends Plan> window, Void context) {
return computeWindow(window);
}
@Override
public StatsDeriveResult visitPhysicalWindow(PhysicalWindow window, Void context) {
public Statistics visitPhysicalWindow(PhysicalWindow window, Void context) {
return computeWindow(window);
}
@Override
public StatsDeriveResult visitPhysicalEmptyRelation(PhysicalEmptyRelation emptyRelation, Void context) {
public Statistics visitPhysicalEmptyRelation(PhysicalEmptyRelation emptyRelation, Void context) {
return computeEmptyRelation(emptyRelation);
}
@Override
public StatsDeriveResult visitPhysicalHashAggregate(PhysicalHashAggregate<? extends Plan> agg, Void context) {
public Statistics visitPhysicalHashAggregate(PhysicalHashAggregate<? extends Plan> agg, Void context) {
return computeAggregate(agg);
}
@Override
public StatsDeriveResult visitPhysicalRepeat(PhysicalRepeat<? extends Plan> repeat, Void context) {
public Statistics visitPhysicalRepeat(PhysicalRepeat<? extends Plan> repeat, Void context) {
return computeRepeat(repeat);
}
@Override
public StatsDeriveResult visitPhysicalOneRowRelation(PhysicalOneRowRelation oneRowRelation, Void context) {
public Statistics visitPhysicalOneRowRelation(PhysicalOneRowRelation oneRowRelation, Void context) {
return computeOneRowRelation(oneRowRelation);
}
@Override
public StatsDeriveResult visitPhysicalOlapScan(PhysicalOlapScan olapScan, Void context) {
public Statistics visitPhysicalOlapScan(PhysicalOlapScan olapScan, Void context) {
return computeScan(olapScan);
}
@Override
public StatsDeriveResult visitPhysicalSchemaScan(PhysicalSchemaScan schemaScan, Void context) {
public Statistics visitPhysicalSchemaScan(PhysicalSchemaScan schemaScan, Void context) {
return computeScan(schemaScan);
}
@Override
public StatsDeriveResult visitPhysicalFileScan(PhysicalFileScan fileScan, Void context) {
public Statistics visitPhysicalFileScan(PhysicalFileScan fileScan, Void context) {
return computeScan(fileScan);
}
@Override
public StatsDeriveResult visitPhysicalStorageLayerAggregate(
public Statistics visitPhysicalStorageLayerAggregate(
PhysicalStorageLayerAggregate storageLayerAggregate, Void context) {
return storageLayerAggregate.getRelation().accept(this, context);
}
@Override
public StatsDeriveResult visitPhysicalTVFRelation(PhysicalTVFRelation tvfRelation, Void context) {
public Statistics visitPhysicalTVFRelation(PhysicalTVFRelation tvfRelation, Void context) {
return tvfRelation.getFunction().computeStats(tvfRelation.getOutput());
}
@Override
public StatsDeriveResult visitPhysicalJdbcScan(PhysicalJdbcScan jdbcScan, Void context) {
public Statistics visitPhysicalJdbcScan(PhysicalJdbcScan jdbcScan, Void context) {
return computeScan(jdbcScan);
}
@Override
public StatsDeriveResult visitPhysicalEsScan(PhysicalEsScan esScan, Void context) {
public Statistics visitPhysicalEsScan(PhysicalEsScan esScan, Void context) {
return computeScan(esScan);
}
@Override
public StatsDeriveResult visitPhysicalQuickSort(PhysicalQuickSort<? extends Plan> sort, Void context) {
public Statistics visitPhysicalQuickSort(PhysicalQuickSort<? extends Plan> sort, Void context) {
return groupExpression.childStatistics(0);
}
@Override
public StatsDeriveResult visitPhysicalTopN(PhysicalTopN<? extends Plan> topN, Void context) {
public Statistics visitPhysicalTopN(PhysicalTopN<? extends Plan> topN, Void context) {
return computeTopN(topN);
}
@Override
public StatsDeriveResult visitPhysicalHashJoin(
public Statistics visitPhysicalHashJoin(
PhysicalHashJoin<? extends Plan, ? extends Plan> hashJoin, Void context) {
return JoinEstimation.estimate(groupExpression.childStatistics(0),
groupExpression.childStatistics(1), hashJoin);
}
@Override
public StatsDeriveResult visitPhysicalNestedLoopJoin(
public Statistics visitPhysicalNestedLoopJoin(
PhysicalNestedLoopJoin<? extends Plan, ? extends Plan> nestedLoopJoin,
Void context) {
return JoinEstimation.estimate(groupExpression.childStatistics(0),
@ -343,67 +345,66 @@ public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void>
// TODO: We should subtract those pruned column, and consider the expression transformations in the node.
@Override
public StatsDeriveResult visitPhysicalProject(PhysicalProject<? extends Plan> project, Void context) {
public Statistics visitPhysicalProject(PhysicalProject<? extends Plan> project, Void context) {
return computeProject(project);
}
@Override
public StatsDeriveResult visitPhysicalFilter(PhysicalFilter<? extends Plan> filter, Void context) {
public Statistics visitPhysicalFilter(PhysicalFilter<? extends Plan> filter, Void context) {
return computeFilter(filter);
}
@Override
public StatsDeriveResult visitPhysicalDistribute(PhysicalDistribute<? extends Plan> distribute,
public Statistics visitPhysicalDistribute(PhysicalDistribute<? extends Plan> distribute,
Void context) {
return groupExpression.childStatistics(0);
}
@Override
public StatsDeriveResult visitPhysicalAssertNumRows(PhysicalAssertNumRows<? extends Plan> assertNumRows,
public Statistics visitPhysicalAssertNumRows(PhysicalAssertNumRows<? extends Plan> assertNumRows,
Void context) {
return computeAssertNumRows(assertNumRows.getAssertNumRowsElement().getDesiredNumOfRows());
}
@Override
public StatsDeriveResult visitPhysicalUnion(PhysicalUnion union, Void context) {
public Statistics visitPhysicalUnion(PhysicalUnion union, Void context) {
return computeUnion(union);
}
@Override
public StatsDeriveResult visitPhysicalExcept(PhysicalExcept except, Void context) {
public Statistics visitPhysicalExcept(PhysicalExcept except, Void context) {
return computeExcept(except);
}
@Override
public StatsDeriveResult visitPhysicalIntersect(PhysicalIntersect intersect, Void context) {
public Statistics visitPhysicalIntersect(PhysicalIntersect intersect, Void context) {
return computeIntersect(intersect);
}
@Override
public StatsDeriveResult visitPhysicalGenerate(PhysicalGenerate<? extends Plan> generate, Void context) {
public Statistics visitPhysicalGenerate(PhysicalGenerate<? extends Plan> generate, Void context) {
return computeGenerate(generate);
}
private StatsDeriveResult computeAssertNumRows(long desiredNumOfRows) {
StatsDeriveResult statsDeriveResult = groupExpression.childStatistics(0);
statsDeriveResult.updateByLimit(1);
return statsDeriveResult;
/**
 * Derive statistics for an AssertNumRows node: cap the estimated row count
 * at 1 (the node errors out at runtime when more rows pass through).
 *
 * @param desiredNumOfRows the asserted row count; currently unused by the
 *        estimation, which conservatively caps at 1 — TODO confirm intent
 */
private Statistics computeAssertNumRows(long desiredNumOfRows) {
    Statistics statistics = groupExpression.childStatistics(0);
    // withRowCount(...) is used as a value-returning operation at every other
    // call site in this class; the original code discarded its result and
    // returned the unmodified child statistics, so the cap never applied.
    return statistics.withRowCount(Math.min(1, statistics.getRowCount()));
}
private StatsDeriveResult computeFilter(Filter filter) {
StatsDeriveResult stats = groupExpression.childStatistics(0);
FilterEstimation filterEstimation =
new FilterEstimation(stats);
return filterEstimation.estimate(filter.getPredicate());
/**
 * Derive statistics for a filter by estimating the selectivity of its
 * predicate against the child's statistics.
 */
private Statistics computeFilter(Filter filter) {
    Statistics childStats = groupExpression.childStatistics(0);
    return new FilterEstimation().estimate(filter.getPredicate(), childStats);
}
// TODO: 1. Subtract the pruned partition
// 2. Consider the influence of runtime filter
// 3. Get NDV and column data size from StatisticManger, StatisticManager doesn't support it now.
private StatsDeriveResult computeScan(Scan scan) {
private Statistics computeScan(Scan scan) {
Set<SlotReference> slotSet = scan.getOutput().stream().filter(SlotReference.class::isInstance)
.map(s -> (SlotReference) s).collect(Collectors.toSet());
Map<Id, ColumnStatistic> columnStatisticMap = new HashMap<>();
Map<Expression, ColumnStatistic> columnStatisticMap = new HashMap<>();
TableIf table = scan.getTable();
double rowCount = scan.getTable().estimatedRowCount();
for (SlotReference slotReference : slotSet) {
@ -416,167 +417,132 @@ public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void>
if (!colStats.isUnKnown) {
rowCount = colStats.count;
}
columnStatisticMap.put(slotReference.getExprId(), colStats);
columnStatisticMap.put(slotReference, colStats);
}
StatsDeriveResult stats = new StatsDeriveResult(rowCount, columnStatisticMap);
return stats;
return new Statistics(rowCount, columnStatisticMap);
}
private StatsDeriveResult computeTopN(TopN topN) {
StatsDeriveResult stats = groupExpression.childStatistics(0);
return stats.updateByLimit(topN.getLimit());
/**
 * TopN emits at most {@code limit} rows of its child; column statistics are
 * carried over unchanged.
 */
private Statistics computeTopN(TopN topN) {
    Statistics childStats = groupExpression.childStatistics(0);
    double cappedRowCount = Math.min(childStats.getRowCount(), topN.getLimit());
    return childStats.withRowCount(cappedRowCount);
}
private StatsDeriveResult computeLimit(Limit limit) {
StatsDeriveResult stats = groupExpression.childStatistics(0);
return stats.updateByLimit(limit.getLimit());
/**
 * Limit emits at most {@code limit} rows of its child; column statistics are
 * carried over unchanged.
 */
private Statistics computeLimit(Limit limit) {
    Statistics childStats = groupExpression.childStatistics(0);
    double cappedRowCount = Math.min(childStats.getRowCount(), limit.getLimit());
    return childStats.withRowCount(cappedRowCount);
}
private StatsDeriveResult computeAggregate(Aggregate aggregate) {
private Statistics computeAggregate(Aggregate<? extends Plan> aggregate) {
// TODO: since we have no column stats here. just use a fix ratio to compute the row count.
List<Expression> groupByExpressions = aggregate.getGroupByExpressions();
StatsDeriveResult childStats = groupExpression.childStatistics(0);
Map<Id, ColumnStatistic> childSlotToColumnStats = childStats.getSlotIdToColumnStats();
Statistics childStats = groupExpression.childStatistics(0);
Map<Expression, ColumnStatistic> childSlotToColumnStats = childStats.columnStatistics();
double resultSetCount = groupByExpressions.stream().flatMap(expr -> expr.getInputSlots().stream())
.map(Slot::getExprId)
.filter(childSlotToColumnStats::containsKey).map(childSlotToColumnStats::get).map(s -> s.ndv)
.reduce(1d, (a, b) -> a * b);
if (resultSetCount <= 0) {
resultSetCount = 1L;
}
Map<Id, ColumnStatistic> slotToColumnStats = Maps.newHashMap();
resultSetCount = Math.min(resultSetCount, childStats.getRowCount());
Map<Expression, ColumnStatistic> slotToColumnStats = Maps.newHashMap();
List<NamedExpression> outputExpressions = aggregate.getOutputExpressions();
// TODO: 1. Estimate the output unit size by the type of corresponding AggregateFunction
// 2. Handle alias, literal in the output expression list
for (NamedExpression outputExpression : outputExpressions) {
ColumnStatistic columnStat = ExpressionEstimation.estimate(outputExpression, childStats);
ColumnStatisticBuilder builder = new ColumnStatisticBuilder(columnStat);
builder.setNdv(Math.min(columnStat.ndv, resultSetCount));
slotToColumnStats.put(outputExpression.toSlot().getExprId(), columnStat);
builder.setNdv(resultSetCount);
slotToColumnStats.put(outputExpression.toSlot(), columnStat);
}
StatsDeriveResult statsDeriveResult = new StatsDeriveResult(resultSetCount, childStats.getWidth(),
childStats.getPenalty(), slotToColumnStats);
statsDeriveResult.setWidth(childStats.getWidth());
statsDeriveResult.setPenalty(childStats.getPenalty() + childStats.getRowCount());
return new Statistics(resultSetCount, slotToColumnStats, childStats.getWidth(),
childStats.getPenalty() + childStats.getRowCount());
// TODO: Update ColumnStats properly, add new mapping from output slot to ColumnStats
return statsDeriveResult;
}
private StatsDeriveResult computeRepeat(Repeat repeat) {
StatsDeriveResult childStats = groupExpression.childStatistics(0);
Map<Id, ColumnStatistic> slotIdToColumnStats = childStats.getSlotIdToColumnStats();
private Statistics computeRepeat(Repeat<? extends Plan> repeat) {
Statistics childStats = groupExpression.childStatistics(0);
Map<Expression, ColumnStatistic> slotIdToColumnStats = childStats.columnStatistics();
int groupingSetNum = repeat.getGroupingSets().size();
double rowCount = childStats.getRowCount();
Map<Id, ColumnStatistic> columnStatisticMap = slotIdToColumnStats.entrySet()
Map<Expression, ColumnStatistic> columnStatisticMap = slotIdToColumnStats.entrySet()
.stream().map(kv -> {
ColumnStatistic stats = kv.getValue();
return Pair.of(kv.getKey(), new ColumnStatistic(
stats.count < 0 ? stats.count : stats.count * groupingSetNum,
stats.ndv,
stats.avgSizeByte,
stats.numNulls < 0 ? stats.numNulls : stats.numNulls * groupingSetNum,
stats.dataSize < 0 ? stats.dataSize : stats.dataSize * groupingSetNum,
stats.minValue,
stats.maxValue,
stats.selectivity,
stats.minExpr,
stats.maxExpr,
stats.isUnKnown
));
ColumnStatisticBuilder columnStatisticBuilder = new ColumnStatisticBuilder(stats);
columnStatisticBuilder
.setCount(stats.count < 0 ? stats.count : stats.count * groupingSetNum)
.setNumNulls(stats.numNulls < 0 ? stats.numNulls : stats.numNulls * groupingSetNum)
.setDataSize(stats.dataSize < 0 ? stats.dataSize : stats.dataSize * groupingSetNum);
return Pair.of(kv.getKey(), columnStatisticBuilder.build());
}).collect(Collectors.toMap(Pair::key, Pair::value));
return new StatsDeriveResult(rowCount < 0 ? rowCount : rowCount * groupingSetNum, columnStatisticMap);
return new Statistics(rowCount < 0 ? rowCount : rowCount * groupingSetNum, columnStatisticMap,
childStats.getWidth(), childStats.getPenalty());
}
// TODO: do real project on column stats
private StatsDeriveResult computeProject(Project project) {
private Statistics computeProject(Project project) {
List<NamedExpression> projections = project.getProjects();
StatsDeriveResult childStats = groupExpression.childStatistics(0);
Map<Id, ColumnStatistic> childColumnStats = childStats.getSlotIdToColumnStats();
Map<Id, ColumnStatistic> columnsStats = projections.stream().map(projection -> {
ColumnStatistic value = null;
Set<Slot> slots = projection.getInputSlots();
if (slots.isEmpty()) {
value = ColumnStatistic.DEFAULT;
} else {
// TODO: just a trick here, need to do real project on column stats
for (Slot slot : slots) {
if (childColumnStats.containsKey(slot.getExprId())) {
value = childColumnStats.get(slot.getExprId());
break;
}
}
if (value == null) {
value = ColumnStatistic.DEFAULT;
}
}
return new SimpleEntry<>(projection.toSlot().getExprId(), value);
Statistics childStats = groupExpression.childStatistics(0);
Map<Expression, ColumnStatistic> columnsStats = projections.stream().map(projection -> {
ColumnStatistic columnStatistic = ExpressionEstimation.estimate(projection, childStats);
return new SimpleEntry<>(projection.toSlot(), columnStatistic);
}).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, (item1, item2) -> item1));
return new StatsDeriveResult(childStats.getRowCount(), childStats.getWidth(),
childStats.getPenalty(), columnsStats);
return new Statistics(childStats.getRowCount(), columnsStats, childStats.getWidth(), childStats.getPenalty());
}
private StatsDeriveResult computeOneRowRelation(OneRowRelation oneRowRelation) {
Map<Id, ColumnStatistic> columnStatsMap = oneRowRelation.getProjects()
private Statistics computeOneRowRelation(OneRowRelation oneRowRelation) {
Map<Expression, ColumnStatistic> columnStatsMap = oneRowRelation.getProjects()
.stream()
.map(project -> {
ColumnStatistic statistic = new ColumnStatisticBuilder().setNdv(1).build();
// TODO: compute the literal size
return Pair.of(project.toSlot().getExprId(), statistic);
return Pair.of(project.toSlot(), statistic);
})
.collect(Collectors.toMap(Pair::key, Pair::value));
int rowCount = 1;
return new StatsDeriveResult(rowCount, columnStatsMap);
return new Statistics(rowCount, columnStatsMap);
}
private StatsDeriveResult computeEmptyRelation(EmptyRelation emptyRelation) {
Map<Id, ColumnStatistic> columnStatsMap = emptyRelation.getProjects()
private Statistics computeEmptyRelation(EmptyRelation emptyRelation) {
Map<Expression, ColumnStatistic> columnStatsMap = emptyRelation.getProjects()
.stream()
.map(project -> {
ColumnStatisticBuilder columnStat = new ColumnStatisticBuilder()
.setNdv(0)
.setNumNulls(0)
.setAvgSizeByte(0);
return Pair.of(project.toSlot().getExprId(), columnStat.build());
return Pair.of(project.toSlot(), columnStat.build());
})
.collect(Collectors.toMap(Pair::key, Pair::value));
int rowCount = 0;
return new StatsDeriveResult(rowCount, columnStatsMap);
return new Statistics(rowCount, columnStatsMap);
}
private StatsDeriveResult computeUnion(SetOperation setOperation) {
StatsDeriveResult leftStatsResult = groupExpression.childStatistics(0);
Map<Id, ColumnStatistic> leftStatsSlotIdToColumnStats = leftStatsResult.getSlotIdToColumnStats();
Map<Id, ColumnStatistic> newColumnStatsMap = new HashMap<>();
double rowCount = leftStatsResult.getRowCount();
for (int j = 0; j < setOperation.getArity() - 1; ++j) {
StatsDeriveResult rightStatsResult = groupExpression.childStatistics(j + 1);
Map<Id, ColumnStatistic> rightStatsSlotIdToColumnStats = rightStatsResult.getSlotIdToColumnStats();
for (int i = 0; i < setOperation.getOutputs().size(); ++i) {
Slot leftSlot = getLeftSlot(j, i, setOperation);
Slot rightSlot = setOperation.getChildOutput(j + 1).get(i);
ColumnStatistic leftStats = getLeftStats(j, leftSlot, leftStatsSlotIdToColumnStats, newColumnStatsMap);
ColumnStatistic rightStats = rightStatsSlotIdToColumnStats.get(rightSlot.getExprId());
newColumnStatsMap.put(setOperation.getOutputs().get(i).getExprId(), new ColumnStatistic(
leftStats.count + rightStats.count,
leftStats.ndv + rightStats.ndv,
leftStats.avgSizeByte,
leftStats.numNulls + rightStats.numNulls,
leftStats.dataSize + rightStats.dataSize,
Math.min(leftStats.minValue, rightStats.minValue),
Math.max(leftStats.maxValue, rightStats.maxValue),
1.0 / (leftStats.ndv + rightStats.ndv),
leftStats.minExpr,
leftStats.maxExpr,
leftStats.isUnKnown));
private Statistics computeUnion(SetOperation setOperation) {
List<Slot> head = groupExpression.child(0).getLogicalProperties().getOutput();
Statistics headStats = groupExpression.childStatistics(0);
List<List<Slot>> childOutputs =
groupExpression.children()
.stream().map(ge -> ge.getLogicalProperties().getOutput()).collect(Collectors.toList());
List<Statistics> childStats =
groupExpression.children().stream().map(Group::getStatistics).collect(Collectors.toList());
StatisticsBuilder statisticsBuilder = new StatisticsBuilder();
List<NamedExpression> unionOutput = setOperation.getOutputs();
for (int i = 0; i < head.size(); i++) {
double leftRowCount = headStats.getRowCount();
Slot headSlot = head.get(i);
for (int j = 1; j < childOutputs.size(); j++) {
Slot slot = childOutputs.get(j).get(i);
ColumnStatistic rightStatistic = childStats.get(j).findColumnStatistics(slot);
double rightRowCount = childStats.get(j).getRowCount();
ColumnStatistic estimatedColumnStatistics
= unionColumn(headStats.findColumnStatistics(headSlot),
headStats.getRowCount(), rightStatistic, rightRowCount);
headStats.addColumnStats(headSlot, estimatedColumnStatistics);
leftRowCount += childStats.get(j).getRowCount();
}
rowCount = Math.min(rowCount, rightStatsResult.getRowCount());
statisticsBuilder.setRowCount(leftRowCount);
statisticsBuilder.putColumnStatistics(unionOutput.get(i), headStats.findColumnStatistics(headSlot));
}
return new StatsDeriveResult(rowCount, newColumnStatsMap);
return statisticsBuilder.build();
}
private Slot getLeftSlot(int fistSetOperation, int outputSlotIdx, SetOperation setOperation) {
@ -587,45 +553,58 @@ public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void>
private ColumnStatistic getLeftStats(int fistSetOperation,
Slot leftSlot,
Map<Id, ColumnStatistic> leftStatsSlotIdToColumnStats,
Map<Id, ColumnStatistic> newColumnStatsMap) {
Map<Expression, ColumnStatistic> leftStatsSlotIdToColumnStats,
Map<Expression, ColumnStatistic> newColumnStatsMap) {
return fistSetOperation == 0
? leftStatsSlotIdToColumnStats.get(leftSlot.getExprId())
: newColumnStatsMap.get(leftSlot.getExprId());
}
private StatsDeriveResult computeExcept(SetOperation setOperation) {
StatsDeriveResult leftStatsResult = groupExpression.childStatistics(0);
return new StatsDeriveResult(leftStatsResult.getRowCount(),
replaceExprIdWithCurrentOutput(setOperation, leftStatsResult));
/**
 * Derive statistics for EXCEPT: the result is bounded by the left child, so
 * its row count is reused and the left child's column statistics are re-keyed
 * onto this operator's output expressions (position by position).
 */
private Statistics computeExcept(SetOperation setOperation) {
    Statistics leftStats = groupExpression.childStatistics(0);
    List<Slot> leftChildSlots = groupExpression.child(0).getLogicalProperties().getOutput();
    List<NamedExpression> outputs = setOperation.getOutputs();
    StatisticsBuilder builder = new StatisticsBuilder();
    builder.setRowCount(leftStats.getRowCount());
    for (int idx = 0; idx < outputs.size(); idx++) {
        builder.putColumnStatistics(outputs.get(idx),
                leftStats.findColumnStatistics(leftChildSlots.get(idx)));
    }
    return builder.build();
}
private StatsDeriveResult computeIntersect(SetOperation setOperation) {
StatsDeriveResult leftStatsResult = groupExpression.childStatistics(0);
double rowCount = leftStatsResult.getRowCount();
private Statistics computeIntersect(SetOperation setOperation) {
Statistics leftChildStats = groupExpression.childStatistics(0);
double rowCount = leftChildStats.getRowCount();
for (int i = 1; i < setOperation.getArity(); ++i) {
rowCount = Math.min(rowCount, groupExpression.childStatistics(i).getRowCount());
}
return new StatsDeriveResult(rowCount, replaceExprIdWithCurrentOutput(setOperation, leftStatsResult));
}
private Map<Id, ColumnStatistic> replaceExprIdWithCurrentOutput(
SetOperation setOperation, StatsDeriveResult leftStatsResult) {
Map<Id, ColumnStatistic> newColumnStatsMap = new HashMap<>();
for (int i = 0; i < setOperation.getOutputs().size(); i++) {
NamedExpression namedExpression = setOperation.getOutputs().get(i);
Slot childSlot = setOperation.getChildOutput(0).get(i);
newColumnStatsMap.put(namedExpression.getExprId(),
leftStatsResult.getSlotIdToColumnStats().get(childSlot.getExprId()));
double minProd = Double.MAX_VALUE;
for (Group group : groupExpression.children()) {
Statistics statistics = group.getStatistics();
double prod = 1.0;
for (ColumnStatistic columnStatistic : statistics.columnStatistics().values()) {
prod *= columnStatistic.ndv;
}
if (minProd < prod) {
minProd = prod;
}
}
return newColumnStatsMap;
rowCount = Math.min(rowCount, minProd);
List<NamedExpression> outputs = setOperation.getOutputs();
List<Slot> leftChildOutputs = setOperation.getChildOutput(0);
for (int i = 0; i < outputs.size(); i++) {
leftChildStats.addColumnStats(outputs.get(i),
leftChildStats.findColumnStatistics(leftChildOutputs.get(i)));
}
return leftChildStats.withRowCount(rowCount);
}
private StatsDeriveResult computeGenerate(Generate generate) {
StatsDeriveResult stats = groupExpression.childStatistics(0);
private Statistics computeGenerate(Generate generate) {
Statistics stats = groupExpression.childStatistics(0);
double count = stats.getRowCount() * generate.getGeneratorOutput().size() * 5;
Map<Id, ColumnStatistic> columnStatsMap = Maps.newHashMap();
for (Map.Entry<Id, ColumnStatistic> entry : stats.getSlotIdToColumnStats().entrySet()) {
Map<Expression, ColumnStatistic> columnStatsMap = Maps.newHashMap();
for (Map.Entry<Expression, ColumnStatistic> entry : stats.columnStatistics().entrySet()) {
ColumnStatistic columnStatistic = new ColumnStatisticBuilder(entry.getValue()).setCount(count).build();
columnStatsMap.put(entry.getKey(), columnStatistic);
}
@ -638,35 +617,58 @@ public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void>
.setNumNulls(0)
.setAvgSizeByte(output.getDataType().width())
.build();
columnStatsMap.put(output.getExprId(), columnStatistic);
columnStatsMap.put(output, columnStatistic);
}
return new StatsDeriveResult(count, columnStatsMap);
return new Statistics(count, columnStatsMap);
}
private StatsDeriveResult computeWindow(Window windowOperator) {
StatsDeriveResult stats = groupExpression.childStatistics(0);
Map<Id, ColumnStatistic> childColumnStats = stats.getSlotIdToColumnStats();
Map<Id, ColumnStatistic> columnStatisticMap = windowOperator.getWindowExpressions().stream()
private Statistics computeWindow(Window windowOperator) {
Statistics stats = groupExpression.childStatistics(0);
Map<Expression, ColumnStatistic> childColumnStats = stats.columnStatistics();
Map<Expression, ColumnStatistic> columnStatisticMap = windowOperator.getWindowExpressions().stream()
.map(expr -> {
ColumnStatistic value = null;
Set<Slot> slots = expr.getInputSlots();
if (slots.isEmpty()) {
value = ColumnStatistic.DEFAULT;
value = ColumnStatistic.UNKNOWN;
} else {
for (Slot slot : slots) {
if (childColumnStats.containsKey(slot.getExprId())) {
value = childColumnStats.get(slot.getExprId());
if (childColumnStats.containsKey(slot)) {
value = childColumnStats.get(slot);
break;
}
}
if (value == null) {
// todo: how to set stats?
value = ColumnStatistic.DEFAULT;
value = ColumnStatistic.UNKNOWN;
}
}
return Pair.of(expr.toSlot().getExprId(), value);
return Pair.of(expr.toSlot(), value);
}).collect(Collectors.toMap(Pair::key, Pair::value));
columnStatisticMap.putAll(childColumnStats);
return new StatsDeriveResult(stats.getRowCount(), columnStatisticMap);
return new Statistics(stats.getRowCount(), columnStatisticMap);
}
/**
 * Merge the column statistics of two union branches into a single estimate:
 * the value range is the union of both ranges, null counts add up, and the
 * average size is recomputed from the combined non-null data volume.
 *
 * @param leftStats    column statistics from the accumulated left side
 * @param leftRowCount row count of the left side
 * @param rightStats   column statistics from the right branch
 * @param rightRowCount row count of the right branch
 * @return merged column statistics for the union output column
 */
private ColumnStatistic unionColumn(ColumnStatistic leftStats, double leftRowCount, ColumnStatistic rightStats,
        double rightRowCount) {
    ColumnStatisticBuilder columnStatisticBuilder = new ColumnStatisticBuilder();
    columnStatisticBuilder.setMaxValue(Math.max(leftStats.maxValue, rightStats.maxValue));
    columnStatisticBuilder.setMinValue(Math.min(leftStats.minValue, rightStats.minValue));
    // NOTE(review): the min/max set above are immediately overwritten by the
    // range-based setMinValue/setMaxValue calls below — looks like dead code;
    // confirm before removing.
    StatisticRange leftRange = StatisticRange.from(leftStats);
    StatisticRange rightRange = StatisticRange.from(rightStats);
    StatisticRange newRange = leftRange.union(rightRange);
    double newRowCount = leftRowCount + rightRowCount;
    // Data volume of the non-null values on each side.
    double leftSize = (leftRowCount - leftStats.numNulls) * leftStats.avgSizeByte;
    double rightSize = (rightRowCount - rightStats.numNulls) * rightStats.avgSizeByte;
    // maxNonNaN(1, ...) guards the division against a zero/NaN total row count.
    double newNullFraction = (leftStats.numNulls + rightStats.numNulls) / StatsMathUtil.maxNonNaN(1, newRowCount);
    double newNonNullRowCount = newRowCount * (1 - newNullFraction);
    double newAverageRowSize = newNonNullRowCount == 0 ? 0 : (leftSize + rightSize) / newNonNullRowCount;
    columnStatisticBuilder.setMinValue(newRange.getLow())
            .setMaxValue(newRange.getHigh())
            .setNdv(newRange.getDistinctValues())
            .setNumNulls(leftStats.numNulls + rightStats.numNulls)
            .setAvgSizeByte(newAverageRowSize);
    return columnStatisticBuilder.build();
}
}

View File

@ -0,0 +1,55 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.nereids.stats;
/**
 * Math utilities for statistics derivation: a divide-by-zero guard and
 * NaN-tolerant min/max, used by the cardinality estimators.
 */
public class StatsMathUtil {

    private StatsMathUtil() {
        // Utility class: static methods only, no instances.
    }

    /**
     * Guard a divisor against zero: returns 1 when {@code d} is 0.0 (or -0.0,
     * which compares equal to 0.0); otherwise returns {@code d} unchanged.
     * NaN is passed through unchanged.
     */
    public static double nonZeroDivisor(double d) {
        return d == 0.0 ? 1 : d;
    }

    /**
     * Minimum of two doubles, preferring a non-NaN operand: if exactly one
     * argument is NaN the other is returned; if both are NaN, NaN is returned.
     */
    public static double minNonNaN(double a, double b) {
        if (Double.isNaN(a)) {
            return b;
        }
        if (Double.isNaN(b)) {
            return a;
        }
        return Math.min(a, b);
    }

    /**
     * Maximum of two doubles, preferring a non-NaN operand; mirrors
     * {@link #minNonNaN(double, double)}.
     */
    public static double maxNonNaN(double a, double b) {
        if (Double.isNaN(a)) {
            return b;
        }
        if (Double.isNaN(b)) {
            return a;
        }
        return Math.max(a, b);
    }
}

View File

@ -20,7 +20,6 @@ package org.apache.doris.nereids.trees.expressions.functions.table;
import org.apache.doris.analysis.IntLiteral;
import org.apache.doris.catalog.FunctionSignature;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.Id;
import org.apache.doris.common.NereidsException;
import org.apache.doris.nereids.exceptions.AnalysisException;
import org.apache.doris.nereids.properties.PhysicalProperties;
@ -30,7 +29,8 @@ import org.apache.doris.nereids.trees.expressions.TVFProperties;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.BigIntType;
import org.apache.doris.statistics.ColumnStatistic;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.ColumnStatisticBuilder;
import org.apache.doris.statistics.Statistics;
import org.apache.doris.tablefunction.NumbersTableValuedFunction;
import org.apache.doris.tablefunction.TableValuedFunctionIf;
@ -63,17 +63,21 @@ public class Numbers extends TableValuedFunction {
}
@Override
public StatsDeriveResult computeStats(List<Slot> slots) {
public Statistics computeStats(List<Slot> slots) {
Preconditions.checkArgument(slots.size() == 1);
try {
NumbersTableValuedFunction catalogFunction = (NumbersTableValuedFunction) getCatalogFunction();
long rowNum = catalogFunction.getTotalNumbers();
Map<Id, ColumnStatistic> columnToStatistics = Maps.newHashMap();
ColumnStatistic columnStat = new ColumnStatistic(rowNum, rowNum, 8, 0, 8, 0, rowNum - 1,
1.0 / rowNum, new IntLiteral(0, Type.BIGINT), new IntLiteral(rowNum - 1, Type.BIGINT), false);
columnToStatistics.put(slots.get(0).getExprId(), columnStat);
return new StatsDeriveResult(rowNum, columnToStatistics);
Map<Expression, ColumnStatistic> columnToStatistics = Maps.newHashMap();
ColumnStatistic columnStat = new ColumnStatisticBuilder()
.setCount(rowNum).setNdv(rowNum).setAvgSizeByte(8).setNumNulls(0).setDataSize(8).setMinValue(0)
.setMaxValue(rowNum - 1).setSelectivity(1.0 / rowNum)
.setMinExpr(new IntLiteral(0, Type.BIGINT))
.setMaxExpr(new IntLiteral(rowNum - 1, Type.BIGINT))
.build();
columnToStatistics.put(slots.get(0), columnStat);
return new Statistics(rowNum, columnToStatistics);
} catch (Exception t) {
throw new NereidsException(t.getMessage(), t);
}

View File

@ -29,7 +29,7 @@ import org.apache.doris.nereids.trees.expressions.functions.CustomSignature;
import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.DataType;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import org.apache.doris.tablefunction.TableValuedFunctionIf;
import com.google.common.base.Suppliers;
@ -58,7 +58,7 @@ public abstract class TableValuedFunction extends BoundFunction implements Unary
protected abstract TableValuedFunctionIf toCatalogFunction();
public abstract StatsDeriveResult computeStats(List<Slot> slots);
public abstract Statistics computeStats(List<Slot> slots);
public TVFProperties getTVFProperties() {
return (TVFProperties) child(0);

View File

@ -34,7 +34,7 @@ import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.util.MutableState;
import org.apache.doris.nereids.util.MutableState.EmptyMutableState;
import org.apache.doris.nereids.util.TreeStringUtils;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;
@ -54,7 +54,7 @@ public abstract class AbstractPlan extends AbstractTreeNode<Plan> implements Pla
.addEnhancers(new AddCounterEventEnhancer())
.addConsumers(new LogConsumer(CounterEvent.class, EventChannel.LOG)));
protected final StatsDeriveResult statsDeriveResult;
protected final Statistics statistics;
protected final PlanType type;
protected final Optional<GroupExpression> groupExpression;
protected final Supplier<LogicalProperties> logicalPropertiesSupplier;
@ -78,7 +78,7 @@ public abstract class AbstractPlan extends AbstractTreeNode<Plan> implements Pla
* all parameter constructor.
*/
public AbstractPlan(PlanType type, Optional<GroupExpression> groupExpression,
Optional<LogicalProperties> optLogicalProperties, @Nullable StatsDeriveResult statsDeriveResult,
Optional<LogicalProperties> optLogicalProperties, @Nullable Statistics statistics,
Plan... children) {
super(groupExpression, children);
this.type = Objects.requireNonNull(type, "type can not be null");
@ -86,7 +86,7 @@ public abstract class AbstractPlan extends AbstractTreeNode<Plan> implements Pla
Objects.requireNonNull(optLogicalProperties, "logicalProperties can not be null");
this.logicalPropertiesSupplier = Suppliers.memoize(() -> optLogicalProperties.orElseGet(
this::computeLogicalProperties));
this.statsDeriveResult = statsDeriveResult;
this.statistics = statistics;
PLAN_CONSTRUCT_TRACER.log(CounterEvent.of(Memo.getStateId(), CounterType.PLAN_CONSTRUCTOR, null, null, null));
}
@ -99,8 +99,8 @@ public abstract class AbstractPlan extends AbstractTreeNode<Plan> implements Pla
return groupExpression;
}
public StatsDeriveResult getStats() {
return statsDeriveResult;
public Statistics getStats() {
return statistics;
}
@Override

View File

@ -24,7 +24,7 @@ import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.plans.logical.LogicalLeaf;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import com.google.common.collect.ImmutableList;
@ -60,7 +60,7 @@ public class GroupPlan extends LogicalLeaf {
}
@Override
public StatsDeriveResult getStats() {
public Statistics getStats() {
throw new IllegalStateException("GroupPlan can not invoke getStats()");
}

View File

@ -25,7 +25,7 @@ import org.apache.doris.nereids.trees.plans.AbstractPlan;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.PlanType;
import org.apache.doris.nereids.trees.plans.logical.LogicalPlan;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import org.jetbrains.annotations.Nullable;
@ -47,9 +47,9 @@ public abstract class Command extends AbstractPlan implements LogicalPlan {
public Command(PlanType type, Optional<GroupExpression> groupExpression,
Optional<LogicalProperties> optLogicalProperties,
@Nullable StatsDeriveResult statsDeriveResult,
@Nullable Statistics statistics,
Plan... children) {
super(type, groupExpression, optLogicalProperties, statsDeriveResult, children);
super(type, groupExpression, optLogicalProperties, statistics, children);
}
@Override

View File

@ -28,7 +28,7 @@ import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.PlanType;
import org.apache.doris.nereids.trees.plans.algebra.Join;
import org.apache.doris.nereids.util.ExpressionUtils;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableList.Builder;
@ -90,10 +90,10 @@ public abstract class AbstractPhysicalJoin<
Optional<GroupExpression> groupExpression,
LogicalProperties logicalProperties,
PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult,
Statistics statistics,
LEFT_CHILD_TYPE leftChild,
RIGHT_CHILD_TYPE rightChild) {
super(type, groupExpression, logicalProperties, physicalProperties, statsDeriveResult, leftChild, rightChild);
super(type, groupExpression, logicalProperties, physicalProperties, statistics, leftChild, rightChild);
this.joinType = Objects.requireNonNull(joinType, "joinType can not be null");
this.hashJoinConjuncts = ImmutableList.copyOf(hashJoinConjuncts);
this.otherJoinConjuncts = ImmutableList.copyOf(otherJoinConjuncts);

View File

@ -23,7 +23,7 @@ import org.apache.doris.nereids.properties.PhysicalProperties;
import org.apache.doris.nereids.trees.plans.AbstractPlan;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.PlanType;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import java.util.Optional;
import javax.annotation.Nullable;
@ -46,8 +46,8 @@ public abstract class AbstractPhysicalPlan extends AbstractPlan implements Physi
public AbstractPhysicalPlan(PlanType type, Optional<GroupExpression> groupExpression,
LogicalProperties logicalProperties, @Nullable PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult, Plan... children) {
super(type, groupExpression, Optional.of(logicalProperties), statsDeriveResult, children);
Statistics statistics, Plan... children) {
super(type, groupExpression, Optional.of(logicalProperties), statistics, children);
this.physicalProperties = physicalProperties == null ? PhysicalProperties.ANY : physicalProperties;
}

View File

@ -26,7 +26,7 @@ import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.PlanType;
import org.apache.doris.nereids.trees.plans.SortPhase;
import org.apache.doris.nereids.trees.plans.algebra.Sort;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import com.google.common.collect.ImmutableList;
@ -58,8 +58,8 @@ public abstract class AbstractPhysicalSort<CHILD_TYPE extends Plan> extends Phys
*/
public AbstractPhysicalSort(PlanType type, List<OrderKey> orderKeys,
SortPhase phase, Optional<GroupExpression> groupExpression, LogicalProperties logicalProperties,
PhysicalProperties physicalProperties, StatsDeriveResult statsDeriveResult, CHILD_TYPE child) {
super(type, groupExpression, logicalProperties, physicalProperties, statsDeriveResult, child);
PhysicalProperties physicalProperties, Statistics statistics, CHILD_TYPE child) {
super(type, groupExpression, logicalProperties, physicalProperties, statistics, child);
this.orderKeys = ImmutableList.copyOf(Objects.requireNonNull(orderKeys, "orderKeys can not be null"));
this.phase = phase;
}

View File

@ -27,7 +27,7 @@ import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.PlanType;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
@ -51,9 +51,9 @@ public class PhysicalAssertNumRows<CHILD_TYPE extends Plan> extends PhysicalUnar
public PhysicalAssertNumRows(AssertNumRowsElement assertNumRowsElement, Optional<GroupExpression> groupExpression,
LogicalProperties logicalProperties, PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult, CHILD_TYPE child) {
Statistics statistics, CHILD_TYPE child) {
super(PlanType.PHYSICAL_ASSERT_NUM_ROWS, groupExpression, logicalProperties, physicalProperties,
statsDeriveResult, child);
statistics, child);
this.assertNumRowsElement = assertNumRowsElement;
}
@ -111,19 +111,19 @@ public class PhysicalAssertNumRows<CHILD_TYPE extends Plan> extends PhysicalUnar
@Override
public PhysicalAssertNumRows<CHILD_TYPE> withGroupExpression(Optional<GroupExpression> groupExpression) {
return new PhysicalAssertNumRows<>(assertNumRowsElement, groupExpression,
getLogicalProperties(), physicalProperties, statsDeriveResult, child());
getLogicalProperties(), physicalProperties, statistics, child());
}
@Override
public PhysicalAssertNumRows<CHILD_TYPE> withLogicalProperties(Optional<LogicalProperties> logicalProperties) {
return new PhysicalAssertNumRows<>(assertNumRowsElement, Optional.empty(),
logicalProperties.get(), physicalProperties, statsDeriveResult, child());
logicalProperties.get(), physicalProperties, statistics, child());
}
@Override
public PhysicalAssertNumRows<CHILD_TYPE> withPhysicalPropertiesAndStats(PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult) {
Statistics statistics) {
return new PhysicalAssertNumRows<>(assertNumRowsElement, Optional.empty(),
getLogicalProperties(), physicalProperties, statsDeriveResult, child());
getLogicalProperties(), physicalProperties, statistics, child());
}
}

View File

@ -23,7 +23,7 @@ import org.apache.doris.nereids.properties.PhysicalProperties;
import org.apache.doris.nereids.trees.plans.BinaryPlan;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.PlanType;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import java.util.Optional;
import javax.annotation.Nullable;
@ -43,7 +43,7 @@ public abstract class PhysicalBinary<LEFT_CHILD_TYPE extends Plan, RIGHT_CHILD_T
public PhysicalBinary(PlanType type, Optional<GroupExpression> groupExpression,
LogicalProperties logicalProperties, @Nullable PhysicalProperties physicalProperties,
@Nullable StatsDeriveResult statsDeriveResult, LEFT_CHILD_TYPE leftChild, RIGHT_CHILD_TYPE rightChild) {
super(type, groupExpression, logicalProperties, physicalProperties, statsDeriveResult, leftChild, rightChild);
@Nullable Statistics statistics, LEFT_CHILD_TYPE leftChild, RIGHT_CHILD_TYPE rightChild) {
super(type, groupExpression, logicalProperties, physicalProperties, statistics, leftChild, rightChild);
}
}

View File

@ -26,7 +26,7 @@ import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.PlanType;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
@ -53,8 +53,8 @@ public class PhysicalDistribute<CHILD_TYPE extends Plan> extends PhysicalUnary<C
public PhysicalDistribute(DistributionSpec spec, Optional<GroupExpression> groupExpression,
LogicalProperties logicalProperties, PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult, CHILD_TYPE child) {
super(PlanType.PHYSICAL_DISTRIBUTION, groupExpression, logicalProperties, physicalProperties, statsDeriveResult,
Statistics statistics, CHILD_TYPE child) {
super(PlanType.PHYSICAL_DISTRIBUTION, groupExpression, logicalProperties, physicalProperties, statistics,
child);
this.distributionSpec = spec;
}
@ -63,7 +63,7 @@ public class PhysicalDistribute<CHILD_TYPE extends Plan> extends PhysicalUnary<C
public String toString() {
return Utils.toSqlString("PhysicalDistribute[" + id.asInt() + "]",
"distributionSpec", distributionSpec,
"stats", statsDeriveResult
"stats", statistics
);
}
@ -101,8 +101,8 @@ public class PhysicalDistribute<CHILD_TYPE extends Plan> extends PhysicalUnary<C
@Override
public PhysicalDistribute<CHILD_TYPE> withPhysicalPropertiesAndStats(PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult) {
Statistics statistics) {
return new PhysicalDistribute<>(distributionSpec, Optional.empty(),
getLogicalProperties(), physicalProperties, statsDeriveResult, child());
getLogicalProperties(), physicalProperties, statistics, child());
}
}

View File

@ -28,7 +28,7 @@ import org.apache.doris.nereids.trees.plans.PlanType;
import org.apache.doris.nereids.trees.plans.algebra.EmptyRelation;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import com.google.common.collect.ImmutableList;
@ -51,9 +51,9 @@ public class PhysicalEmptyRelation extends PhysicalLeaf implements EmptyRelation
public PhysicalEmptyRelation(List<? extends NamedExpression> projects, Optional<GroupExpression> groupExpression,
LogicalProperties logicalProperties, PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult) {
Statistics statistics) {
super(PlanType.PHYSICAL_EMPTY_RELATION, groupExpression, logicalProperties, physicalProperties,
statsDeriveResult);
statistics);
this.projects = ImmutableList.copyOf(Objects.requireNonNull(projects, "projects can not be null"));
}
@ -70,13 +70,13 @@ public class PhysicalEmptyRelation extends PhysicalLeaf implements EmptyRelation
@Override
public Plan withGroupExpression(Optional<GroupExpression> groupExpression) {
return new PhysicalEmptyRelation(projects, groupExpression,
logicalPropertiesSupplier.get(), physicalProperties, statsDeriveResult);
logicalPropertiesSupplier.get(), physicalProperties, statistics);
}
@Override
public Plan withLogicalProperties(Optional<LogicalProperties> logicalProperties) {
return new PhysicalEmptyRelation(projects, Optional.empty(),
logicalProperties.get(), physicalProperties, statsDeriveResult);
logicalProperties.get(), physicalProperties, statistics);
}
@Override
@ -120,8 +120,8 @@ public class PhysicalEmptyRelation extends PhysicalLeaf implements EmptyRelation
@Override
public PhysicalPlan withPhysicalPropertiesAndStats(PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult) {
Statistics statistics) {
return new PhysicalEmptyRelation(projects, Optional.empty(),
logicalPropertiesSupplier.get(), physicalProperties, statsDeriveResult);
logicalPropertiesSupplier.get(), physicalProperties, statistics);
}
}

View File

@ -26,7 +26,7 @@ import org.apache.doris.nereids.trees.plans.ObjectId;
import org.apache.doris.nereids.trees.plans.PlanType;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import java.util.List;
import java.util.Objects;
@ -57,9 +57,9 @@ public class PhysicalEsScan extends PhysicalRelation {
public PhysicalEsScan(ObjectId id, ExternalTable table, List<String> qualifier,
DistributionSpec distributionSpec, Optional<GroupExpression> groupExpression,
LogicalProperties logicalProperties, PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult) {
Statistics statistics) {
super(id, PlanType.PHYSICAL_ES_SCAN, qualifier, groupExpression, logicalProperties,
physicalProperties, statsDeriveResult);
physicalProperties, statistics);
this.table = table;
this.distributionSpec = distributionSpec;
}
@ -69,7 +69,7 @@ public class PhysicalEsScan extends PhysicalRelation {
return Utils.toSqlString("PhysicalEsScan",
"qualified", Utils.qualifiedName(qualifier, table.getName()),
"output", getOutput(),
"stats", statsDeriveResult
"stats", statistics
);
}
@ -112,7 +112,7 @@ public class PhysicalEsScan extends PhysicalRelation {
@Override
public PhysicalEsScan withPhysicalPropertiesAndStats(PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult) {
Statistics statsDeriveResult) {
return new PhysicalEsScan(id, table, qualifier, distributionSpec, groupExpression, getLogicalProperties(),
physicalProperties, statsDeriveResult);
}

View File

@ -24,7 +24,7 @@ import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.PlanType;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import java.util.List;
import java.util.Optional;
@ -49,10 +49,10 @@ public class PhysicalExcept extends PhysicalSetOperation {
public PhysicalExcept(Qualifier qualifier, Optional<GroupExpression> groupExpression,
LogicalProperties logicalProperties,
PhysicalProperties physicalProperties, StatsDeriveResult statsDeriveResult,
PhysicalProperties physicalProperties, Statistics statistics,
List<Plan> inputs) {
super(PlanType.PHYSICAL_EXCEPT, qualifier,
groupExpression, logicalProperties, physicalProperties, statsDeriveResult, inputs);
groupExpression, logicalProperties, physicalProperties, statistics, inputs);
}
@Override
@ -64,7 +64,7 @@ public class PhysicalExcept extends PhysicalSetOperation {
public String toString() {
return Utils.toSqlString("PhysicalExcept",
"qualifier", qualifier,
"stats", statsDeriveResult);
"stats", statistics);
}
@Override
@ -86,8 +86,8 @@ public class PhysicalExcept extends PhysicalSetOperation {
@Override
public PhysicalExcept withPhysicalPropertiesAndStats(
PhysicalProperties physicalProperties, StatsDeriveResult statsDeriveResult) {
PhysicalProperties physicalProperties, Statistics statistics) {
return new PhysicalExcept(qualifier, Optional.empty(),
getLogicalProperties(), physicalProperties, statsDeriveResult, children);
getLogicalProperties(), physicalProperties, statistics, children);
}
}

View File

@ -26,7 +26,7 @@ import org.apache.doris.nereids.trees.plans.ObjectId;
import org.apache.doris.nereids.trees.plans.PlanType;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import java.util.List;
import java.util.Objects;
@ -57,9 +57,9 @@ public class PhysicalFileScan extends PhysicalRelation {
public PhysicalFileScan(ObjectId id, ExternalTable table, List<String> qualifier,
DistributionSpec distributionSpec, Optional<GroupExpression> groupExpression,
LogicalProperties logicalProperties, PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult) {
Statistics statistics) {
super(id, PlanType.PHYSICAL_FILE_SCAN, qualifier, groupExpression, logicalProperties,
physicalProperties, statsDeriveResult);
physicalProperties, statistics);
this.table = table;
this.distributionSpec = distributionSpec;
}
@ -69,7 +69,7 @@ public class PhysicalFileScan extends PhysicalRelation {
return Utils.toSqlString("PhysicalFileScan",
"qualified", Utils.qualifiedName(qualifier, table.getName()),
"output", getOutput(),
"stats", statsDeriveResult
"stats", statistics
);
}
@ -112,8 +112,8 @@ public class PhysicalFileScan extends PhysicalRelation {
@Override
public PhysicalFileScan withPhysicalPropertiesAndStats(PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult) {
Statistics statistics) {
return new PhysicalFileScan(id, table, qualifier, distributionSpec, groupExpression, getLogicalProperties(),
physicalProperties, statsDeriveResult);
physicalProperties, statistics);
}
}

View File

@ -26,7 +26,7 @@ import org.apache.doris.nereids.trees.plans.PlanType;
import org.apache.doris.nereids.trees.plans.algebra.Filter;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
@ -56,8 +56,8 @@ public class PhysicalFilter<CHILD_TYPE extends Plan> extends PhysicalUnary<CHILD
public PhysicalFilter(Set<Expression> conjuncts, Optional<GroupExpression> groupExpression,
LogicalProperties logicalProperties, PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult, CHILD_TYPE child) {
super(PlanType.PHYSICAL_FILTER, groupExpression, logicalProperties, physicalProperties, statsDeriveResult,
Statistics statistics, CHILD_TYPE child) {
super(PlanType.PHYSICAL_FILTER, groupExpression, logicalProperties, physicalProperties, statistics,
child);
this.conjuncts = ImmutableSet.copyOf(Objects.requireNonNull(conjuncts, "conjuncts can not be null"));
}
@ -75,7 +75,7 @@ public class PhysicalFilter<CHILD_TYPE extends Plan> extends PhysicalUnary<CHILD
public String toString() {
return Utils.toSqlString("PhysicalFilter[" + id.asInt() + "]",
"predicates", getPredicate(),
"stats", statsDeriveResult
"stats", statistics
);
}
@ -119,8 +119,8 @@ public class PhysicalFilter<CHILD_TYPE extends Plan> extends PhysicalUnary<CHILD
@Override
public PhysicalFilter<CHILD_TYPE> withPhysicalPropertiesAndStats(PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult) {
Statistics statistics) {
return new PhysicalFilter<>(conjuncts, Optional.empty(), getLogicalProperties(), physicalProperties,
statsDeriveResult, child());
statistics, child());
}
}

View File

@ -28,7 +28,7 @@ import org.apache.doris.nereids.trees.plans.PlanType;
import org.apache.doris.nereids.trees.plans.algebra.Generate;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
@ -67,8 +67,8 @@ public class PhysicalGenerate<CHILD_TYPE extends Plan> extends PhysicalUnary<CHI
public PhysicalGenerate(List<Function> generators, List<Slot> generatorOutput,
Optional<GroupExpression> groupExpression,
LogicalProperties logicalProperties, PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult, CHILD_TYPE child) {
super(PlanType.PHYSICAL_FILTER, groupExpression, logicalProperties, physicalProperties, statsDeriveResult,
Statistics statistics, CHILD_TYPE child) {
super(PlanType.PHYSICAL_FILTER, groupExpression, logicalProperties, physicalProperties, statistics,
child);
this.generators = ImmutableList.copyOf(Objects.requireNonNull(generators, "predicates can not be null"));
this.generatorOutput = ImmutableList.copyOf(Objects.requireNonNull(generatorOutput,
@ -145,9 +145,9 @@ public class PhysicalGenerate<CHILD_TYPE extends Plan> extends PhysicalUnary<CHI
@Override
public PhysicalGenerate<CHILD_TYPE> withPhysicalPropertiesAndStats(PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult) {
Statistics statistics) {
return new PhysicalGenerate<>(generators, generatorOutput,
Optional.empty(), getLogicalProperties(), physicalProperties,
statsDeriveResult, child());
statistics, child());
}
}

View File

@ -32,7 +32,7 @@ import org.apache.doris.nereids.trees.plans.PlanType;
import org.apache.doris.nereids.trees.plans.algebra.Aggregate;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
@ -112,8 +112,8 @@ public class PhysicalHashAggregate<CHILD_TYPE extends Plan> extends PhysicalUnar
Optional<List<Expression>> partitionExpressions, AggregateParam aggregateParam, boolean maybeUsingStream,
Optional<GroupExpression> groupExpression, LogicalProperties logicalProperties,
RequireProperties requireProperties, PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult, CHILD_TYPE child) {
super(PlanType.PHYSICAL_AGGREGATE, groupExpression, logicalProperties, physicalProperties, statsDeriveResult,
Statistics statistics, CHILD_TYPE child) {
super(PlanType.PHYSICAL_AGGREGATE, groupExpression, logicalProperties, physicalProperties, statistics,
child);
this.groupByExpressions = ImmutableList.copyOf(
Objects.requireNonNull(groupByExpressions, "groupByExpressions cannot be null"));
@ -190,7 +190,7 @@ public class PhysicalHashAggregate<CHILD_TYPE extends Plan> extends PhysicalUnar
"outputExpr", outputExpressions,
"partitionExpr", partitionExpressions,
"requireProperties", requireProperties,
"stats", statsDeriveResult
"stats", statistics
);
}
@ -247,10 +247,10 @@ public class PhysicalHashAggregate<CHILD_TYPE extends Plan> extends PhysicalUnar
@Override
public PhysicalHashAggregate<CHILD_TYPE> withPhysicalPropertiesAndStats(PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult) {
Statistics statistics) {
return new PhysicalHashAggregate<>(groupByExpressions, outputExpressions, partitionExpressions,
aggregateParam, maybeUsingStream, Optional.empty(), getLogicalProperties(),
requireProperties, physicalProperties, statsDeriveResult,
requireProperties, physicalProperties, statistics,
child());
}
@ -258,13 +258,13 @@ public class PhysicalHashAggregate<CHILD_TYPE extends Plan> extends PhysicalUnar
public PhysicalHashAggregate<CHILD_TYPE> withAggOutput(List<NamedExpression> newOutput) {
return new PhysicalHashAggregate<>(groupByExpressions, newOutput, partitionExpressions,
aggregateParam, maybeUsingStream, Optional.empty(), getLogicalProperties(),
requireProperties, physicalProperties, statsDeriveResult, child());
requireProperties, physicalProperties, statistics, child());
}
public <C extends Plan> PhysicalHashAggregate<C> withRequirePropertiesAndChild(
RequireProperties requireProperties, C newChild) {
return new PhysicalHashAggregate<>(groupByExpressions, outputExpressions, partitionExpressions,
aggregateParam, maybeUsingStream, Optional.empty(), getLogicalProperties(),
requireProperties, physicalProperties, statsDeriveResult, newChild);
requireProperties, physicalProperties, statistics, newChild);
}
}

View File

@ -28,7 +28,7 @@ import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.PlanType;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
@ -91,11 +91,11 @@ public class PhysicalHashJoin<
Optional<GroupExpression> groupExpression,
LogicalProperties logicalProperties,
PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult,
Statistics statistics,
LEFT_CHILD_TYPE leftChild,
RIGHT_CHILD_TYPE rightChild) {
super(PlanType.PHYSICAL_HASH_JOIN, joinType, hashJoinConjuncts, otherJoinConjuncts, hint, markJoinSlotReference,
groupExpression, logicalProperties, physicalProperties, statsDeriveResult, leftChild, rightChild);
groupExpression, logicalProperties, physicalProperties, statistics, leftChild, rightChild);
}
@Override
@ -110,7 +110,7 @@ public class PhysicalHashJoin<
"otherJoinCondition", otherJoinConjuncts,
"isMarkJoin", markJoinSlotReference.isPresent(),
"MarkJoinSlotReference", markJoinSlotReference.isPresent() ? markJoinSlotReference.get() : "empty",
"stats", statsDeriveResult);
"stats", statistics);
if (hint != JoinHint.NONE) {
args.add("hint");
args.add(hint);
@ -139,10 +139,9 @@ public class PhysicalHashJoin<
Optional.empty(), logicalProperties.get(), left(), right());
}
@Override
public PhysicalHashJoin<LEFT_CHILD_TYPE, RIGHT_CHILD_TYPE> withPhysicalPropertiesAndStats(
PhysicalProperties physicalProperties, StatsDeriveResult statsDeriveResult) {
PhysicalProperties physicalProperties, Statistics statistics) {
return new PhysicalHashJoin<>(joinType, hashJoinConjuncts, otherJoinConjuncts, hint, markJoinSlotReference,
Optional.empty(), getLogicalProperties(), physicalProperties, statsDeriveResult, left(), right());
Optional.empty(), getLogicalProperties(), physicalProperties, statistics, left(), right());
}
}

View File

@ -24,7 +24,7 @@ import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.PlanType;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import java.util.List;
import java.util.Optional;
@ -49,10 +49,10 @@ public class PhysicalIntersect extends PhysicalSetOperation {
public PhysicalIntersect(Qualifier qualifier,
Optional<GroupExpression> groupExpression, LogicalProperties logicalProperties,
PhysicalProperties physicalProperties, StatsDeriveResult statsDeriveResult,
PhysicalProperties physicalProperties, Statistics statistics,
List<Plan> inputs) {
super(PlanType.PHYSICAL_INTERSECT, qualifier,
groupExpression, logicalProperties, physicalProperties, statsDeriveResult, inputs);
groupExpression, logicalProperties, physicalProperties, statistics, inputs);
}
@Override
@ -64,7 +64,7 @@ public class PhysicalIntersect extends PhysicalSetOperation {
public String toString() {
return Utils.toSqlString("PhysicalIntersect",
"qualifier", qualifier,
"stats", statsDeriveResult);
"stats", statistics);
}
@Override
@ -86,8 +86,8 @@ public class PhysicalIntersect extends PhysicalSetOperation {
@Override
public PhysicalIntersect withPhysicalPropertiesAndStats(
PhysicalProperties physicalProperties, StatsDeriveResult statsDeriveResult) {
PhysicalProperties physicalProperties, Statistics statistics) {
return new PhysicalIntersect(qualifier,
Optional.empty(), getLogicalProperties(), physicalProperties, statsDeriveResult, children);
Optional.empty(), getLogicalProperties(), physicalProperties, statistics, children);
}
}

View File

@ -26,7 +26,7 @@ import org.apache.doris.nereids.trees.plans.ObjectId;
import org.apache.doris.nereids.trees.plans.PlanType;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import java.util.List;
import java.util.Objects;
@ -57,9 +57,9 @@ public class PhysicalJdbcScan extends PhysicalRelation {
public PhysicalJdbcScan(ObjectId id, ExternalTable table, List<String> qualifier,
DistributionSpec distributionSpec, Optional<GroupExpression> groupExpression,
LogicalProperties logicalProperties, PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult) {
Statistics statistics) {
super(id, PlanType.PHYSICAL_JDBC_SCAN, qualifier, groupExpression, logicalProperties,
physicalProperties, statsDeriveResult);
physicalProperties, statistics);
this.table = table;
this.distributionSpec = distributionSpec;
}
@ -69,7 +69,7 @@ public class PhysicalJdbcScan extends PhysicalRelation {
return Utils.toSqlString("PhysicalJdbcScan",
"qualified", Utils.qualifiedName(qualifier, table.getName()),
"output", getOutput(),
"stats", statsDeriveResult
"stats", statistics
);
}
@ -112,8 +112,8 @@ public class PhysicalJdbcScan extends PhysicalRelation {
@Override
public PhysicalJdbcScan withPhysicalPropertiesAndStats(PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult) {
Statistics statistics) {
return new PhysicalJdbcScan(id, table, qualifier, distributionSpec, groupExpression, getLogicalProperties(),
physicalProperties, statsDeriveResult);
physicalProperties, statistics);
}
}

View File

@ -22,7 +22,7 @@ import org.apache.doris.nereids.properties.LogicalProperties;
import org.apache.doris.nereids.properties.PhysicalProperties;
import org.apache.doris.nereids.trees.plans.LeafPlan;
import org.apache.doris.nereids.trees.plans.PlanType;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import java.util.Optional;
import javax.annotation.Nullable;
@ -37,7 +37,7 @@ public abstract class PhysicalLeaf extends AbstractPhysicalPlan implements LeafP
}
public PhysicalLeaf(PlanType type, Optional<GroupExpression> groupExpression, LogicalProperties logicalProperties,
@Nullable PhysicalProperties physicalProperties, @Nullable StatsDeriveResult statsDeriveResult) {
super(type, groupExpression, logicalProperties, physicalProperties, statsDeriveResult);
@Nullable PhysicalProperties physicalProperties, @Nullable Statistics statistics) {
super(type, groupExpression, logicalProperties, physicalProperties, statistics);
}
}

View File

@ -27,7 +27,7 @@ import org.apache.doris.nereids.trees.plans.PlanType;
import org.apache.doris.nereids.trees.plans.algebra.Limit;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
@ -76,8 +76,8 @@ public class PhysicalLimit<CHILD_TYPE extends Plan> extends PhysicalUnary<CHILD_
*/
public PhysicalLimit(long limit, long offset, LimitPhase phase, Optional<GroupExpression> groupExpression,
LogicalProperties logicalProperties, PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult, CHILD_TYPE child) {
super(PlanType.PHYSICAL_LIMIT, groupExpression, logicalProperties, physicalProperties, statsDeriveResult,
Statistics statistics, CHILD_TYPE child) {
super(PlanType.PHYSICAL_LIMIT, groupExpression, logicalProperties, physicalProperties, statistics,
child);
this.limit = limit;
this.offset = offset;
@ -123,9 +123,9 @@ public class PhysicalLimit<CHILD_TYPE extends Plan> extends PhysicalUnary<CHILD_
@Override
public PhysicalLimit<CHILD_TYPE> withPhysicalPropertiesAndStats(PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult) {
Statistics statistics) {
return new PhysicalLimit<>(limit, offset, phase, groupExpression, getLogicalProperties(), physicalProperties,
statsDeriveResult, child());
statistics, child());
}
@Override
@ -156,7 +156,7 @@ public class PhysicalLimit<CHILD_TYPE extends Plan> extends PhysicalUnary<CHILD_
"limit", limit,
"offset", offset,
"phase", phase,
"stats", statsDeriveResult
"stats", statistics
);
}
}

View File

@ -28,7 +28,7 @@ import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.PlanType;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import com.google.common.base.Preconditions;
import com.google.common.collect.Sets;
@ -98,13 +98,13 @@ public class PhysicalNestedLoopJoin<
Optional<GroupExpression> groupExpression,
LogicalProperties logicalProperties,
PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult,
Statistics statistics,
LEFT_CHILD_TYPE leftChild,
RIGHT_CHILD_TYPE rightChild) {
super(PlanType.PHYSICAL_NESTED_LOOP_JOIN, joinType, hashJoinConjuncts, otherJoinConjuncts,
// nested loop join ignores join hints.
JoinHint.NONE, markJoinSlotReference,
groupExpression, logicalProperties, physicalProperties, statsDeriveResult, leftChild, rightChild);
groupExpression, logicalProperties, physicalProperties, statistics, leftChild, rightChild);
}
@Override
@ -149,10 +149,10 @@ public class PhysicalNestedLoopJoin<
@Override
public PhysicalNestedLoopJoin<LEFT_CHILD_TYPE, RIGHT_CHILD_TYPE> withPhysicalPropertiesAndStats(
PhysicalProperties physicalProperties, StatsDeriveResult statsDeriveResult) {
PhysicalProperties physicalProperties, Statistics statistics) {
return new PhysicalNestedLoopJoin<>(joinType,
hashJoinConjuncts, otherJoinConjuncts, markJoinSlotReference, Optional.empty(),
getLogicalProperties(), physicalProperties, statsDeriveResult, left(), right());
getLogicalProperties(), physicalProperties, statistics, left(), right());
}
public void addBitmapRuntimeFilterCondition(Expression expr) {

View File

@ -28,7 +28,7 @@ import org.apache.doris.nereids.trees.plans.PreAggStatus;
import org.apache.doris.nereids.trees.plans.algebra.OlapScan;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import com.google.common.collect.ImmutableList;
@ -69,9 +69,9 @@ public class PhysicalOlapScan extends PhysicalRelation implements OlapScan {
public PhysicalOlapScan(ObjectId id, OlapTable olapTable, List<String> qualifier, long selectedIndexId,
List<Long> selectedTabletIds, List<Long> selectedPartitionIds, DistributionSpec distributionSpec,
PreAggStatus preAggStatus, Optional<GroupExpression> groupExpression, LogicalProperties logicalProperties,
PhysicalProperties physicalProperties, StatsDeriveResult statsDeriveResult) {
PhysicalProperties physicalProperties, Statistics statistics) {
super(id, PlanType.PHYSICAL_OLAP_SCAN, qualifier, groupExpression, logicalProperties, physicalProperties,
statsDeriveResult);
statistics);
this.olapTable = olapTable;
this.selectedIndexId = selectedIndexId;
this.selectedTabletIds = ImmutableList.copyOf(selectedTabletIds);
@ -111,7 +111,8 @@ public class PhysicalOlapScan extends PhysicalRelation implements OlapScan {
public String toString() {
return Utils.toSqlString("PhysicalOlapScan",
"qualified", Utils.qualifiedName(qualifier, olapTable.getName()),
"stats", statsDeriveResult
"output", getOutput(),
"stats", statistics
);
}
@ -155,9 +156,9 @@ public class PhysicalOlapScan extends PhysicalRelation implements OlapScan {
@Override
public PhysicalOlapScan withPhysicalPropertiesAndStats(
PhysicalProperties physicalProperties, StatsDeriveResult statsDeriveResult) {
PhysicalProperties physicalProperties, Statistics statistics) {
return new PhysicalOlapScan(id, olapTable, qualifier, selectedIndexId, selectedTabletIds,
selectedPartitionIds, distributionSpec, preAggStatus, Optional.empty(),
getLogicalProperties(), physicalProperties, statsDeriveResult);
getLogicalProperties(), physicalProperties, statistics);
}
}

View File

@ -27,7 +27,7 @@ import org.apache.doris.nereids.trees.plans.PlanType;
import org.apache.doris.nereids.trees.plans.algebra.OneRowRelation;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import com.google.common.collect.ImmutableList;
@ -53,9 +53,9 @@ public class PhysicalOneRowRelation extends PhysicalLeaf implements OneRowRelati
boolean buildUnionNode,
Optional<GroupExpression> groupExpression,
LogicalProperties logicalProperties, PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult) {
Statistics statistics) {
super(PlanType.PHYSICAL_ONE_ROW_RELATION, groupExpression, logicalProperties, physicalProperties,
statsDeriveResult);
statistics);
this.projects = ImmutableList.copyOf(Objects.requireNonNull(projects, "projects can not be null"));
this.buildUnionNode = buildUnionNode;
}
@ -78,13 +78,13 @@ public class PhysicalOneRowRelation extends PhysicalLeaf implements OneRowRelati
@Override
public Plan withGroupExpression(Optional<GroupExpression> groupExpression) {
return new PhysicalOneRowRelation(projects, buildUnionNode, groupExpression,
logicalPropertiesSupplier.get(), physicalProperties, statsDeriveResult);
logicalPropertiesSupplier.get(), physicalProperties, statistics);
}
@Override
public Plan withLogicalProperties(Optional<LogicalProperties> logicalProperties) {
return new PhysicalOneRowRelation(projects, buildUnionNode, Optional.empty(),
logicalProperties.get(), physicalProperties, statsDeriveResult);
logicalProperties.get(), physicalProperties, statistics);
}
@Override
@ -115,9 +115,9 @@ public class PhysicalOneRowRelation extends PhysicalLeaf implements OneRowRelati
@Override
public PhysicalOneRowRelation withPhysicalPropertiesAndStats(PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult) {
Statistics statistics) {
return new PhysicalOneRowRelation(projects, buildUnionNode, Optional.empty(),
logicalPropertiesSupplier.get(), physicalProperties, statsDeriveResult);
logicalPropertiesSupplier.get(), physicalProperties, statistics);
}
public boolean notBuildUnionNode() {

View File

@ -19,7 +19,7 @@ package org.apache.doris.nereids.trees.plans.physical;
import org.apache.doris.nereids.properties.PhysicalProperties;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
/**
* interface for all physical plan.
@ -29,5 +29,5 @@ public interface PhysicalPlan extends Plan {
PhysicalProperties getPhysicalProperties();
PhysicalPlan withPhysicalPropertiesAndStats(PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult);
Statistics statistics);
}

View File

@ -27,7 +27,7 @@ import org.apache.doris.nereids.trees.plans.PlanType;
import org.apache.doris.nereids.trees.plans.algebra.Project;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
@ -55,8 +55,8 @@ public class PhysicalProject<CHILD_TYPE extends Plan> extends PhysicalUnary<CHIL
public PhysicalProject(List<NamedExpression> projects, Optional<GroupExpression> groupExpression,
LogicalProperties logicalProperties, PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult, CHILD_TYPE child) {
super(PlanType.PHYSICAL_PROJECT, groupExpression, logicalProperties, physicalProperties, statsDeriveResult,
Statistics statistics, CHILD_TYPE child) {
super(PlanType.PHYSICAL_PROJECT, groupExpression, logicalProperties, physicalProperties, statistics,
child);
this.projects = ImmutableList.copyOf(Objects.requireNonNull(projects, "projects can not be null"));
}
@ -69,7 +69,7 @@ public class PhysicalProject<CHILD_TYPE extends Plan> extends PhysicalUnary<CHIL
public String toString() {
return Utils.toSqlString("PhysicalProject[" + id.asInt() + "]",
"projects", projects,
"stats", statsDeriveResult
"stats", statistics
);
}
@ -118,9 +118,9 @@ public class PhysicalProject<CHILD_TYPE extends Plan> extends PhysicalUnary<CHIL
@Override
public PhysicalProject<CHILD_TYPE> withPhysicalPropertiesAndStats(PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult) {
Statistics statistics) {
return new PhysicalProject<>(projects, Optional.empty(), getLogicalProperties(), physicalProperties,
statsDeriveResult, child());
statistics, child());
}
/**
@ -134,7 +134,7 @@ public class PhysicalProject<CHILD_TYPE extends Plan> extends PhysicalUnary<CHIL
groupExpression,
getLogicalProperties(),
physicalProperties,
statsDeriveResult,
statistics,
child
);
}

View File

@ -27,7 +27,7 @@ import org.apache.doris.nereids.trees.plans.SortPhase;
import org.apache.doris.nereids.trees.plans.algebra.Sort;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import com.google.common.base.Preconditions;
@ -62,9 +62,9 @@ public class PhysicalQuickSort<CHILD_TYPE extends Plan> extends AbstractPhysical
*/
public PhysicalQuickSort(List<OrderKey> orderKeys,
SortPhase phase, Optional<GroupExpression> groupExpression, LogicalProperties logicalProperties,
PhysicalProperties physicalProperties, StatsDeriveResult statsDeriveResult, CHILD_TYPE child) {
PhysicalProperties physicalProperties, Statistics statistics, CHILD_TYPE child) {
super(PlanType.PHYSICAL_QUICK_SORT, orderKeys, phase, groupExpression, logicalProperties, physicalProperties,
statsDeriveResult, child);
statistics, child);
}
@Override
@ -90,9 +90,9 @@ public class PhysicalQuickSort<CHILD_TYPE extends Plan> extends AbstractPhysical
@Override
public PhysicalQuickSort<CHILD_TYPE> withPhysicalPropertiesAndStats(PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult) {
Statistics statistics) {
return new PhysicalQuickSort<>(orderKeys, phase, Optional.empty(), getLogicalProperties(), physicalProperties,
statsDeriveResult, child());
statistics, child());
}
@Override

View File

@ -25,7 +25,7 @@ import org.apache.doris.nereids.trees.plans.ObjectId;
import org.apache.doris.nereids.trees.plans.PlanType;
import org.apache.doris.nereids.trees.plans.algebra.Scan;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import com.google.common.collect.ImmutableList;
@ -56,8 +56,8 @@ public abstract class PhysicalRelation extends PhysicalLeaf implements Scan {
*/
public PhysicalRelation(ObjectId id, PlanType type, List<String> qualifier,
Optional<GroupExpression> groupExpression, LogicalProperties logicalProperties,
PhysicalProperties physicalProperties, StatsDeriveResult statsDeriveResult) {
super(type, groupExpression, logicalProperties, physicalProperties, statsDeriveResult);
PhysicalProperties physicalProperties, Statistics statistics) {
super(type, groupExpression, logicalProperties, physicalProperties, statistics);
this.id = id;
this.qualifier = ImmutableList.copyOf(Objects.requireNonNull(qualifier, "qualifier can not be null"));
}

View File

@ -29,7 +29,7 @@ import org.apache.doris.nereids.trees.plans.algebra.Repeat;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.nereids.util.ExpressionUtils;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
@ -69,9 +69,9 @@ public class PhysicalRepeat<CHILD_TYPE extends Plan> extends PhysicalUnary<CHILD
*/
public PhysicalRepeat(List<List<Expression>> groupingSets, List<NamedExpression> outputExpressions,
Optional<GroupExpression> groupExpression, LogicalProperties logicalProperties,
PhysicalProperties physicalProperties, StatsDeriveResult statsDeriveResult, CHILD_TYPE child) {
PhysicalProperties physicalProperties, Statistics statistics, CHILD_TYPE child) {
super(PlanType.PHYSICAL_REPEAT, groupExpression, logicalProperties,
physicalProperties, statsDeriveResult, child);
physicalProperties, statistics, child);
this.groupingSets = Objects.requireNonNull(groupingSets, "groupingSets can not be null")
.stream()
.map(ImmutableList::copyOf)
@ -95,7 +95,7 @@ public class PhysicalRepeat<CHILD_TYPE extends Plan> extends PhysicalUnary<CHILD
return Utils.toSqlString("PhysicalRepeat",
"groupingSets", groupingSets,
"outputExpressions", outputExpressions,
"stats", statsDeriveResult
"stats", statistics
);
}
@ -148,25 +148,25 @@ public class PhysicalRepeat<CHILD_TYPE extends Plan> extends PhysicalUnary<CHILD
@Override
public PhysicalRepeat<CHILD_TYPE> withGroupExpression(Optional<GroupExpression> groupExpression) {
return new PhysicalRepeat<>(groupingSets, outputExpressions, groupExpression,
getLogicalProperties(), physicalProperties, statsDeriveResult, child());
getLogicalProperties(), physicalProperties, statistics, child());
}
@Override
public PhysicalRepeat<CHILD_TYPE> withLogicalProperties(Optional<LogicalProperties> logicalProperties) {
return new PhysicalRepeat<>(groupingSets, outputExpressions, Optional.empty(),
logicalProperties.get(), physicalProperties, statsDeriveResult, child());
logicalProperties.get(), physicalProperties, statistics, child());
}
@Override
public PhysicalRepeat<CHILD_TYPE> withPhysicalPropertiesAndStats(PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult) {
Statistics statistics) {
return new PhysicalRepeat<>(groupingSets, outputExpressions, Optional.empty(),
getLogicalProperties(), physicalProperties, statsDeriveResult, child());
getLogicalProperties(), physicalProperties, statistics, child());
}
@Override
public PhysicalRepeat<CHILD_TYPE> withAggOutput(List<NamedExpression> newOutput) {
return new PhysicalRepeat<>(groupingSets, newOutput, Optional.empty(),
getLogicalProperties(), physicalProperties, statsDeriveResult, child());
getLogicalProperties(), physicalProperties, statistics, child());
}
}

View File

@ -27,7 +27,7 @@ import org.apache.doris.nereids.trees.plans.PlanType;
import org.apache.doris.nereids.trees.plans.algebra.Scan;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import java.util.List;
import java.util.Optional;
@ -47,9 +47,9 @@ public class PhysicalSchemaScan extends PhysicalRelation implements Scan {
public PhysicalSchemaScan(ObjectId id, Table table, List<String> qualifier,
Optional<GroupExpression> groupExpression, LogicalProperties logicalProperties,
PhysicalProperties physicalProperties, StatsDeriveResult statsDeriveResult) {
PhysicalProperties physicalProperties, Statistics statistics) {
super(id, PlanType.PHYSICAL_SCHEMA_SCAN, qualifier, groupExpression, logicalProperties, physicalProperties,
statsDeriveResult);
statistics);
this.table = table;
}
@ -66,20 +66,20 @@ public class PhysicalSchemaScan extends PhysicalRelation implements Scan {
@Override
public Plan withGroupExpression(Optional<GroupExpression> groupExpression) {
return new PhysicalSchemaScan(id, table, qualifier, groupExpression, getLogicalProperties(), physicalProperties,
statsDeriveResult);
statistics);
}
@Override
public Plan withLogicalProperties(Optional<LogicalProperties> logicalProperties) {
return new PhysicalSchemaScan(id, table, qualifier, groupExpression, logicalProperties.get(),
physicalProperties, statsDeriveResult);
physicalProperties, statistics);
}
@Override
public PhysicalPlan withPhysicalPropertiesAndStats(PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult) {
Statistics statistics) {
return new PhysicalSchemaScan(id, table, qualifier, groupExpression, getLogicalProperties(), physicalProperties,
statsDeriveResult);
statistics);
}
@Override

View File

@ -28,7 +28,7 @@ import org.apache.doris.nereids.trees.plans.PlanType;
import org.apache.doris.nereids.trees.plans.algebra.SetOperation;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import com.google.common.collect.ImmutableList;
@ -63,9 +63,9 @@ public abstract class PhysicalSetOperation extends AbstractPhysicalPlan implemen
public PhysicalSetOperation(PlanType planType,
Qualifier qualifier,
Optional<GroupExpression> groupExpression, LogicalProperties logicalProperties,
PhysicalProperties physicalProperties, StatsDeriveResult statsDeriveResult, List<Plan> inputs) {
PhysicalProperties physicalProperties, Statistics statistics, List<Plan> inputs) {
super(planType, groupExpression, logicalProperties,
physicalProperties, statsDeriveResult, inputs.toArray(new Plan[0]));
physicalProperties, statistics, inputs.toArray(new Plan[0]));
this.qualifier = qualifier;
}
@ -78,7 +78,7 @@ public abstract class PhysicalSetOperation extends AbstractPhysicalPlan implemen
public String toString() {
return Utils.toSqlString("PhysicalSetOperation",
"qualifier", qualifier,
"stats", statsDeriveResult);
"stats", statistics);
}
@Override

View File

@ -28,7 +28,7 @@ import org.apache.doris.nereids.trees.expressions.functions.agg.Min;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
@ -53,9 +53,9 @@ public class PhysicalStorageLayerAggregate extends PhysicalRelation {
public PhysicalStorageLayerAggregate(PhysicalRelation relation, PushDownAggOp aggOp,
Optional<GroupExpression> groupExpression, LogicalProperties logicalProperties,
PhysicalProperties physicalProperties, StatsDeriveResult statsDeriveResult) {
PhysicalProperties physicalProperties, Statistics statistics) {
super(relation.getId(), relation.getType(), relation.getQualifier(), groupExpression,
logicalProperties, physicalProperties, statsDeriveResult);
logicalProperties, physicalProperties, statistics);
this.relation = Objects.requireNonNull(relation, "relation cannot be null");
this.aggOp = Objects.requireNonNull(aggOp, "aggOp cannot be null");
}
@ -108,7 +108,7 @@ public class PhysicalStorageLayerAggregate extends PhysicalRelation {
return Utils.toSqlString("PhysicalStorageLayerAggregate",
"pushDownAggOp", aggOp,
"relation", relation,
"stats", statsDeriveResult
"stats", statistics
);
}
@ -119,20 +119,20 @@ public class PhysicalStorageLayerAggregate extends PhysicalRelation {
@Override
public PhysicalStorageLayerAggregate withGroupExpression(Optional<GroupExpression> groupExpression) {
return new PhysicalStorageLayerAggregate(relation, aggOp, groupExpression, getLogicalProperties(),
physicalProperties, statsDeriveResult);
physicalProperties, statistics);
}
@Override
public Plan withLogicalProperties(Optional<LogicalProperties> logicalProperties) {
return new PhysicalStorageLayerAggregate(relation, aggOp, Optional.empty(),
logicalProperties.get(), physicalProperties, statsDeriveResult);
logicalProperties.get(), physicalProperties, statistics);
}
@Override
public PhysicalPlan withPhysicalPropertiesAndStats(PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult) {
Statistics statistics) {
return new PhysicalStorageLayerAggregate(relation, aggOp, Optional.empty(),
getLogicalProperties(), physicalProperties, statsDeriveResult);
getLogicalProperties(), physicalProperties, statistics);
}
/** PushAggOp */

View File

@ -27,7 +27,7 @@ import org.apache.doris.nereids.trees.plans.PlanType;
import org.apache.doris.nereids.trees.plans.algebra.TVFRelation;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import com.google.common.collect.ImmutableList;
@ -47,29 +47,29 @@ public class PhysicalTVFRelation extends PhysicalRelation implements TVFRelation
public PhysicalTVFRelation(ObjectId id, TableValuedFunction function, Optional<GroupExpression> groupExpression,
LogicalProperties logicalProperties, PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult) {
Statistics statistics) {
super(id, PlanType.PHYSICAL_TVF_RELATION, ImmutableList.of(), groupExpression, logicalProperties,
physicalProperties, statsDeriveResult);
physicalProperties, statistics);
this.function = Objects.requireNonNull(function, "function can not be null");
}
@Override
public PhysicalTVFRelation withGroupExpression(Optional<GroupExpression> groupExpression) {
return new PhysicalTVFRelation(id, function, groupExpression, getLogicalProperties(),
physicalProperties, statsDeriveResult);
physicalProperties, statistics);
}
@Override
public PhysicalTVFRelation withLogicalProperties(Optional<LogicalProperties> logicalProperties) {
return new PhysicalTVFRelation(id, function, Optional.empty(),
logicalProperties.get(), physicalProperties, statsDeriveResult);
logicalProperties.get(), physicalProperties, statistics);
}
@Override
public PhysicalPlan withPhysicalPropertiesAndStats(PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult) {
Statistics statistics) {
return new PhysicalTVFRelation(id, function, Optional.empty(),
getLogicalProperties(), physicalProperties, statsDeriveResult);
getLogicalProperties(), physicalProperties, statistics);
}
@Override

View File

@ -27,7 +27,7 @@ import org.apache.doris.nereids.trees.plans.SortPhase;
import org.apache.doris.nereids.trees.plans.algebra.TopN;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import com.google.common.base.Preconditions;
@ -65,9 +65,9 @@ public class PhysicalTopN<CHILD_TYPE extends Plan> extends AbstractPhysicalSort<
*/
public PhysicalTopN(List<OrderKey> orderKeys, long limit, long offset,
SortPhase phase, Optional<GroupExpression> groupExpression, LogicalProperties logicalProperties,
PhysicalProperties physicalProperties, StatsDeriveResult statsDeriveResult, CHILD_TYPE child) {
PhysicalProperties physicalProperties, Statistics statistics, CHILD_TYPE child) {
super(PlanType.PHYSICAL_TOP_N, orderKeys, phase, groupExpression, logicalProperties, physicalProperties,
statsDeriveResult, child);
statistics, child);
Objects.requireNonNull(orderKeys, "orderKeys should not be null in PhysicalTopN.");
this.limit = limit;
this.offset = offset;
@ -124,9 +124,9 @@ public class PhysicalTopN<CHILD_TYPE extends Plan> extends AbstractPhysicalSort<
@Override
public PhysicalTopN<CHILD_TYPE> withPhysicalPropertiesAndStats(PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult) {
Statistics statistics) {
return new PhysicalTopN<>(orderKeys, limit, offset, phase, Optional.empty(),
getLogicalProperties(), physicalProperties, statsDeriveResult, child());
getLogicalProperties(), physicalProperties, statistics, child());
}
@Override

View File

@ -23,7 +23,7 @@ import org.apache.doris.nereids.properties.PhysicalProperties;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.PlanType;
import org.apache.doris.nereids.trees.plans.UnaryPlan;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import java.util.Optional;
import javax.annotation.Nullable;
@ -46,7 +46,7 @@ public abstract class PhysicalUnary<CHILD_TYPE extends Plan>
public PhysicalUnary(PlanType type, Optional<GroupExpression> groupExpression,
LogicalProperties logicalProperties, @Nullable PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult, CHILD_TYPE child) {
super(type, groupExpression, logicalProperties, physicalProperties, statsDeriveResult, child);
Statistics statistics, CHILD_TYPE child) {
super(type, groupExpression, logicalProperties, physicalProperties, statistics, child);
}
}

View File

@ -24,7 +24,7 @@ import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.PlanType;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import java.util.List;
import java.util.Optional;
@ -49,9 +49,9 @@ public class PhysicalUnion extends PhysicalSetOperation {
public PhysicalUnion(Qualifier qualifier,
Optional<GroupExpression> groupExpression, LogicalProperties logicalProperties,
PhysicalProperties physicalProperties, StatsDeriveResult statsDeriveResult, List<Plan> inputs) {
PhysicalProperties physicalProperties, Statistics statistics, List<Plan> inputs) {
super(PlanType.PHYSICAL_UNION, qualifier,
groupExpression, logicalProperties, physicalProperties, statsDeriveResult, inputs);
groupExpression, logicalProperties, physicalProperties, statistics, inputs);
}
@Override
@ -63,7 +63,7 @@ public class PhysicalUnion extends PhysicalSetOperation {
public String toString() {
return Utils.toSqlString("PhysicalUnion",
"qualifier", qualifier,
"stats", statsDeriveResult);
"stats", statistics);
}
@Override
@ -85,8 +85,8 @@ public class PhysicalUnion extends PhysicalSetOperation {
@Override
public PhysicalUnion withPhysicalPropertiesAndStats(
PhysicalProperties physicalProperties, StatsDeriveResult statsDeriveResult) {
PhysicalProperties physicalProperties, Statistics statistics) {
return new PhysicalUnion(qualifier, Optional.empty(),
getLogicalProperties(), physicalProperties, statsDeriveResult, children);
getLogicalProperties(), physicalProperties, statistics, children);
}
}

View File

@ -30,7 +30,7 @@ import org.apache.doris.nereids.trees.plans.PlanType;
import org.apache.doris.nereids.trees.plans.algebra.Window;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import com.google.common.base.Preconditions;
@ -65,10 +65,10 @@ public class PhysicalWindow<CHILD_TYPE extends Plan> extends PhysicalUnary<CHILD
/** constructor for PhysicalWindow */
public PhysicalWindow(WindowFrameGroup windowFrameGroup, RequireProperties requireProperties,
Optional<GroupExpression> groupExpression, LogicalProperties logicalProperties,
PhysicalProperties physicalProperties, StatsDeriveResult statsDeriveResult,
PhysicalProperties physicalProperties, Statistics statistics,
CHILD_TYPE child) {
super(PlanType.PHYSICAL_WINDOW, groupExpression, logicalProperties, physicalProperties,
statsDeriveResult, child);
statistics, child);
this.windowFrameGroup = Objects.requireNonNull(windowFrameGroup, "windowFrameGroup in PhysicalWindow"
+ "cannot be null");
this.requireProperties = requireProperties;
@ -145,9 +145,9 @@ public class PhysicalWindow<CHILD_TYPE extends Plan> extends PhysicalUnary<CHILD
@Override
public PhysicalPlan withPhysicalPropertiesAndStats(PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult) {
Statistics statistics) {
return new PhysicalWindow<>(windowFrameGroup, requireProperties, Optional.empty(),
getLogicalProperties(), physicalProperties, statsDeriveResult, child());
getLogicalProperties(), physicalProperties, statistics, child());
}
@Override
@ -159,6 +159,6 @@ public class PhysicalWindow<CHILD_TYPE extends Plan> extends PhysicalUnary<CHILD
public <C extends Plan> PhysicalWindow<C> withRequirePropertiesAndChild(RequireProperties requireProperties,
C newChild) {
return new PhysicalWindow<>(windowFrameGroup, requireProperties, Optional.empty(),
getLogicalProperties(), physicalProperties, statsDeriveResult, newChild);
getLogicalProperties(), physicalProperties, statistics, newChild);
}
}

View File

@ -17,7 +17,6 @@
package org.apache.doris.statistics;
import org.apache.doris.analysis.LiteralExpr;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.statistics.util.StatisticsUtil;
@ -26,29 +25,40 @@ import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
public class Bucket {
public LiteralExpr lower;
public LiteralExpr upper;
public int count;
public int preSum;
public int ndv;
public double lower;
public double upper;
public double count;
public double preSum;
public double ndv;
public LiteralExpr getLower() {
public Bucket() {
}
public Bucket(double lower, double upper, double count, double preSum, double ndv) {
this.lower = lower;
this.upper = upper;
this.count = count;
this.preSum = preSum;
this.ndv = ndv;
}
public double getLower() {
return lower;
}
public void setLower(LiteralExpr lower) {
public void setLower(double lower) {
this.lower = lower;
}
public LiteralExpr getUpper() {
public double getUpper() {
return upper;
}
public void setUpper(LiteralExpr upper) {
public void setUpper(double upper) {
this.upper = upper;
}
public int getCount() {
public double getCount() {
return count;
}
@ -56,7 +66,7 @@ public class Bucket {
this.count = count;
}
public int getPreSum() {
public double getPreSum() {
return preSum;
}
@ -64,7 +74,7 @@ public class Bucket {
this.preSum = preSum;
}
public int getNdv() {
public double getNdv() {
return ndv;
}
@ -75,8 +85,8 @@ public class Bucket {
public static Bucket deserializeFromJson(Type datatype, String json) throws AnalysisException {
Bucket bucket = new Bucket();
JsonObject bucketJson = JsonParser.parseString(json).getAsJsonObject();
bucket.lower = StatisticsUtil.readableValue(datatype, bucketJson.get("lower").getAsString());
bucket.upper = StatisticsUtil.readableValue(datatype, bucketJson.get("upper").getAsString());
bucket.lower = StatisticsUtil.convertToDouble(datatype, bucketJson.get("lower").getAsString());
bucket.upper = StatisticsUtil.convertToDouble(datatype, bucketJson.get("upper").getAsString());
bucket.count = bucketJson.get("count").getAsInt();
bucket.preSum = bucketJson.get("pre_sum").getAsInt();
bucket.ndv = bucketJson.get("ndv").getAsInt();
@ -89,8 +99,8 @@ public class Bucket {
}
JsonObject bucketJson = new JsonObject();
bucketJson.addProperty("upper", bucket.upper.getStringValue());
bucketJson.addProperty("lower", bucket.lower.getStringValue());
bucketJson.addProperty("upper", bucket.upper);
bucketJson.addProperty("lower", bucket.lower);
bucketJson.addProperty("count", bucket.count);
bucketJson.addProperty("pre_sum", bucket.preSum);
bucketJson.addProperty("ndv", bucket.ndv);

View File

@ -17,25 +17,22 @@
package org.apache.doris.statistics;
public class Statistic {
public class ColumnLevelStatisticCache {
public Histogram histogram;
public ColumnStatistic columnStatistic;
public Statistic() {
public ColumnLevelStatisticCache() {
}
public Statistic(Histogram histogram, ColumnStatistic columnStatistic) {
public ColumnLevelStatisticCache(Histogram histogram, ColumnStatistic columnStatistic) {
this.histogram = histogram;
this.columnStatistic = columnStatistic;
}
public Histogram getHistogram() {
if (histogram != null) {
return histogram;
}
return Histogram.DEFAULT;
return null;
}
public void setHistogram(Histogram histogram) {
@ -46,7 +43,7 @@ public class Statistic {
if (columnStatistic != null) {
return columnStatistic;
}
return ColumnStatistic.DEFAULT;
return ColumnStatistic.UNKNOWN;
}
public void setColumnStatistic(ColumnStatistic columnStatistic) {

View File

@ -19,6 +19,7 @@ package org.apache.doris.statistics;
import org.apache.doris.analysis.LiteralExpr;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.Type;
import org.apache.doris.statistics.util.InternalQueryResult.ResultRow;
import org.apache.doris.statistics.util.StatisticsUtil;
@ -40,8 +41,8 @@ public class ColumnStatistic {
private static final Logger LOG = LogManager.getLogger(ColumnStatistic.class);
public static ColumnStatistic DEFAULT = new ColumnStatisticBuilder().setAvgSizeByte(1).setNdv(1)
.setNumNulls(1).setCount(1).setMaxValue(Double.MAX_VALUE).setMinValue(Double.MIN_VALUE)
public static ColumnStatistic UNKNOWN = new ColumnStatisticBuilder().setAvgSizeByte(1).setNdv(1)
.setNumNulls(1).setCount(1).setMaxValue(Double.POSITIVE_INFINITY).setMinValue(Double.NEGATIVE_INFINITY)
.setSelectivity(1.0).setIsUnknown(true)
.build();
@ -81,9 +82,11 @@ public class ColumnStatistic {
public final LiteralExpr minExpr;
public final LiteralExpr maxExpr;
public final Histogram histogram;
public ColumnStatistic(double count, double ndv, double avgSizeByte,
double numNulls, double dataSize, double minValue, double maxValue,
double selectivity, LiteralExpr minExpr, LiteralExpr maxExpr, boolean isUnKnown) {
double selectivity, LiteralExpr minExpr, LiteralExpr maxExpr, boolean isUnKnown, Histogram histogram) {
this.count = count;
this.ndv = ndv;
this.avgSizeByte = avgSizeByte;
@ -95,6 +98,7 @@ public class ColumnStatistic {
this.minExpr = minExpr;
this.maxExpr = maxExpr;
this.isUnKnown = isUnKnown;
this.histogram = histogram;
}
// TODO: use thrift
@ -123,7 +127,7 @@ public class ColumnStatistic {
LOG.warn("Failed to deserialize column statistics, ctlId: {} dbId: {}"
+ "tblId: {} column: {} not exists",
catalogId, dbID, tblId, colName);
return ColumnStatistic.DEFAULT;
return ColumnStatistic.UNKNOWN;
}
String min = resultRow.getColumnValue("min");
String max = resultRow.getColumnValue("max");
@ -132,10 +136,12 @@ public class ColumnStatistic {
columnStatisticBuilder.setMaxExpr(StatisticsUtil.readableValue(col.getType(), max));
columnStatisticBuilder.setMinExpr(StatisticsUtil.readableValue(col.getType(), min));
columnStatisticBuilder.setSelectivity(1.0);
Histogram histogram = Env.getCurrentEnv().getStatisticsCache().getHistogram(tblId, idxId, colName);
columnStatisticBuilder.setHistogram(histogram);
return columnStatisticBuilder.build();
} catch (Exception e) {
LOG.warn("Failed to deserialize column statistics, column not exists", e);
return ColumnStatistic.DEFAULT;
return ColumnStatistic.UNKNOWN;
}
}
@ -183,7 +189,7 @@ public class ColumnStatistic {
public ColumnStatistic updateBySelectivity(double selectivity, double rowCount) {
if (isUnKnown) {
return DEFAULT;
return UNKNOWN;
}
ColumnStatisticBuilder builder = new ColumnStatisticBuilder(this);
Double rowsAfterFilter = rowCount * selectivity;
@ -252,4 +258,8 @@ public class ColumnStatistic {
return isUnKnown ? "unKnown" : String.format("ndv=%.4f, min=%f, max=%f, sel=%f, count=%.4f",
ndv, minValue, maxValue, selectivity, count);
}
public boolean minOrMaxIsInf() {
return Double.isInfinite(maxValue) || Double.isInfinite(minValue);
}
}

View File

@ -33,6 +33,8 @@ public class ColumnStatisticBuilder {
private boolean isUnknown;
private Histogram histogram;
public ColumnStatisticBuilder() {
}
@ -48,6 +50,7 @@ public class ColumnStatisticBuilder {
this.minExpr = columnStatistic.minExpr;
this.maxExpr = columnStatistic.maxExpr;
this.isUnknown = columnStatistic.isUnKnown;
this.histogram = columnStatistic.histogram;
}
public ColumnStatisticBuilder setCount(double count) {
@ -149,8 +152,17 @@ public class ColumnStatisticBuilder {
return isUnknown;
}
public Histogram getHistogram() {
return histogram;
}
public ColumnStatisticBuilder setHistogram(Histogram histogram) {
this.histogram = histogram;
return this;
}
public ColumnStatistic build() {
return new ColumnStatistic(count, ndv, avgSizeByte, numNulls,
dataSize, minValue, maxValue, selectivity, minExpr, maxExpr, isUnknown);
dataSize, minValue, maxValue, selectivity, minExpr, maxExpr, isUnknown, histogram);
}
}

View File

@ -17,11 +17,9 @@
package org.apache.doris.statistics;
import org.apache.doris.analysis.LiteralExpr;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.statistics.util.InternalQueryResult.ResultRow;
import org.apache.doris.statistics.util.StatisticsUtil;
@ -30,6 +28,7 @@ import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import org.apache.commons.collections.CollectionUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.parquet.Strings;
@ -43,19 +42,15 @@ public class Histogram {
public final double sampleRate;
public final int numBuckets;
public final List<Bucket> buckets;
public Histogram(Type dataType, int numBuckets, double sampleRate, List<Bucket> buckets) {
public Histogram(Type dataType, double sampleRate, List<Bucket> buckets) {
this.dataType = dataType;
this.numBuckets = numBuckets;
this.sampleRate = sampleRate;
this.buckets = buckets;
}
public static Histogram DEFAULT = new HistogramBuilder().setDataType(Type.INVALID).setNumBuckets(0)
.setSampleRate(1.0).setBuckets(Lists.newArrayList()).build();
// TODO: use thrift
public static Histogram fromResultRow(ResultRow resultRow) {
@ -72,7 +67,7 @@ public class Histogram {
if (col == null) {
LOG.warn("Failed to deserialize histogram statistics, ctlId: {} dbId: {}"
+ "tblId: {} column: {} not exists", catalogId, dbId, tblId, colName);
return Histogram.DEFAULT;
return null;
}
Type dataType = col.getType();
@ -98,7 +93,7 @@ public class Histogram {
return histogramBuilder.build();
} catch (Exception e) {
LOG.warn("Failed to deserialize histogram statistics.", e);
return Histogram.DEFAULT;
return null;
}
}
@ -154,7 +149,7 @@ public class Histogram {
histogramJson.addProperty("data_type", histogram.dataType.toString());
histogramJson.addProperty("sample_rate", histogram.sampleRate);
histogramJson.addProperty("num_buckets", histogram.numBuckets);
histogramJson.addProperty("num_buckets", histogram.buckets.size());
JsonArray bucketsJsonArray = new JsonArray();
histogram.buckets.stream().map(Bucket::serializeToJsonObj).forEach(bucketsJsonArray::add);
@ -163,189 +158,11 @@ public class Histogram {
return histogramJson.toString();
}
/**
* Given a value, return the bucket to which it belongs,
* return null if not found.
*/
public Bucket findBucket(LiteralExpr key) {
if (buckets == null || buckets.isEmpty()) {
return null;
}
int left = 0;
int right = buckets.size() - 1;
if (key.compareTo(buckets.get(right).upper) > 0) {
return null;
}
while (left < right) {
int mid = left + (right - left) / 2;
if (key.compareTo(buckets.get(mid).upper) > 0) {
left = mid + 1;
} else {
right = mid;
}
}
return buckets.get(right);
}
/**
* Given a range, return the number of elements contained in the range.
* Calculate the range count based on the sampling ratio.
*/
public long rangeCount(LiteralExpr lower, boolean isIncludeLower, LiteralExpr upper, boolean isIncludeUpper) {
try {
double count = rangeCountIgnoreSampleRate(lower, isIncludeLower, upper, isIncludeUpper);
return (long) Math.max((count) / sampleRate, 0);
} catch (Throwable e) {
LOG.warn("Failed to get the number of elements in the histogram range: + " + e);
}
return 0;
}
/**
* Given a range, return the number of elements contained in the range.
*/
private int rangeCountIgnoreSampleRate(LiteralExpr lower, boolean isIncludeLower, LiteralExpr upper,
boolean isIncludeUpper) throws AnalysisException {
if (buckets == null || buckets.isEmpty()) {
return 0;
}
if (lower != null && upper == null) {
if (isIncludeLower) {
return greatEqualCount(lower);
} else {
return greatCount(lower);
}
}
if (lower == null && upper != null) {
if (isIncludeUpper) {
return lessEqualCount(upper);
} else {
return lessCount(upper);
}
}
if (lower != null) {
int cmp = lower.compareTo(upper);
if (cmp > 0) {
return 0;
} else if (cmp == 0) {
if (!isIncludeLower || !isIncludeUpper) {
return 0;
} else {
Bucket bucket = findBucket(upper);
if (bucket == null) {
return 0;
} else {
return bucket.count / bucket.ndv;
}
}
}
Bucket lowerBucket = findBucket(lower);
if (lowerBucket == null) {
return 0;
}
Bucket upperBucket = findBucket(upper);
if (upperBucket == null) {
return greatEqualCount(lower);
}
if (isIncludeLower && isIncludeUpper) {
return totalCount() - lessCount(lower) - greatCount(upper);
} else if (isIncludeLower) {
return totalCount() - lessCount(lower) - greatEqualCount(upper);
} else if (isIncludeUpper) {
return totalCount() - lessEqualCount(lower) - greatCount(upper);
} else {
return totalCount() - lessEqualCount(lower) - greatEqualCount(upper);
}
}
return totalCount();
}
private int totalCount() {
if (buckets == null || buckets.isEmpty()) {
public double size() {
if (CollectionUtils.isEmpty(buckets)) {
return 0;
}
Bucket lastBucket = buckets.get(buckets.size() - 1);
return lastBucket.preSum + lastBucket.count;
}
private int lessCount(LiteralExpr key) throws AnalysisException {
Bucket bucket = findBucket(key);
if (bucket == null) {
if (buckets == null || buckets.isEmpty()) {
return 0;
}
if (key.compareTo(buckets.get(0).lower) < 0) {
return 0;
}
if ((key.compareTo(buckets.get(buckets.size() - 1).upper)) > 0) {
return totalCount();
}
return totalCount();
} else {
if (key.compareTo(bucket.lower) == 0) {
return bucket.preSum;
} else if (key.compareTo(bucket.upper) == 0) {
return bucket.preSum + bucket.count - bucket.count / bucket.ndv;
} else {
Double min = StatisticsUtil.convertToDouble(dataType, bucket.lower.getStringValue());
Double max = StatisticsUtil.convertToDouble(dataType, bucket.upper.getStringValue());
Double v = StatisticsUtil.convertToDouble(dataType, key.getStringValue());
if (v < min) {
v = min;
}
if (v > max) {
v = max;
}
int result = bucket.preSum;
if (max > min) {
result += (v - min) * bucket.count / (max - min);
if (v > min) {
result -= bucket.count / bucket.ndv;
if (result < 0) {
result = 0;
}
}
}
return result;
}
}
}
private int lessEqualCount(LiteralExpr key) throws AnalysisException {
int lessCount = lessCount(key);
Bucket bucket = findBucket(key);
if (bucket == null) {
return lessCount;
} else {
if (key.compareTo(bucket.lower) < 0) {
return lessCount;
}
return lessCount + bucket.count / bucket.ndv;
}
}
private int greatCount(LiteralExpr key) throws AnalysisException {
int lessEqualCount = lessEqualCount(key);
return totalCount() - lessEqualCount;
}
private int greatEqualCount(LiteralExpr key) throws AnalysisException {
int greatCount = greatCount(key);
Bucket bucket = findBucket(key);
if (bucket != null) {
if (key.compareTo(bucket.lower) < 0) {
return greatCount;
}
return greatCount + bucket.count / bucket.ndv;
} else {
return greatCount;
}
return lastBucket.getPreSum() + lastBucket.getCount();
}
}

View File

@ -19,16 +19,19 @@ package org.apache.doris.statistics;
import org.apache.doris.catalog.Type;
import java.util.Comparator;
import java.util.List;
/**
* Builder for histogram
*/
public class HistogramBuilder {
private Type dataType;
private int numBuckets;
private double sampleRate;
private int numBuckets;
private List<Bucket> buckets;
public HistogramBuilder() {
@ -36,7 +39,6 @@ public class HistogramBuilder {
public HistogramBuilder(Histogram histogram) {
this.dataType = histogram.dataType;
this.numBuckets = histogram.numBuckets;
this.sampleRate = histogram.sampleRate;
this.buckets = histogram.buckets;
}
@ -46,43 +48,22 @@ public class HistogramBuilder {
return this;
}
public HistogramBuilder setSampleRate(double sampleRate) {
this.sampleRate = sampleRate;
return this;
}
public HistogramBuilder setNumBuckets(int numBuckets) {
this.numBuckets = numBuckets;
return this;
}
public HistogramBuilder setSampleRate(double sampleRate) {
if (sampleRate < 0 || sampleRate > 1.0) {
this.sampleRate = 1.0;
} else {
this.sampleRate = sampleRate;
}
return this;
}
public HistogramBuilder setBuckets(List<Bucket> buckets) {
buckets.sort(Comparator.comparing(Bucket::getLower));
this.buckets = buckets;
return this;
}
public Type getDataType() {
return dataType;
}
public int getNumBuckets() {
return numBuckets;
}
public double getSampleRate() {
return sampleRate;
}
public List<Bucket> getBuckets() {
return buckets;
}
public Histogram build() {
return new Histogram(dataType, numBuckets, sampleRate, buckets);
return new Histogram(dataType, sampleRate, buckets);
}
}

View File

@ -0,0 +1,162 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import java.util.Objects;
/**
 * A numeric range [low, high] carrying an estimated number of distinct values,
 * used by the Nereids statistics derivation to estimate filter/join selectivity.
 * A range with both bounds {@code NaN} represents the empty range.
 */
public class StatisticRange {
    // Heuristic overlap factor used when an infinite range intersects a finite one.
    private static final double INFINITE_TO_FINITE_RANGE_INTERSECT_OVERLAP_HEURISTIC_FACTOR = 0.25;
    // Heuristic overlap factor used when two infinite ranges intersect.
    private static final double INFINITE_TO_INFINITE_RANGE_INTERSECT_OVERLAP_HEURISTIC_FACTOR = 0.5;

    /**
     * {@code NaN} represents empty range ({@code high} must be {@code NaN} too)
     */
    private final double low;
    /**
     * {@code NaN} represents empty range ({@code low} must be {@code NaN} too)
     */
    private final double high;

    // Estimated NDV inside [low, high]; may be infinite or fractional.
    private final double distinctValues;

    public StatisticRange(double low, double high, double distinctValues) {
        this.low = low;
        this.high = high;
        this.distinctValues = distinctValues;
    }

    /**
     * Estimates the fraction of this range that overlaps {@code other}, in [0, 1].
     * Falls back to the heuristic factors above when either range is infinite,
     * and to {@code 1/ndv} for a single-point intersection.
     */
    public double overlapPercentWith(StatisticRange other) {
        Objects.requireNonNull(other, "other is null");
        if (this.isEmpty() || other.isEmpty() || this.distinctValues == 0 || other.distinctValues == 0) {
            return 0.0; // zero is better than NaN as it will behave properly for calculating row count
        }

        if (this.equals(other) && !isBothInfinite()) {
            return 1.0;
        }

        double lengthOfIntersect = Math.min(this.high, other.high) - Math.max(this.low, other.low);
        if (Double.isInfinite(lengthOfIntersect)) {
            // infinite intersection: compare NDVs if both finite, otherwise use the heuristic
            if (Double.isFinite(this.distinctValues) && Double.isFinite(other.distinctValues)) {
                return Math.min(other.distinctValues / this.distinctValues, 1);
            }
            return INFINITE_TO_INFINITE_RANGE_INTERSECT_OVERLAP_HEURISTIC_FACTOR;
        }
        if (lengthOfIntersect == 0) {
            // ranges touch at exactly one point: assume one distinct value overlaps
            return 1 / Math.max(this.distinctValues, 1);
        }
        if (lengthOfIntersect < 0) {
            // disjoint ranges
            return 0;
        }

        double length = length();
        if (Double.isInfinite(length)) {
            return INFINITE_TO_FINITE_RANGE_INTERSECT_OVERLAP_HEURISTIC_FACTOR;
        }
        if (lengthOfIntersect > 0) {
            return lengthOfIntersect / length;
        }

        return INFINITE_TO_FINITE_RANGE_INTERSECT_OVERLAP_HEURISTIC_FACTOR;
    }

    /** Returns the canonical empty range (NaN bounds, zero NDV). */
    public static StatisticRange empty() {
        return new StatisticRange(Double.NaN, Double.NaN, 0);
    }

    public boolean isEmpty() {
        return Double.isNaN(low) && Double.isNaN(high);
    }

    public boolean isBothInfinite() {
        return Double.isInfinite(low) && Double.isInfinite(high);
    }

    /** Builds a range from a column's min/max/ndv statistics. */
    public static StatisticRange from(ColumnStatistic column) {
        return new StatisticRange(column.minValue, column.maxValue, column.ndv);
    }

    public double getLow() {
        return low;
    }

    public double getHigh() {
        return high;
    }

    /** Width of the range; NaN for an empty range, may be infinite. */
    public double length() {
        return this.high - this.low;
    }

    /**
     * Intersection of two ranges; NDV is estimated from the mutual overlap
     * percentages. Returns {@link #empty()} when the ranges are disjoint.
     */
    public StatisticRange intersect(StatisticRange other) {
        double newLow = Math.max(low, other.low);
        double newHigh = Math.min(high, other.high);
        if (newLow <= newHigh) {
            return new StatisticRange(newLow, newHigh, overlappingDistinctValues(other));
        }
        return empty();
    }

    /**
     * Union of two ranges. The combined NDV counts the overlapping part once
     * (taking the larger of the two overlap estimates) plus each side's
     * non-overlapping share.
     */
    public StatisticRange union(StatisticRange other) {
        double overlapPercentThis = this.overlapPercentWith(other);
        double overlapPercentOther = other.overlapPercentWith(this);
        double overlapNDVThis = overlapPercentThis * distinctValues;
        double overlapNDVOther = overlapPercentOther * other.distinctValues;
        double maxOverlapNDV = Math.max(overlapNDVThis, overlapNDVOther);
        double newNDV = maxOverlapNDV + ((1 - overlapPercentThis) * distinctValues)
                + ((1 - overlapPercentOther) * other.distinctValues);
        return new StatisticRange(Math.min(low, other.low), Math.max(high, other.high), newNDV);
    }

    // NDV of the intersection: the larger directional overlap estimate,
    // capped by the smaller input NDV (NaN-tolerant on both steps).
    private double overlappingDistinctValues(StatisticRange other) {
        double overlapPercentOfLeft = overlapPercentWith(other);
        double overlapPercentOfRight = other.overlapPercentWith(this);
        double overlapDistinctValuesLeft = overlapPercentOfLeft * distinctValues;
        double overlapDistinctValuesRight = overlapPercentOfRight * other.distinctValues;
        double minInputDistinctValues = minExcludeNaN(this.distinctValues, other.distinctValues);

        return minExcludeNaN(minInputDistinctValues,
                maxExcludeNaN(overlapDistinctValuesLeft, overlapDistinctValuesRight));
    }

    /** Like {@link Math#min} but returns the other operand when one is NaN. */
    public static double minExcludeNaN(double v1, double v2) {
        if (Double.isNaN(v1)) {
            return v2;
        }
        if (Double.isNaN(v2)) {
            return v1;
        }
        return Math.min(v1, v2);
    }

    /** Like {@link Math#max} but returns the other operand when one is NaN. */
    public static double maxExcludeNaN(double v1, double v2) {
        if (Double.isNaN(v1)) {
            return v2;
        }
        if (Double.isNaN(v2)) {
            return v1;
        }
        return Math.max(v1, v2);
    }

    public double getDistinctValues() {
        return distinctValues;
    }

    // NOTE(review): duplicates from(ColumnStatistic); kept for source compatibility
    // with existing callers — consider consolidating on one factory.
    public static StatisticRange fromColumnStatistics(ColumnStatistic columnStatistic) {
        return new StatisticRange(columnStatistic.minValue, columnStatistic.maxValue, columnStatistic.ndv);
    }
}

View File

@ -0,0 +1,134 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import org.apache.doris.nereids.stats.StatsMathUtil;
import org.apache.doris.nereids.trees.expressions.Expression;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
/**
 * Statistics of a plan node: the estimated output row count plus a per-expression
 * {@link ColumnStatistic} map. Successor of the legacy {@code StatsDeriveResult}.
 */
public class Statistics {

    private final double rowCount;

    // Column statistics keyed by the expression (slot or computed expression) they describe.
    private final Map<Expression, ColumnStatistic> expressionToColumnStats;

    // Lazily computed by computeSize(); -1 means "not computed yet".
    // BUGFIX: without the -1 initializer the field defaults to 0.0, the
    // `computeSize < 0` guard never fires, and computeSize() always returns 0
    // (legacy StatsDeriveResult initialized this to -1D as well).
    private double computeSize = -1D;

    @Deprecated
    private double width;

    @Deprecated
    private double penalty;

    /** Copy constructor: shallow-copies the column-statistics map. */
    public Statistics(Statistics another) {
        this.rowCount = another.rowCount;
        this.expressionToColumnStats = new HashMap<>(another.expressionToColumnStats);
        this.width = another.width;
        this.penalty = another.penalty;
    }

    public Statistics(double rowCount, Map<Expression, ColumnStatistic> expressionToColumnStats) {
        this.rowCount = rowCount;
        this.expressionToColumnStats = expressionToColumnStats;
    }

    public Statistics(double rowCount, Map<Expression, ColumnStatistic> expressionToColumnStats, double width,
            double penalty) {
        this.rowCount = rowCount;
        this.expressionToColumnStats = expressionToColumnStats;
        this.width = width;
        this.penalty = penalty;
    }

    /** Returns the statistics for {@code expression}, or null if absent. */
    public ColumnStatistic findColumnStatistics(Expression expression) {
        return expressionToColumnStats.get(expression);
    }

    public Map<Expression, ColumnStatistic> columnStatistics() {
        return expressionToColumnStats;
    }

    public double getRowCount() {
        return rowCount;
    }

    /**
     * Returns a copy with the given row count, scaling each column's
     * ndv/numNulls/count by the implied selectivity.
     */
    public Statistics withRowCount(double rowCount) {
        Statistics statistics = new Statistics(rowCount, new HashMap<>(expressionToColumnStats), width, penalty);
        statistics.fix(rowCount / StatsMathUtil.nonZeroDivisor(this.rowCount));
        return statistics;
    }

    /**
     * Rescales every column statistic in place by selectivity {@code sel},
     * capping ndv/numNulls/count at the (new) row count.
     */
    public void fix(double sel) {
        for (Entry<Expression, ColumnStatistic> entry : expressionToColumnStats.entrySet()) {
            ColumnStatistic columnStatistic = entry.getValue();
            ColumnStatisticBuilder columnStatisticBuilder = new ColumnStatisticBuilder(columnStatistic);
            columnStatisticBuilder.setNdv(Math.min(Math.ceil(columnStatistic.ndv * sel), rowCount));
            columnStatisticBuilder.setNumNulls(Math.min(Math.ceil(columnStatistic.numNulls * sel), rowCount));
            columnStatisticBuilder.setCount(Math.min(Math.ceil(columnStatistic.count * sel), rowCount));
            expressionToColumnStats.put(entry.getKey(), columnStatisticBuilder.build());
        }
    }

    /** Applies selectivity {@code sel} (NaN treated as 1) to the row count. */
    public Statistics withSel(double sel) {
        sel = StatsMathUtil.minNonNaN(sel, 1);
        return withRowCount(rowCount * sel);
    }

    public Statistics addColumnStats(Expression expression, ColumnStatistic columnStatistic) {
        expressionToColumnStats.put(expression, columnStatistic);
        return this;
    }

    /** Merges the other statistics' column map into this one (in place). */
    public Statistics merge(Statistics statistics) {
        expressionToColumnStats.putAll(statistics.expressionToColumnStats);
        return this;
    }

    /**
     * Estimated byte size of the output: sum of per-column data sizes (at least 1)
     * times the row count. Cached after the first call.
     */
    public double computeSize() {
        if (computeSize < 0) {
            computeSize = Math.max(1, expressionToColumnStats.values().stream()
                    .map(s -> s.dataSize).reduce(0D, Double::sum)
            ) * rowCount;
        }
        return computeSize;
    }

    @Override
    public String toString() {
        return String.format("rows=%.4f", rowCount);
    }

    public void setWidth(double width) {
        this.width = width;
    }

    public void setPenalty(double penalty) {
        this.penalty = penalty;
    }

    public double getWidth() {
        return width;
    }

    public double getPenalty() {
        return penalty;
    }
}

View File

@ -0,0 +1,60 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import org.apache.doris.nereids.trees.expressions.Expression;
import java.util.HashMap;
import java.util.Map;
/**
 * Fluent builder for {@link Statistics}: collects a row count and a map of
 * per-expression column statistics, then materializes an immutable result.
 */
public class StatisticsBuilder {

    private double rowCount;
    // Accumulated column statistics; always a private mutable copy.
    private final Map<Expression, ColumnStatistic> expressionToColumnStats = new HashMap<>();

    /** Creates an empty builder (row count 0, no column statistics). */
    public StatisticsBuilder() {
    }

    /** Seeds the builder from an existing {@link Statistics} instance. */
    public StatisticsBuilder(Statistics statistics) {
        this.rowCount = statistics.getRowCount();
        this.expressionToColumnStats.putAll(statistics.columnStatistics());
    }

    public StatisticsBuilder setRowCount(double rowCount) {
        this.rowCount = rowCount;
        return this;
    }

    /** Adds all entries of the given map, overwriting duplicates. */
    public StatisticsBuilder putColumnStatistics(
            Map<Expression, ColumnStatistic> expressionToColumnStats) {
        this.expressionToColumnStats.putAll(expressionToColumnStats);
        return this;
    }

    /** Adds (or replaces) the statistics for a single expression. */
    public StatisticsBuilder putColumnStatistics(Expression expression, ColumnStatistic columnStatistic) {
        this.expressionToColumnStats.put(expression, columnStatistic);
        return this;
    }

    /** Builds the {@link Statistics} from the accumulated state. */
    public Statistics build() {
        return new Statistics(rowCount, expressionToColumnStats);
    }
}

View File

@ -31,7 +31,7 @@ public class StatisticsCache {
private static final Logger LOG = LogManager.getLogger(StatisticsCache.class);
private final AsyncLoadingCache<StatisticsCacheKey, Statistic> cache = Caffeine.newBuilder()
private final AsyncLoadingCache<StatisticsCacheKey, ColumnLevelStatisticCache> cache = Caffeine.newBuilder()
.maximumSize(StatisticConstants.STATISTICS_RECORDS_CACHE_SIZE)
.expireAfterAccess(Duration.ofHours(StatisticConstants.STATISTICS_CACHE_VALID_DURATION_IN_HOURS))
.refreshAfterWrite(Duration.ofHours(StatisticConstants.STATISTICS_CACHE_REFRESH_INTERVAL))
@ -44,18 +44,18 @@ public class StatisticsCache {
public ColumnStatistic getColumnStatistics(long tblId, long idxId, String colName) {
ConnectContext ctx = ConnectContext.get();
if (ctx != null && ctx.getSessionVariable().internalSession) {
return ColumnStatistic.DEFAULT;
return ColumnStatistic.UNKNOWN;
}
StatisticsCacheKey k = new StatisticsCacheKey(tblId, idxId, colName);
try {
CompletableFuture<Statistic> f = cache.get(k);
CompletableFuture<ColumnLevelStatisticCache> f = cache.get(k);
if (f.isDone() && f.get() != null) {
return f.get().getColumnStatistic();
}
} catch (Exception e) {
LOG.warn("Unexpected exception while returning ColumnStatistic", e);
}
return ColumnStatistic.DEFAULT;
return ColumnStatistic.UNKNOWN;
}
public Histogram getHistogram(long tblId, String colName) {
@ -65,18 +65,18 @@ public class StatisticsCache {
public Histogram getHistogram(long tblId, long idxId, String colName) {
ConnectContext ctx = ConnectContext.get();
if (ctx != null && ctx.getSessionVariable().internalSession) {
return Histogram.DEFAULT;
return null;
}
StatisticsCacheKey k = new StatisticsCacheKey(tblId, idxId, colName);
try {
CompletableFuture<Statistic> f = cache.get(k);
CompletableFuture<ColumnLevelStatisticCache> f = cache.get(k);
if (f.isDone() && f.get() != null) {
return f.get().getHistogram();
}
} catch (Exception e) {
LOG.warn("Unexpected exception while returning Histogram", e);
}
return Histogram.DEFAULT;
return null;
}
// TODO: finish this method.
@ -84,7 +84,7 @@ public class StatisticsCache {
cache.synchronous().invalidate(new StatisticsCacheKey(tblId, idxId, colName));
}
public void updateCache(long tblId, long idxId, String colName, Statistic statistic) {
public void updateCache(long tblId, long idxId, String colName, ColumnLevelStatisticCache statistic) {
cache.synchronous().put(new StatisticsCacheKey(tblId, idxId, colName), statistic);
}

View File

@ -35,7 +35,7 @@ import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
import java.util.concurrent.Executor;
public class StatisticsCacheLoader implements AsyncCacheLoader<StatisticsCacheKey, Statistic> {
public class StatisticsCacheLoader implements AsyncCacheLoader<StatisticsCacheKey, ColumnLevelStatisticCache> {
private static final Logger LOG = LogManager.getLogger(StatisticsCacheLoader.class);
@ -53,7 +53,7 @@ public class StatisticsCacheLoader implements AsyncCacheLoader<StatisticsCacheKe
// TODO: Maybe we should trigger a analyze job when the required ColumnStatistic doesn't exists.
@Override
public @NonNull CompletableFuture<Statistic> asyncLoad(@NonNull StatisticsCacheKey key,
public @NonNull CompletableFuture<ColumnLevelStatisticCache> asyncLoad(@NonNull StatisticsCacheKey key,
@NonNull Executor executor) {
synchronized (LOCK) {
if (CUR_RUNNING_LOAD > StatisticConstants.LOAD_TASK_LIMITS) {
@ -65,7 +65,7 @@ public class StatisticsCacheLoader implements AsyncCacheLoader<StatisticsCacheKe
}
CUR_RUNNING_LOAD++;
return CompletableFuture.supplyAsync(() -> {
Statistic statistic = new Statistic();
ColumnLevelStatisticCache statistic = new ColumnLevelStatisticCache();
try {
Map<String, String> params = new HashMap<>();
@ -84,7 +84,7 @@ public class StatisticsCacheLoader implements AsyncCacheLoader<StatisticsCacheKe
throw new CompletionException(e);
}
if (CollectionUtils.isEmpty(columnStatistics)) {
statistic.setColumnStatistic(ColumnStatistic.DEFAULT);
statistic.setColumnStatistic(ColumnStatistic.UNKNOWN);
} else {
statistic.setColumnStatistic(columnStatistics.get(0));
}
@ -99,9 +99,7 @@ public class StatisticsCacheLoader implements AsyncCacheLoader<StatisticsCacheKe
LOG.warn("Failed to deserialize histogram statistics", e);
throw new CompletionException(e);
}
if (CollectionUtils.isEmpty(histogramStatistics)) {
statistic.setHistogram(Histogram.DEFAULT);
} else {
if (!CollectionUtils.isEmpty(histogramStatistics)) {
statistic.setHistogram(histogramStatistics.get(0));
}
} finally {

View File

@ -79,7 +79,7 @@ public class StatisticsRepository {
public static ColumnStatistic queryColumnStatisticsByName(long tableId, String colName) {
ResultRow resultRow = queryColumnStatisticById(tableId, colName);
if (resultRow == null) {
return ColumnStatistic.DEFAULT;
return ColumnStatistic.UNKNOWN;
}
return ColumnStatistic.fromResultRow(resultRow);
}
@ -203,7 +203,7 @@ public class StatisticsRepository {
Histogram histogram = Env.getCurrentEnv().getStatisticsCache()
.getHistogram(objects.table.getId(), -1, colName);
Statistic statistic = new Statistic();
ColumnLevelStatisticCache statistic = new ColumnLevelStatisticCache();
statistic.setHistogram(histogram);
statistic.setColumnStatistic(builder.build());

View File

@ -34,6 +34,7 @@ public class StatsDeriveResult {
private final double rowCount;
private double computeSize = -1D;
private int width = 1;
private double penalty = 0.0;
// TODO: Should we use immutable type for this field?

View File

@ -117,15 +117,15 @@ class JoinHintTest extends TestWithFeService implements MemoPatternMatchSupporte
physicalDistribute(
physicalProject(
physicalHashJoin(
physicalHashJoin(),
physicalDistribute().when(dis -> {
physicalDistribute(physicalHashJoin()).when(dis -> {
DistributionSpec spec = dis.getDistributionSpec();
Assertions.assertTrue(spec instanceof DistributionSpecHash);
DistributionSpecHash hashSpec = (DistributionSpecHash) spec;
Assertions.assertEquals(ShuffleType.ENFORCED,
hashSpec.getShuffleType());
return true;
})
}),
physicalDistribute()
).when(join -> join.getHint() == JoinHint.SHUFFLE_RIGHT)
)
)

View File

@ -35,7 +35,7 @@ import org.apache.doris.nereids.types.IntegerType;
import org.apache.doris.nereids.util.MemoTestUtils;
import org.apache.doris.nereids.util.PlanConstructor;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import com.google.common.collect.ImmutableList;
import mockit.Expectations;
@ -67,9 +67,9 @@ public class DeriveStatsJobTest {
while (!cascadesContext.getJobPool().isEmpty()) {
cascadesContext.getJobPool().pop().execute();
}
StatsDeriveResult statistics = cascadesContext.getMemo().getRoot().getStatistics();
Statistics statistics = cascadesContext.getMemo().getRoot().getStatistics();
Assertions.assertNotNull(statistics);
Assertions.assertEquals(1, statistics.getRowCount());
Assertions.assertEquals(0, statistics.getRowCount());
}
private LogicalOlapScan constructOlapSCan() {

View File

@ -17,9 +17,9 @@
package org.apache.doris.nereids.stats;
import org.apache.doris.common.Id;
import org.apache.doris.nereids.trees.expressions.Add;
import org.apache.doris.nereids.trees.expressions.Divide;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.Multiply;
import org.apache.doris.nereids.trees.expressions.SlotReference;
import org.apache.doris.nereids.trees.expressions.Subtract;
@ -28,7 +28,7 @@ import org.apache.doris.nereids.trees.expressions.functions.agg.Min;
import org.apache.doris.nereids.types.IntegerType;
import org.apache.doris.statistics.ColumnStatistic;
import org.apache.doris.statistics.ColumnStatisticBuilder;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import org.apache.commons.math3.util.Precision;
import org.junit.jupiter.api.Assertions;
@ -45,7 +45,7 @@ class ExpressionEstimationTest {
public void test1() {
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
Max max = new Max(a);
Map<Id, ColumnStatistic> slotToColumnStat = new HashMap<>();
Map<Expression, ColumnStatistic> slotToColumnStat = new HashMap<>();
ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
.setNdv(500)
@ -53,8 +53,8 @@ class ExpressionEstimationTest {
.setNumNulls(0)
.setMinValue(0)
.setMaxValue(500);
slotToColumnStat.put(a.getExprId(), builder.build());
StatsDeriveResult stat = new StatsDeriveResult(1000, slotToColumnStat);
slotToColumnStat.put(a, builder.build());
Statistics stat = new Statistics(1000, slotToColumnStat);
//min/max not changed. select min(A) as X from T group by B. X.max is A.max, not A.min
ColumnStatistic estimated = ExpressionEstimation.estimate(max, stat);
@ -68,7 +68,7 @@ class ExpressionEstimationTest {
@Test
public void test2() {
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
Map<Id, ColumnStatistic> slotToColumnStat = new HashMap<>();
Map<Expression, ColumnStatistic> slotToColumnStat = new HashMap<>();
ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
.setNdv(500)
@ -76,8 +76,8 @@ class ExpressionEstimationTest {
.setNumNulls(0)
.setMinValue(0)
.setMaxValue(1000);
slotToColumnStat.put(a.getExprId(), builder.build());
StatsDeriveResult stat = new StatsDeriveResult(1000, 1, 0, slotToColumnStat);
slotToColumnStat.put(a, builder.build());
Statistics stat = new Statistics(1000, slotToColumnStat);
Min max = new Min(a);
//min/max not changed. select max(A) as X from T group by B. X.min is A.min, not A.max
ColumnStatistic estimated = ExpressionEstimation.estimate(max, stat);
@ -92,7 +92,7 @@ class ExpressionEstimationTest {
@Test
public void test3() {
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
Map<Id, ColumnStatistic> slotToColumnStat = new HashMap<>();
Map<Expression, ColumnStatistic> slotToColumnStat = new HashMap<>();
ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
.setNdv(500)
@ -106,10 +106,10 @@ class ExpressionEstimationTest {
.setNumNulls(0)
.setMinValue(300)
.setMaxValue(1000);
slotToColumnStat.put(a.getExprId(), builder.build());
StatsDeriveResult stat = new StatsDeriveResult(1000, 1, 0, slotToColumnStat);
slotToColumnStat.put(a, builder.build());
Statistics stat = new Statistics(1000, slotToColumnStat);
SlotReference b = new SlotReference("b", IntegerType.INSTANCE);
slotToColumnStat.put(b.getExprId(), builder1.build());
slotToColumnStat.put(b, builder1.build());
Add add = new Add(a, b);
ColumnStatistic estimated = ExpressionEstimation.estimate(add, stat);
Assertions.assertEquals(300, estimated.minValue);
@ -122,19 +122,19 @@ class ExpressionEstimationTest {
@Test
public void test4() {
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
Map<Id, ColumnStatistic> slotToColumnStat = new HashMap<>();
Map<Expression, ColumnStatistic> slotToColumnStat = new HashMap<>();
ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
.setNdv(500)
.setAvgSizeByte(4)
.setNumNulls(0)
.setMinValue(0)
.setMaxValue(500);
slotToColumnStat.put(a.getExprId(), builder.build());
StatsDeriveResult stat = new StatsDeriveResult(1000, 1, 0, slotToColumnStat);
slotToColumnStat.put(a, builder.build());
Statistics stat = new Statistics(1000, slotToColumnStat);
SlotReference b = new SlotReference("b", IntegerType.INSTANCE);
builder.setMinValue(300);
builder.setMaxValue(1000);
slotToColumnStat.put(b.getExprId(), builder.build());
slotToColumnStat.put(b, builder.build());
Subtract subtract = new Subtract(a, b);
ColumnStatistic estimated = ExpressionEstimation.estimate(subtract, stat);
Assertions.assertEquals(-1000, estimated.minValue);
@ -147,19 +147,19 @@ class ExpressionEstimationTest {
@Test
public void test5() {
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
Map<Id, ColumnStatistic> slotToColumnStat = new HashMap<>();
Map<Expression, ColumnStatistic> slotToColumnStat = new HashMap<>();
ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
.setNdv(500)
.setAvgSizeByte(4)
.setNumNulls(0)
.setMinValue(-200)
.setMaxValue(-100);
slotToColumnStat.put(a.getExprId(), builder.build());
StatsDeriveResult stat = new StatsDeriveResult(1000, 1, 0, slotToColumnStat);
slotToColumnStat.put(a, builder.build());
Statistics stat = new Statistics(1000, slotToColumnStat);
SlotReference b = new SlotReference("b", IntegerType.INSTANCE);
builder.setMinValue(-300);
builder.setMaxValue(1000);
slotToColumnStat.put(b.getExprId(), builder.build());
slotToColumnStat.put(b, builder.build());
Multiply multiply = new Multiply(a, b);
ColumnStatistic estimated = ExpressionEstimation.estimate(multiply, stat);
Assertions.assertEquals(-200 * 1000, estimated.minValue);
@ -172,19 +172,19 @@ class ExpressionEstimationTest {
@Test
public void test6() {
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
Map<Id, ColumnStatistic> slotToColumnStat = new HashMap<>();
Map<Expression, ColumnStatistic> slotToColumnStat = new HashMap<>();
ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
.setNdv(500)
.setAvgSizeByte(4)
.setNumNulls(0)
.setMinValue(-200)
.setMaxValue(-100);
slotToColumnStat.put(a.getExprId(), builder.build());
StatsDeriveResult stat = new StatsDeriveResult(1000, 1, 0, slotToColumnStat);
slotToColumnStat.put(a, builder.build());
Statistics stat = new Statistics(1000, slotToColumnStat);
SlotReference b = new SlotReference("b", IntegerType.INSTANCE);
builder.setMinValue(-1000);
builder.setMaxValue(-300);
slotToColumnStat.put(b.getExprId(), builder.build());
slotToColumnStat.put(b, builder.build());
Multiply multiply = new Multiply(a, b);
ColumnStatistic estimated = ExpressionEstimation.estimate(multiply, stat);
Assertions.assertEquals(-100 * -300, estimated.minValue);
@ -197,7 +197,7 @@ class ExpressionEstimationTest {
@Test
public void test7() {
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
Map<Id, ColumnStatistic> slotToColumnStat = new HashMap<>();
Map<Expression, ColumnStatistic> slotToColumnStat = new HashMap<>();
ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
.setNdv(500)
@ -211,10 +211,10 @@ class ExpressionEstimationTest {
.setNumNulls(0)
.setMinValue(-300)
.setMaxValue(1000);
slotToColumnStat.put(a.getExprId(), builder.build());
StatsDeriveResult stat = new StatsDeriveResult(1000, 1, 0, slotToColumnStat);
slotToColumnStat.put(a, builder.build());
Statistics stat = new Statistics(1000, slotToColumnStat);
SlotReference b = new SlotReference("b", IntegerType.INSTANCE);
slotToColumnStat.put(b.getExprId(), builder1.build());
slotToColumnStat.put(b, builder1.build());
Divide divide = new Divide(a, b);
ColumnStatistic estimated = ExpressionEstimation.estimate(divide, stat);
Assertions.assertTrue(Precision.equals(-0.2, estimated.minValue, 0.001));
@ -227,7 +227,7 @@ class ExpressionEstimationTest {
@Test
public void test8() {
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
Map<Id, ColumnStatistic> slotToColumnStat = new HashMap<>();
Map<Expression, ColumnStatistic> slotToColumnStat = new HashMap<>();
ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
.setNdv(500)
@ -241,10 +241,10 @@ class ExpressionEstimationTest {
.setNumNulls(0)
.setMinValue(-1000)
.setMaxValue(-100);
slotToColumnStat.put(a.getExprId(), builder.build());
StatsDeriveResult stat = new StatsDeriveResult(1000, 1, 0, slotToColumnStat);
slotToColumnStat.put(a, builder.build());
Statistics stat = new Statistics(1000, slotToColumnStat);
SlotReference b = new SlotReference("b", IntegerType.INSTANCE);
slotToColumnStat.put(b.getExprId(), builder1.build());
slotToColumnStat.put(b, builder1.build());
Divide divide = new Divide(a, b);
ColumnStatistic estimated = ExpressionEstimation.estimate(divide, stat);
Assertions.assertTrue(Precision.equals(0.1, estimated.minValue, 0.001));

View File

@ -17,9 +17,9 @@
package org.apache.doris.nereids.stats;
import org.apache.doris.common.Id;
import org.apache.doris.nereids.trees.expressions.And;
import org.apache.doris.nereids.trees.expressions.EqualTo;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.GreaterThan;
import org.apache.doris.nereids.trees.expressions.GreaterThanEqual;
import org.apache.doris.nereids.trees.expressions.InPredicate;
@ -32,7 +32,7 @@ import org.apache.doris.nereids.trees.expressions.literal.IntegerLiteral;
import org.apache.doris.nereids.types.IntegerType;
import org.apache.doris.statistics.ColumnStatistic;
import org.apache.doris.statistics.ColumnStatisticBuilder;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import com.google.common.collect.Lists;
import org.apache.commons.math3.util.Precision;
@ -55,25 +55,21 @@ class FilterEstimationTest {
IntegerLiteral int100 = new IntegerLiteral(100);
LessThan lessThan = new LessThan(b, int100);
Or or = new Or(greaterThan1, lessThan);
Map<Id, ColumnStatistic> columnStat = new HashMap<>();
Map<Expression, ColumnStatistic> columnStat = new HashMap<>();
ColumnStatistic aStats = new ColumnStatisticBuilder().setCount(500).setNdv(500).setAvgSizeByte(4)
.setNumNulls(500).setDataSize(0)
.setMinValue(0).setMaxValue(1000).setMinExpr(null).build();
ColumnStatistic bStats = new ColumnStatisticBuilder().setCount(500).setNdv(500).setAvgSizeByte(4)
.setNumNulls(500).setDataSize(0)
.setMinValue(0).setMaxValue(1000).setMinExpr(null).setIsUnknown(true).build();
columnStat.put(a.getExprId(), aStats);
columnStat.put(b.getExprId(), bStats);
columnStat.put(a, aStats);
columnStat.put(b, bStats);
StatsDeriveResult stat = new StatsDeriveResult(1000, columnStat);
FilterEstimation filterEstimation = new FilterEstimation(stat);
StatsDeriveResult expected = filterEstimation.estimate(or);
double greaterThan1Selectivity = int500.getDouble() / (aStats.maxValue - aStats.minValue);
double lessThanSelectivity = FilterEstimation.DEFAULT_INEQUALITY_COMPARISON_SELECTIVITY;
double andSelectivity = greaterThan1Selectivity * lessThanSelectivity;
double orSelectivity = greaterThan1Selectivity + lessThanSelectivity - andSelectivity;
Statistics stat = new Statistics(1000, columnStat);
FilterEstimation filterEstimation = new FilterEstimation();
Statistics expected = filterEstimation.estimate(or, stat);
Assertions.assertTrue(
Precision.equals(expected.getRowCount(), orSelectivity * stat.getRowCount(),
Precision.equals(expected.getRowCount(), 550,
0.01));
}
@ -88,24 +84,21 @@ class FilterEstimationTest {
IntegerLiteral int100 = new IntegerLiteral(100);
LessThan lessThan = new LessThan(b, int100);
And and = new And(greaterThan1, lessThan);
Map<Id, ColumnStatistic> columnStat = new HashMap<>();
Map<Expression, ColumnStatistic> columnStat = new HashMap<>();
ColumnStatistic aStats = new ColumnStatisticBuilder().setCount(500).setNdv(500)
.setAvgSizeByte(4).setNumNulls(500).setDataSize(0)
.setMinValue(0).setMaxValue(1000).setMinExpr(null).build();
ColumnStatistic bStats = new ColumnStatisticBuilder().setCount(500).setNdv(500)
.setAvgSizeByte(4).setNumNulls(500).setDataSize(0)
.setMinValue(0).setMaxValue(1000).setMinExpr(null).setIsUnknown(true).build();
columnStat.put(a.getExprId(), aStats);
columnStat.put(b.getExprId(), bStats);
columnStat.put(a, aStats);
columnStat.put(b, bStats);
StatsDeriveResult stat = new StatsDeriveResult(1000, columnStat);
FilterEstimation filterEstimation = new FilterEstimation(stat);
StatsDeriveResult expected = filterEstimation.estimate(and);
double greaterThan1Selectivity = int500.getDouble() / (aStats.maxValue - aStats.minValue);
double lessThanSelectivity = FilterEstimation.DEFAULT_INEQUALITY_COMPARISON_SELECTIVITY;
double andSelectivity = greaterThan1Selectivity * lessThanSelectivity;
Statistics stat = new Statistics(1000, columnStat);
FilterEstimation filterEstimation = new FilterEstimation();
Statistics expected = filterEstimation.estimate(and, stat);
Assertions.assertTrue(
Precision.equals(expected.getRowCount(), andSelectivity * stat.getRowCount(),
Precision.equals(expected.getRowCount(), 50,
0.01));
}
@ -114,16 +107,16 @@ class FilterEstimationTest {
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
IntegerLiteral int500 = new IntegerLiteral(500);
InPredicate in = new InPredicate(a, Lists.newArrayList(int500));
Map<Id, ColumnStatistic> slotToColumnStat = new HashMap<>();
Map<Expression, ColumnStatistic> slotToColumnStat = new HashMap<>();
ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
.setNdv(500)
.setIsUnknown(true);
slotToColumnStat.put(a.getExprId(), builder.build());
StatsDeriveResult stat = new StatsDeriveResult(1000, 1, 0, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation(stat);
StatsDeriveResult expected = filterEstimation.estimate(in);
slotToColumnStat.put(a, builder.build());
Statistics stat = new Statistics(1000, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation();
Statistics expected = filterEstimation.estimate(in, stat);
Assertions.assertEquals(
FilterEstimation.DEFAULT_INEQUALITY_COMPARISON_SELECTIVITY * stat.getRowCount(),
FilterEstimation.DEFAULT_INEQUALITY_COEFFICIENT * stat.getRowCount(),
expected.getRowCount());
}
@ -133,16 +126,16 @@ class FilterEstimationTest {
IntegerLiteral int500 = new IntegerLiteral(500);
InPredicate in = new InPredicate(a, Lists.newArrayList(int500));
Not notIn = new Not(in);
Map<Id, ColumnStatistic> slotToColumnStat = new HashMap<>();
Map<Expression, ColumnStatistic> slotToColumnStat = new HashMap<>();
ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
.setNdv(500)
.setIsUnknown(true);
slotToColumnStat.put(a.getExprId(), builder.build());
StatsDeriveResult stat = new StatsDeriveResult(1000, 1, 0, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation(stat);
StatsDeriveResult expected = filterEstimation.estimate(notIn);
slotToColumnStat.put(a, builder.build());
Statistics stat = new Statistics(1000, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation();
Statistics expected = filterEstimation.estimate(notIn, stat);
Assertions.assertEquals(
FilterEstimation.DEFAULT_INEQUALITY_COMPARISON_SELECTIVITY * stat.getRowCount(),
FilterEstimation.DEFAULT_INEQUALITY_COEFFICIENT * stat.getRowCount(),
expected.getRowCount());
}
@ -159,16 +152,16 @@ class FilterEstimationTest {
GreaterThan ge = new GreaterThan(a, int100);
LessThan le = new LessThan(a, int200);
And and = new And(ge, le);
Map<Id, ColumnStatistic> slotToColumnStat = new HashMap<>();
Map<Expression, ColumnStatistic> slotToColumnStat = new HashMap<>();
ColumnStatistic aStats = new ColumnStatisticBuilder().setCount(300).setNdv(30)
.setAvgSizeByte(4).setNumNulls(0).setDataSize(0)
.setMinValue(0).setMaxValue(300).build();
slotToColumnStat.put(a.getExprId(), aStats);
StatsDeriveResult stats = new StatsDeriveResult(300, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation(stats);
StatsDeriveResult result = filterEstimation.estimate(and);
slotToColumnStat.put(a, aStats);
Statistics stats = new Statistics(300, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation();
Statistics result = filterEstimation.estimate(and, stats);
Assertions.assertEquals(100, result.getRowCount());
ColumnStatistic aStatsEst = result.getColumnStatsBySlot(a);
ColumnStatistic aStatsEst = result.findColumnStatistics(a);
Assertions.assertEquals(100, aStatsEst.minValue);
Assertions.assertEquals(200, aStatsEst.maxValue);
Assertions.assertEquals(1.0, aStatsEst.selectivity);
@ -188,7 +181,7 @@ class FilterEstimationTest {
EqualTo equalTo = new EqualTo(a, c);
And and = new And(greaterThan1, lessThan);
Or or = new Or(and, equalTo);
Map<Id, ColumnStatistic> slotToColumnStat = new HashMap<>();
Map<Expression, ColumnStatistic> slotToColumnStat = new HashMap<>();
ColumnStatistic aStats = new ColumnStatisticBuilder().setCount(500).setNdv(500)
.setAvgSizeByte(4).setNumNulls(500).setDataSize(0)
.setMinValue(0).setMaxValue(1000).setMinExpr(null).build();
@ -198,20 +191,14 @@ class FilterEstimationTest {
ColumnStatistic cStats = new ColumnStatisticBuilder().setCount(500).setNdv(500)
.setAvgSizeByte(4).setNumNulls(500).setDataSize(0)
.setMinValue(0).setMaxValue(1000).setMinExpr(null).build();
slotToColumnStat.put(a.getExprId(), aStats);
slotToColumnStat.put(b.getExprId(), bStats);
slotToColumnStat.put(c.getExprId(), cStats);
StatsDeriveResult stat = new StatsDeriveResult(1000, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation(stat);
StatsDeriveResult expected = filterEstimation.estimate(or);
double greaterThan1Selectivity = int500.getDouble() / (aStats.maxValue - aStats.minValue);
double lessThanSelectivity = int100.getDouble() / (bStats.maxValue - bStats.minValue);
double andSelectivity = greaterThan1Selectivity * lessThanSelectivity;
double equalSelectivity = FilterEstimation.DEFAULT_EQUALITY_COMPARISON_SELECTIVITY;
slotToColumnStat.put(a, aStats);
slotToColumnStat.put(b, bStats);
slotToColumnStat.put(c, cStats);
Statistics stat = new Statistics(1000, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation();
Statistics expected = filterEstimation.estimate(or, stat);
Assertions.assertTrue(
Precision.equals((andSelectivity + equalSelectivity
- andSelectivity * equalSelectivity) * stat.getRowCount(),
expected.getRowCount(), 0.01));
Precision.equals(50, expected.getRowCount(), 0.01));
}
// a > 500 and b < 100 or a > c
@ -227,23 +214,21 @@ class FilterEstimationTest {
GreaterThan greaterThan = new GreaterThan(a, c);
And and = new And(greaterThan1, lessThan);
Or or = new Or(and, greaterThan);
Map<Id, ColumnStatistic> slotToColumnStat = new HashMap<>();
Map<Expression, ColumnStatistic> slotToColumnStat = new HashMap<>();
ColumnStatisticBuilder aBuilder = new ColumnStatisticBuilder()
.setNdv(500)
.setAvgSizeByte(4)
.setNumNulls(500)
.setMinValue(0)
.setMaxValue(1000);
slotToColumnStat.put(a.getExprId(), aBuilder.build());
slotToColumnStat.put(b.getExprId(), aBuilder.build());
slotToColumnStat.put(c.getExprId(), aBuilder.build());
StatsDeriveResult stat = new StatsDeriveResult(1000, 1, 0, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation(stat);
StatsDeriveResult expected = filterEstimation.estimate(or);
slotToColumnStat.put(a, aBuilder.build());
slotToColumnStat.put(b, aBuilder.build());
slotToColumnStat.put(c, aBuilder.build());
Statistics stat = new Statistics(1000, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation();
Statistics expected = filterEstimation.estimate(or, stat);
Assertions.assertTrue(
Precision.equals((0.5 * 0.1
+ FilterEstimation.DEFAULT_INEQUALITY_COMPARISON_SELECTIVITY
- 0.5 * 0.1 * FilterEstimation.DEFAULT_INEQUALITY_COMPARISON_SELECTIVITY) * 1000,
Precision.equals(512.5,
expected.getRowCount(), 0.01));
}
@ -254,17 +239,17 @@ class FilterEstimationTest {
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
IntegerLiteral int500 = new IntegerLiteral(500);
GreaterThanEqual ge = new GreaterThanEqual(a, int500);
Map<Id, ColumnStatistic> slotToColumnStat = new HashMap<>();
Map<Expression, ColumnStatistic> slotToColumnStat = new HashMap<>();
ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
.setNdv(500)
.setAvgSizeByte(4)
.setNumNulls(500)
.setMinValue(0)
.setMaxValue(500);
slotToColumnStat.put(a.getExprId(), builder.build());
StatsDeriveResult stat = new StatsDeriveResult(1000, 1, 0, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation(stat);
StatsDeriveResult expected = filterEstimation.estimate(ge);
slotToColumnStat.put(a, builder.build());
Statistics stat = new Statistics(1000, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation();
Statistics expected = filterEstimation.estimate(ge, stat);
Assertions.assertEquals(1000 * 1.0 / 500, expected.getRowCount());
}
@ -275,17 +260,17 @@ class FilterEstimationTest {
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
IntegerLiteral int500 = new IntegerLiteral(500);
LessThanEqual le = new LessThanEqual(a, int500);
Map<Id, ColumnStatistic> slotToColumnStat = new HashMap<>();
Map<Expression, ColumnStatistic> slotToColumnStat = new HashMap<>();
ColumnStatisticBuilder builder1 = new ColumnStatisticBuilder()
.setNdv(500)
.setAvgSizeByte(4)
.setNumNulls(500)
.setMinValue(500)
.setMaxValue(1000);
slotToColumnStat.put(a.getExprId(), builder1.build());
StatsDeriveResult stat = new StatsDeriveResult(1000, 1, 0, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation(stat);
StatsDeriveResult expected = filterEstimation.estimate(le);
slotToColumnStat.put(a, builder1.build());
Statistics stat = new Statistics(1000, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation();
Statistics expected = filterEstimation.estimate(le, stat);
Assertions.assertEquals(1000 * 1.0 / 500, expected.getRowCount());
}
@ -296,18 +281,18 @@ class FilterEstimationTest {
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
IntegerLiteral int500 = new IntegerLiteral(500);
LessThan less = new LessThan(a, int500);
Map<Id, ColumnStatistic> slotToColumnStat = new HashMap<>();
Map<Expression, ColumnStatistic> slotToColumnStat = new HashMap<>();
ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
.setNdv(500)
.setAvgSizeByte(4)
.setNumNulls(500)
.setMinValue(500)
.setMaxValue(1000);
slotToColumnStat.put(a.getExprId(), builder.build());
StatsDeriveResult stat = new StatsDeriveResult(1000, 1, 0, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation(stat);
StatsDeriveResult expected = filterEstimation.estimate(less);
Assertions.assertEquals(0, expected.getRowCount());
slotToColumnStat.put(a, builder.build());
Statistics stat = new Statistics(1000, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation();
Statistics expected = filterEstimation.estimate(less, stat);
Assertions.assertEquals(2, expected.getRowCount());
}
// a > 1000
@ -317,18 +302,18 @@ class FilterEstimationTest {
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
IntegerLiteral int1000 = new IntegerLiteral(1000);
GreaterThan ge = new GreaterThan(a, int1000);
Map<Id, ColumnStatistic> slotToColumnStat = new HashMap<>();
Map<Expression, ColumnStatistic> slotToColumnStat = new HashMap<>();
ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
.setNdv(500)
.setAvgSizeByte(4)
.setNumNulls(500)
.setMinValue(500)
.setMaxValue(1000);
slotToColumnStat.put(a.getExprId(), builder.build());
StatsDeriveResult stat = new StatsDeriveResult(1000, 1, 0, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation(stat);
StatsDeriveResult expected = filterEstimation.estimate(ge);
Assertions.assertEquals(0, expected.getRowCount());
slotToColumnStat.put(a, builder.build());
Statistics stat = new Statistics(1000, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation();
Statistics expected = filterEstimation.estimate(ge, stat);
Assertions.assertEquals(2, expected.getRowCount());
}
// a > b
@ -339,7 +324,7 @@ class FilterEstimationTest {
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
SlotReference b = new SlotReference("b", IntegerType.INSTANCE);
GreaterThan ge = new GreaterThan(a, b);
Map<Id, ColumnStatistic> slotToColumnStat = new HashMap<>();
Map<Expression, ColumnStatistic> slotToColumnStat = new HashMap<>();
ColumnStatisticBuilder builder1 = new ColumnStatisticBuilder()
.setNdv(500)
.setAvgSizeByte(4)
@ -352,11 +337,11 @@ class FilterEstimationTest {
.setNumNulls(0)
.setMinValue(501)
.setMaxValue(1000);
slotToColumnStat.put(a.getExprId(), builder1.build());
slotToColumnStat.put(b.getExprId(), builder2.build());
StatsDeriveResult stat = new StatsDeriveResult(1000, 1, 0, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation(stat);
StatsDeriveResult expected = filterEstimation.estimate(ge);
slotToColumnStat.put(a, builder1.build());
slotToColumnStat.put(b, builder2.build());
Statistics stat = new Statistics(1000, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation();
Statistics expected = filterEstimation.estimate(ge, stat);
Assertions.assertEquals(0, expected.getRowCount());
}
@ -368,7 +353,7 @@ class FilterEstimationTest {
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
SlotReference b = new SlotReference("b", IntegerType.INSTANCE);
LessThan less = new LessThan(a, b);
Map<Id, ColumnStatistic> slotToColumnStat = new HashMap<>();
Map<Expression, ColumnStatistic> slotToColumnStat = new HashMap<>();
ColumnStatisticBuilder builder1 = new ColumnStatisticBuilder()
.setNdv(500)
.setAvgSizeByte(4)
@ -381,11 +366,11 @@ class FilterEstimationTest {
.setNumNulls(0)
.setMinValue(501)
.setMaxValue(1000);
slotToColumnStat.put(a.getExprId(), builder1.build());
slotToColumnStat.put(b.getExprId(), builder2.build());
StatsDeriveResult stat = new StatsDeriveResult(1000, 1, 0, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation(stat);
StatsDeriveResult esimated = filterEstimation.estimate(less);
slotToColumnStat.put(a, builder1.build());
slotToColumnStat.put(b, builder2.build());
Statistics stat = new Statistics(1000, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation();
Statistics esimated = filterEstimation.estimate(less, stat);
Assertions.assertEquals(1000, esimated.getRowCount());
}
@ -397,7 +382,7 @@ class FilterEstimationTest {
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
SlotReference b = new SlotReference("b", IntegerType.INSTANCE);
GreaterThan ge = new GreaterThan(a, b);
Map<Id, ColumnStatistic> slotToColumnStat = new HashMap<>();
Map<Expression, ColumnStatistic> slotToColumnStat = new HashMap<>();
ColumnStatisticBuilder builder1 = new ColumnStatisticBuilder()
.setNdv(500)
.setAvgSizeByte(4)
@ -410,12 +395,12 @@ class FilterEstimationTest {
.setNumNulls(500)
.setMinValue(0)
.setMaxValue(500);
slotToColumnStat.put(a.getExprId(), builder1.build());
slotToColumnStat.put(b.getExprId(), builder2.build());
StatsDeriveResult stat = new StatsDeriveResult(1000, 1, 0, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation(stat);
StatsDeriveResult estimated = filterEstimation.estimate(ge);
Assertions.assertEquals(1000, estimated.getRowCount());
slotToColumnStat.put(a, builder1.build());
slotToColumnStat.put(b, builder2.build());
Statistics stat = new Statistics(1000, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation();
Statistics estimated = filterEstimation.estimate(ge, stat);
Assertions.assertEquals(500, estimated.getRowCount());
}
// a in (1, 3, 5)
@ -427,17 +412,17 @@ class FilterEstimationTest {
IntegerLiteral i3 = new IntegerLiteral(3);
IntegerLiteral i5 = new IntegerLiteral(5);
InPredicate inPredicate = new InPredicate(a, Lists.newArrayList(i1, i3, i5));
Map<Id, ColumnStatistic> slotToColumnStat = new HashMap<>();
Map<Expression, ColumnStatistic> slotToColumnStat = new HashMap<>();
ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
.setNdv(10)
.setAvgSizeByte(4)
.setNumNulls(0)
.setMinValue(1)
.setMaxValue(10);
slotToColumnStat.put(a.getExprId(), builder.build());
StatsDeriveResult stat = new StatsDeriveResult(1000, 1, 0, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation(stat);
StatsDeriveResult estimated = filterEstimation.estimate(inPredicate);
slotToColumnStat.put(a, builder.build());
Statistics stat = new Statistics(1000, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation();
Statistics estimated = filterEstimation.estimate(inPredicate, stat);
Assertions.assertEquals(1000 * 3.0 / 10.0, estimated.getRowCount());
}
@ -451,17 +436,17 @@ class FilterEstimationTest {
IntegerLiteral i5 = new IntegerLiteral(5);
InPredicate inPredicate = new InPredicate(a, Lists.newArrayList(i1, i3, i5));
Not not = new Not(inPredicate);
Map<Id, ColumnStatistic> slotToColumnStat = new HashMap<>();
Map<Expression, ColumnStatistic> slotToColumnStat = new HashMap<>();
ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
.setNdv(10)
.setAvgSizeByte(4)
.setNumNulls(0)
.setMinValue(1)
.setMaxValue(10);
slotToColumnStat.put(a.getExprId(), builder.build());
StatsDeriveResult stat = new StatsDeriveResult(1000, 1, 0, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation(stat);
StatsDeriveResult estimated = filterEstimation.estimate(not);
slotToColumnStat.put(a, builder.build());
Statistics stat = new Statistics(1000, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation();
Statistics estimated = filterEstimation.estimate(not, stat);
Assertions.assertEquals(1000 * 7.0 / 10.0, estimated.getRowCount());
}
@ -476,7 +461,7 @@ class FilterEstimationTest {
SlotReference c = new SlotReference("c", IntegerType.INSTANCE);
IntegerLiteral i100 = new IntegerLiteral(100);
GreaterThan ge = new GreaterThan(c, i100);
Map<Id, ColumnStatistic> slotToColumnStat = new HashMap<>();
Map<Expression, ColumnStatistic> slotToColumnStat = new HashMap<>();
ColumnStatisticBuilder builderA = new ColumnStatisticBuilder()
.setNdv(1000)
.setAvgSizeByte(4)
@ -498,23 +483,20 @@ class FilterEstimationTest {
.setMinValue(0)
.setMaxValue(200)
.setSelectivity(1.0);
slotToColumnStat.put(a.getExprId(), builderA.build());
slotToColumnStat.put(b.getExprId(), builderB.build());
slotToColumnStat.put(c.getExprId(), builderC.build());
StatsDeriveResult stat = new StatsDeriveResult(1000, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation(stat);
StatsDeriveResult estimated = filterEstimation.estimate(ge);
ColumnStatistic statsA = estimated.getColumnStatsBySlotId(a.getExprId());
slotToColumnStat.put(a, builderA.build());
slotToColumnStat.put(b, builderB.build());
slotToColumnStat.put(c, builderC.build());
Statistics stat = new Statistics(1000, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation();
Statistics estimated = filterEstimation.estimate(ge, stat);
ColumnStatistic statsA = estimated.findColumnStatistics(a);
Assertions.assertEquals(500, statsA.ndv);
Assertions.assertEquals(0.5, statsA.selectivity);
ColumnStatistic statsB = estimated.getColumnStatsBySlotId(b.getExprId());
Assertions.assertEquals(100, statsB.ndv);
Assertions.assertEquals(1.0, statsB.selectivity);
ColumnStatistic statsC = estimated.getColumnStatsBySlotId(c.getExprId());
ColumnStatistic statsB = estimated.findColumnStatistics(b);
Assertions.assertEquals(50, statsB.ndv);
ColumnStatistic statsC = estimated.findColumnStatistics(c);
Assertions.assertEquals(50, statsC.ndv);
Assertions.assertEquals(100, statsC.minValue);
Assertions.assertEquals(200, statsC.maxValue);
Assertions.assertEquals(1.0, statsC.selectivity);
}
/**
@ -535,7 +517,7 @@ class FilterEstimationTest {
//GreaterThan ge2 = new GreaterThan(i20, c);
LessThan le1 = new LessThan(c, i20);
And and = new And(ge1, le1);
Map<Id, ColumnStatistic> slotToColumnStat = new HashMap<>();
Map<Expression, ColumnStatistic> slotToColumnStat = new HashMap<>();
ColumnStatisticBuilder builderA = new ColumnStatisticBuilder()
.setNdv(100)
.setAvgSizeByte(4)
@ -557,26 +539,26 @@ class FilterEstimationTest {
.setMinValue(0)
.setMaxValue(40)
.setSelectivity(1.0);
slotToColumnStat.put(a.getExprId(), builderA.build());
slotToColumnStat.put(b.getExprId(), builderB.build());
slotToColumnStat.put(c.getExprId(), builderC.build());
StatsDeriveResult stat = new StatsDeriveResult(100, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation(stat);
StatsDeriveResult estimated = filterEstimation.estimate(and);
slotToColumnStat.put(a, builderA.build());
slotToColumnStat.put(b, builderB.build());
slotToColumnStat.put(c, builderC.build());
Statistics stat = new Statistics(100, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation();
Statistics estimated = filterEstimation.estimate(and, stat);
Assertions.assertEquals(25, estimated.getRowCount());
ColumnStatistic statsA = estimated.getColumnStatsBySlot(a);
ColumnStatistic statsA = estimated.findColumnStatistics(a);
Assertions.assertEquals(25, statsA.ndv);
//Assertions.assertEquals(0.25, statsA.selectivity);
Assertions.assertEquals(0, statsA.minValue);
Assertions.assertEquals(100, statsA.maxValue);
ColumnStatistic statsB = estimated.getColumnStatsBySlot(b);
Assertions.assertEquals(20, statsB.ndv);
ColumnStatistic statsB = estimated.findColumnStatistics(b);
Assertions.assertEquals(5, statsB.ndv);
Assertions.assertEquals(0, statsB.minValue);
Assertions.assertEquals(500, statsB.maxValue);
Assertions.assertEquals(1.0, statsB.selectivity);
ColumnStatistic statsC = estimated.getColumnStatsBySlot(c);
ColumnStatistic statsC = estimated.findColumnStatistics(c);
Assertions.assertEquals(10, statsC.ndv);
Assertions.assertEquals(10, statsC.minValue);
Assertions.assertEquals(20, statsC.maxValue);
@ -599,7 +581,7 @@ class FilterEstimationTest {
SlotReference c = new SlotReference("c", IntegerType.INSTANCE);
IntegerLiteral i300 = new IntegerLiteral(300);
GreaterThan ge = new GreaterThan(c, i300);
Map<Id, ColumnStatistic> slotToColumnStat = new HashMap<>();
Map<Expression, ColumnStatistic> slotToColumnStat = new HashMap<>();
ColumnStatisticBuilder builderA = new ColumnStatisticBuilder()
.setNdv(1000)
.setAvgSizeByte(4)
@ -621,23 +603,20 @@ class FilterEstimationTest {
.setMinValue(0)
.setMaxValue(200)
.setSelectivity(1.0);
slotToColumnStat.put(a.getExprId(), builderA.build());
slotToColumnStat.put(b.getExprId(), builderB.build());
slotToColumnStat.put(c.getExprId(), builderC.build());
StatsDeriveResult stat = new StatsDeriveResult(1000, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation(stat);
StatsDeriveResult estimated = filterEstimation.estimate(ge);
ColumnStatistic statsA = estimated.getColumnStatsBySlot(a);
slotToColumnStat.put(a, builderA.build());
slotToColumnStat.put(b, builderB.build());
slotToColumnStat.put(c, builderC.build());
Statistics stat = new Statistics(1000, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation();
Statistics estimated = filterEstimation.estimate(ge, stat);
ColumnStatistic statsA = estimated.findColumnStatistics(a);
Assertions.assertEquals(0, statsA.ndv);
Assertions.assertEquals(0, statsA.selectivity);
ColumnStatistic statsB = estimated.getColumnStatsBySlot(b);
ColumnStatistic statsB = estimated.findColumnStatistics(b);
Assertions.assertEquals(0, statsB.ndv);
Assertions.assertEquals(0.0, statsB.selectivity);
ColumnStatistic statsC = estimated.getColumnStatsBySlot(c);
ColumnStatistic statsC = estimated.findColumnStatistics(c);
Assertions.assertEquals(0, statsC.ndv);
Assertions.assertEquals(300, statsC.minValue);
Assertions.assertEquals(300, statsC.maxValue);
Assertions.assertEquals(1.0, statsC.selectivity);
Assertions.assertTrue(Double.isNaN(statsC.minValue));
Assertions.assertTrue(Double.isNaN(statsC.maxValue));
}
/**
@ -673,7 +652,7 @@ class FilterEstimationTest {
SlotReference c = new SlotReference("c", IntegerType.INSTANCE);
IntegerLiteral i10 = new IntegerLiteral(10);
IntegerLiteral i20 = new IntegerLiteral(20);
Map<Id, ColumnStatistic> slotToColumnStat = new HashMap<>();
Map<Expression, ColumnStatistic> slotToColumnStat = new HashMap<>();
ColumnStatisticBuilder builderA = new ColumnStatisticBuilder()
.setNdv(100)
@ -696,29 +675,26 @@ class FilterEstimationTest {
.setMinValue(0)
.setMaxValue(40)
.setSelectivity(1.0);
slotToColumnStat.put(a.getExprId(), builderA.build());
slotToColumnStat.put(b.getExprId(), builderB.build());
slotToColumnStat.put(c.getExprId(), builderC.build());
StatsDeriveResult stat = new StatsDeriveResult(100, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation(stat);
slotToColumnStat.put(a, builderA.build());
slotToColumnStat.put(b, builderB.build());
slotToColumnStat.put(c, builderC.build());
Statistics stat = new Statistics(100, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation();
InPredicate inPredicate = new InPredicate(c, Lists.newArrayList(i10, i20));
StatsDeriveResult estimated = filterEstimation.estimate(inPredicate);
ColumnStatistic statsA = estimated.getColumnStatsBySlot(a);
ColumnStatistic statsB = estimated.getColumnStatsBySlot(b);
ColumnStatistic statsC = estimated.getColumnStatsBySlot(c);
Statistics estimated = filterEstimation.estimate(inPredicate, stat);
ColumnStatistic statsA = estimated.findColumnStatistics(a);
ColumnStatistic statsB = estimated.findColumnStatistics(b);
ColumnStatistic statsC = estimated.findColumnStatistics(c);
Assertions.assertEquals(5, statsA.ndv);
Assertions.assertEquals(0, statsA.minValue);
Assertions.assertEquals(100, statsA.maxValue);
Assertions.assertEquals(0.05, statsA.selectivity);
Assertions.assertEquals(5, statsB.ndv);
Assertions.assertEquals(1, statsB.ndv);
Assertions.assertEquals(0, statsB.minValue);
Assertions.assertEquals(500, statsB.maxValue);
Assertions.assertEquals(0.25, statsB.selectivity);
Assertions.assertEquals(2, statsC.ndv);
Assertions.assertEquals(10, statsC.minValue);
Assertions.assertEquals(20, statsC.maxValue);
Assertions.assertEquals(0.2, statsC.selectivity);
}
/**
@ -747,7 +723,7 @@ class FilterEstimationTest {
IntegerLiteral i10 = new IntegerLiteral(10);
IntegerLiteral i15 = new IntegerLiteral(15);
IntegerLiteral i200 = new IntegerLiteral(200);
Map<Id, ColumnStatistic> slotToColumnStat = new HashMap<>();
Map<Expression, ColumnStatistic> slotToColumnStat = new HashMap<>();
ColumnStatisticBuilder builderA = new ColumnStatisticBuilder()
.setNdv(100)
@ -773,32 +749,29 @@ class FilterEstimationTest {
.setMinValue(0)
.setMaxValue(40)
.setSelectivity(1.0);
slotToColumnStat.put(a.getExprId(), builderA.build());
slotToColumnStat.put(b.getExprId(), builderB.build());
slotToColumnStat.put(c.getExprId(), builderC.build());
StatsDeriveResult stat = new StatsDeriveResult(100, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation(stat);
slotToColumnStat.put(a, builderA.build());
slotToColumnStat.put(b, builderB.build());
slotToColumnStat.put(c, builderC.build());
Statistics stat = new Statistics(100, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation();
InPredicate inPredicate = new InPredicate(c, Lists.newArrayList(i10, i15, i200));
StatsDeriveResult estimated = filterEstimation.estimate(inPredicate);
ColumnStatistic statsA = estimated.getColumnStatsBySlot(a);
ColumnStatistic statsB = estimated.getColumnStatsBySlot(b);
ColumnStatistic statsC = estimated.getColumnStatsBySlot(c);
Statistics estimated = filterEstimation.estimate(inPredicate, stat);
ColumnStatistic statsA = estimated.findColumnStatistics(a);
ColumnStatistic statsB = estimated.findColumnStatistics(b);
ColumnStatistic statsC = estimated.findColumnStatistics(c);
System.out.println(statsA);
System.out.println(statsB);
System.out.println(statsC);
Assertions.assertEquals(5, statsA.ndv);
Assertions.assertEquals(0, statsA.minValue);
Assertions.assertEquals(100, statsA.maxValue);
Assertions.assertEquals(0.05, statsA.selectivity);
Assertions.assertEquals(5, statsB.ndv);
Assertions.assertEquals(1, statsB.ndv);
Assertions.assertEquals(0, statsB.minValue);
Assertions.assertEquals(500, statsB.maxValue);
Assertions.assertEquals(0.25, statsB.selectivity);
Assertions.assertEquals(2, statsC.ndv);
Assertions.assertEquals(10, statsC.minValue);
Assertions.assertEquals(15, statsC.maxValue);
Assertions.assertEquals(0.4, statsC.selectivity);
}
/**
@ -822,7 +795,7 @@ class FilterEstimationTest {
SlotReference b = new SlotReference("b", IntegerType.INSTANCE);
SlotReference c = new SlotReference("c", IntegerType.INSTANCE);
IntegerLiteral i10 = new IntegerLiteral(10);
Map<Id, ColumnStatistic> slotToColumnStat = new HashMap<>();
Map<Expression, ColumnStatistic> slotToColumnStat = new HashMap<>();
ColumnStatisticBuilder builderA = new ColumnStatisticBuilder()
.setNdv(100)
@ -848,31 +821,28 @@ class FilterEstimationTest {
.setMinValue(0)
.setMaxValue(40)
.setSelectivity(1.0);
slotToColumnStat.put(a.getExprId(), builderA.build());
slotToColumnStat.put(b.getExprId(), builderB.build());
slotToColumnStat.put(c.getExprId(), builderC.build());
StatsDeriveResult stat = new StatsDeriveResult(100, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation(stat);
slotToColumnStat.put(a, builderA.build());
slotToColumnStat.put(b, builderB.build());
slotToColumnStat.put(c, builderC.build());
Statistics stat = new Statistics(100, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation();
GreaterThan greaterThan = new GreaterThan(c, i10);
StatsDeriveResult estimated = filterEstimation.estimate(greaterThan);
ColumnStatistic statsA = estimated.getColumnStatsBySlot(a);
ColumnStatistic statsB = estimated.getColumnStatsBySlot(b);
ColumnStatistic statsC = estimated.getColumnStatsBySlot(c);
Statistics estimated = filterEstimation.estimate(greaterThan, stat);
ColumnStatistic statsA = estimated.findColumnStatistics(a);
ColumnStatistic statsB = estimated.findColumnStatistics(b);
ColumnStatistic statsC = estimated.findColumnStatistics(c);
System.out.println(statsA);
System.out.println(statsB);
System.out.println(statsC);
Assertions.assertEquals(75, statsA.ndv);
Assertions.assertEquals(0, statsA.minValue);
Assertions.assertEquals(100, statsA.maxValue);
Assertions.assertEquals(0.75, statsA.selectivity);
Assertions.assertEquals(20, statsB.ndv);
Assertions.assertEquals(15, statsB.ndv);
Assertions.assertEquals(0, statsB.minValue);
Assertions.assertEquals(500, statsB.maxValue);
Assertions.assertEquals(1.0, statsB.selectivity);
Assertions.assertEquals(30, statsC.ndv);
Assertions.assertEquals(10, statsC.minValue);
Assertions.assertEquals(40, statsC.maxValue);
Assertions.assertEquals(1.0, statsC.selectivity);
}
}

View File

@ -18,11 +18,12 @@
package org.apache.doris.nereids.stats;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.common.Id;
import org.apache.doris.nereids.memo.Group;
import org.apache.doris.nereids.memo.GroupExpression;
import org.apache.doris.nereids.properties.LogicalProperties;
import org.apache.doris.nereids.trees.expressions.And;
import org.apache.doris.nereids.trees.expressions.EqualTo;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.Or;
import org.apache.doris.nereids.trees.expressions.SlotReference;
import org.apache.doris.nereids.trees.expressions.literal.IntegerLiteral;
@ -40,7 +41,7 @@ import org.apache.doris.nereids.util.PlanConstructor;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.statistics.ColumnStatistic;
import org.apache.doris.statistics.ColumnStatisticBuilder;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
@ -85,7 +86,7 @@ public class StatsCalculatorTest {
// List<Expression> groupByExprList = new ArrayList<>();
// groupByExprList.add(slot1);
// AggregateFunction sum = new Sum(slot2);
// StatsDeriveResult childStats = new StatsDeriveResult(20, slotColumnStatsMap);
// Statistics childStats = new Statistics(20, slotColumnStatsMap);
// Alias alias = new Alias(sum, "a");
// Group childGroup = newGroup();
// childGroup.setLogicalProperties(new LogicalProperties(new Supplier<List<Slot>>() {
@ -123,10 +124,10 @@ public class StatsCalculatorTest {
columnStat2.setMaxValue(1000);
columnStat2.setNumNulls(10);
Map<Id, ColumnStatistic> slotColumnStatsMap = new HashMap<>();
slotColumnStatsMap.put(slot1.getExprId(), columnStat1.build());
slotColumnStatsMap.put(slot2.getExprId(), columnStat2.build());
StatsDeriveResult childStats = new StatsDeriveResult(10000, slotColumnStatsMap);
Map<Expression, ColumnStatistic> slotColumnStatsMap = new HashMap<>();
slotColumnStatsMap.put(slot1, columnStat1.build());
slotColumnStatsMap.put(slot2, columnStat2.build());
Statistics childStats = new Statistics(10000, slotColumnStatsMap);
EqualTo eq1 = new EqualTo(slot1, new IntegerLiteral(1));
EqualTo eq2 = new EqualTo(slot2, new IntegerLiteral(2));
@ -144,14 +145,14 @@ public class StatsCalculatorTest {
Group ownerGroup = newGroup();
groupExpression.setOwnerGroup(ownerGroup);
StatsCalculator.estimate(groupExpression);
Assertions.assertEquals((long) (10000 * 0.1 * 0.05), ownerGroup.getStatistics().getRowCount(), 0.001);
Assertions.assertEquals((long) 500, ownerGroup.getStatistics().getRowCount(), 0.001);
LogicalFilter<GroupPlan> logicalFilterOr = new LogicalFilter<>(or, groupPlan);
GroupExpression groupExpressionOr = new GroupExpression(logicalFilterOr, ImmutableList.of(childGroup));
Group ownerGroupOr = newGroup();
groupExpressionOr.setOwnerGroup(ownerGroupOr);
StatsCalculator.estimate(groupExpressionOr);
Assertions.assertEquals((long) (10000 * (0.1 + 0.05 - 0.1 * 0.05)),
Assertions.assertEquals((long) 1000,
ownerGroupOr.getStatistics().getRowCount(), 0.001);
}
@ -176,15 +177,15 @@ public class StatsCalculatorTest {
columnStat2.setMaxValue(100);
columnStat2.setNumNulls(10);
Map<Id, ColumnStatistic> slotColumnStatsMap = new HashMap<>();
slotColumnStatsMap.put(slot1.getExprId(), columnStat1.build());
slotColumnStatsMap.put(slot2.getExprId(), columnStat2.build());
StatsDeriveResult childStats = new StatsDeriveResult(10000, slotColumnStatsMap);
Map<Expression, ColumnStatistic> slotColumnStatsMap = new HashMap<>();
slotColumnStatsMap.put(slot1, columnStat1.build());
slotColumnStatsMap.put(slot2, columnStat2.build());
Statistics childStats = new Statistics(10000, slotColumnStatsMap);
EqualTo eq1 = new EqualTo(slot1, new IntegerLiteral(200));
EqualTo eq2 = new EqualTo(slot2, new IntegerLiteral(300));
ImmutableSet and = ImmutableSet.of(eq1, eq2);
ImmutableSet and = ImmutableSet.of(new And(eq1, eq2));
ImmutableSet or = ImmutableSet.of(new Or(eq1, eq2));
Group childGroup = newGroup();
@ -225,10 +226,10 @@ public class StatsCalculatorTest {
// slotColumnStatsMap2.put(slot2, columnStats2);
//
// final long leftRowCount = 5000;
// StatsDeriveResult leftStats = new StatsDeriveResult(leftRowCount, slotColumnStatsMap1);
// Statistics leftStats = new Statistics(leftRowCount, slotColumnStatsMap1);
//
// final long rightRowCount = 10000;
// StatsDeriveResult rightStats = new StatsDeriveResult(rightRowCount, slotColumnStatsMap2);
// Statistics rightStats = new Statistics(rightRowCount, slotColumnStatsMap2);
//
// EqualTo equalTo = new EqualTo(slot1, slot2);
//
@ -238,9 +239,9 @@ public class StatsCalculatorTest {
// JoinType.LEFT_SEMI_JOIN, Lists.newArrayList(equalTo), Optional.empty(), scan1, scan2);
// LogicalJoin<LogicalOlapScan, LogicalOlapScan> fakeInnerJoin = new LogicalJoin<>(
// JoinType.INNER_JOIN, Lists.newArrayList(equalTo), Optional.empty(), scan1, scan2);
// StatsDeriveResult semiJoinStats = JoinEstimation.estimate(leftStats, rightStats, fakeSemiJoin);
// Statistics semiJoinStats = JoinEstimation.estimate(leftStats, rightStats, fakeSemiJoin);
// Assertions.assertEquals(leftRowCount, semiJoinStats.getRowCount());
// StatsDeriveResult innerJoinStats = JoinEstimation.estimate(leftStats, rightStats, fakeInnerJoin);
// Statistics innerJoinStats = JoinEstimation.estimate(leftStats, rightStats, fakeInnerJoin);
// Assertions.assertEquals(2500000, innerJoinStats.getRowCount());
// }
@ -258,9 +259,9 @@ public class StatsCalculatorTest {
Group ownerGroup = newGroup();
groupExpression.setOwnerGroup(ownerGroup);
StatsCalculator.estimate(groupExpression);
StatsDeriveResult stats = ownerGroup.getStatistics();
Assertions.assertEquals(1, stats.getSlotIdToColumnStats().size());
Assertions.assertNotNull(stats.getSlotIdToColumnStats().get(slot1.getExprId()));
Statistics stats = ownerGroup.getStatistics();
Assertions.assertEquals(1, stats.columnStatistics().size());
Assertions.assertNotNull(stats.columnStatistics().get(slot1));
}
@Test
@ -272,9 +273,9 @@ public class StatsCalculatorTest {
ColumnStatisticBuilder columnStat1 = new ColumnStatisticBuilder();
columnStat1.setNdv(10);
columnStat1.setNumNulls(5);
Map<Id, ColumnStatistic> slotColumnStatsMap = new HashMap<>();
slotColumnStatsMap.put(slot1.getExprId(), columnStat1.build());
StatsDeriveResult childStats = new StatsDeriveResult(10, slotColumnStatsMap);
Map<Expression, ColumnStatistic> slotColumnStatsMap = new HashMap<>();
slotColumnStatsMap.put(slot1, columnStat1.build());
Statistics childStats = new Statistics(10, slotColumnStatsMap);
Group childGroup = newGroup();
childGroup.setLogicalProperties(new LogicalProperties(Collections::emptyList));
@ -288,9 +289,9 @@ public class StatsCalculatorTest {
Group ownerGroup = newGroup();
ownerGroup.addGroupExpression(groupExpression);
StatsCalculator.estimate(groupExpression);
StatsDeriveResult limitStats = ownerGroup.getStatistics();
Statistics limitStats = ownerGroup.getStatistics();
Assertions.assertEquals(1, limitStats.getRowCount());
ColumnStatistic slot1Stats = limitStats.getSlotIdToColumnStats().get(slot1.getExprId());
ColumnStatistic slot1Stats = limitStats.columnStatistics().get(slot1);
Assertions.assertEquals(1, slot1Stats.ndv);
Assertions.assertEquals(1, slot1Stats.numNulls);
}
@ -304,9 +305,9 @@ public class StatsCalculatorTest {
ColumnStatisticBuilder columnStat1 = new ColumnStatisticBuilder();
columnStat1.setNdv(10);
columnStat1.setNumNulls(5);
Map<Id, ColumnStatistic> slotColumnStatsMap = new HashMap<>();
slotColumnStatsMap.put(slot1.getExprId(), columnStat1.build());
StatsDeriveResult childStats = new StatsDeriveResult(10, slotColumnStatsMap);
Map<Expression, ColumnStatistic> slotColumnStatsMap = new HashMap<>();
slotColumnStatsMap.put(slot1, columnStat1.build());
Statistics childStats = new Statistics(10, slotColumnStatsMap);
Group childGroup = newGroup();
childGroup.setLogicalProperties(new LogicalProperties(Collections::emptyList));
@ -318,9 +319,9 @@ public class StatsCalculatorTest {
Group ownerGroup = newGroup();
ownerGroup.addGroupExpression(groupExpression);
StatsCalculator.estimate(groupExpression);
StatsDeriveResult topNStats = ownerGroup.getStatistics();
Statistics topNStats = ownerGroup.getStatistics();
Assertions.assertEquals(1, topNStats.getRowCount());
ColumnStatistic slot1Stats = topNStats.getSlotIdToColumnStats().get(slot1.getExprId());
ColumnStatistic slot1Stats = topNStats.columnStatistics().get(slot1);
Assertions.assertEquals(1, slot1Stats.ndv);
Assertions.assertEquals(1, slot1Stats.numNulls);
}

View File

@ -31,7 +31,7 @@ import org.apache.doris.nereids.trees.plans.physical.PhysicalRelation;
import org.apache.doris.nereids.types.IntegerType;
import org.apache.doris.nereids.types.StringType;
import org.apache.doris.nereids.util.PlanConstructor;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import com.google.common.collect.ImmutableList;
import org.junit.jupiter.api.Assertions;
@ -100,7 +100,7 @@ public class PlanOutputTest {
@Override
public PhysicalPlan withPhysicalPropertiesAndStats(PhysicalProperties physicalProperties,
StatsDeriveResult statsDeriveResult) {
Statistics statsDeriveResult) {
return null;
}

View File

@ -17,7 +17,6 @@
package org.apache.doris.nereids.util;
import org.apache.doris.common.Id;
import org.apache.doris.common.Pair;
import org.apache.doris.nereids.CascadesContext;
import org.apache.doris.nereids.jobs.cascades.DeriveStatsJob;
@ -33,7 +32,7 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalJoin;
import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan;
import org.apache.doris.nereids.trees.plans.logical.LogicalPlan;
import org.apache.doris.statistics.ColumnStatistic;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.Statistics;
import com.google.common.base.Preconditions;
@ -187,13 +186,14 @@ public class HyperGraphBuilder {
private void injectRowcount(Group group) {
if (!group.isJoinGroup()) {
LogicalOlapScan scanPlan = (LogicalOlapScan) group.getLogicalExpression().getPlan();
HashMap<Id, ColumnStatistic> slotIdToColumnStats = new HashMap<Id, ColumnStatistic>();
HashMap<Expression, ColumnStatistic> slotIdToColumnStats = new HashMap<Expression, ColumnStatistic>();
int count = rowCounts.get(Integer.parseInt(scanPlan.getTable().getName()));
for (Slot slot : scanPlan.getOutput()) {
slotIdToColumnStats.put(slot.getExprId(),
new ColumnStatistic(count, count, 0, 0, 0, 0, 0, 0, null, null, true));
slotIdToColumnStats.put(slot,
new ColumnStatistic(count, count, 0, 0, 0, 0,
0, 0, null, null, true, null));
}
StatsDeriveResult stats = new StatsDeriveResult(count, slotIdToColumnStats);
Statistics stats = new Statistics(count, slotIdToColumnStats);
group.setStatistics(stats);
return;
}

View File

@ -48,18 +48,18 @@ public class CacheTest extends TestWithFeService {
try {
Thread.sleep(50);
} catch (InterruptedException e) {
return ColumnStatistic.DEFAULT;
return ColumnStatistic.UNKNOWN;
}
return ColumnStatistic.DEFAULT;
return ColumnStatistic.UNKNOWN;
});
}
};
StatisticsCache statisticsCache = new StatisticsCache();
ColumnStatistic c = statisticsCache.getColumnStatistics(1, "col");
Assertions.assertEquals(c, ColumnStatistic.DEFAULT);
Assertions.assertEquals(c, ColumnStatistic.UNKNOWN);
Thread.sleep(100);
c = statisticsCache.getColumnStatistics(1, "col");
Assertions.assertEquals(c, ColumnStatistic.DEFAULT);
Assertions.assertEquals(c, ColumnStatistic.UNKNOWN);
}
@Test
@ -120,7 +120,7 @@ public class CacheTest extends TestWithFeService {
};
StatisticsCache statisticsCache = new StatisticsCache();
ColumnStatistic columnStatistic = statisticsCache.getColumnStatistics(0, "col");
Assertions.assertEquals(ColumnStatistic.DEFAULT, columnStatistic);
Assertions.assertEquals(ColumnStatistic.UNKNOWN, columnStatistic);
Thread.sleep(1000);
columnStatistic = statisticsCache.getColumnStatistics(0, "col");
Assertions.assertEquals(1, columnStatistic.count);
@ -186,12 +186,6 @@ public class CacheTest extends TestWithFeService {
StatisticsCache statisticsCache = new StatisticsCache();
Histogram histogram = statisticsCache.getHistogram(0, "col");
Assertions.assertEquals(Histogram.DEFAULT, histogram);
Thread.sleep(1000);
histogram = statisticsCache.getHistogram(0, "col");
Assertions.assertEquals("DATETIME", histogram.dataType.toString());
Assertions.assertEquals(5, histogram.numBuckets);
Assertions.assertEquals(0.2, histogram.sampleRate);
Assertions.assertEquals(5, histogram.buckets.size());
Assertions.assertEquals(null, histogram);
}
}

View File

@ -21,11 +21,11 @@ import org.apache.doris.analysis.LiteralExpr;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.statistics.util.StatisticsUtil;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import org.apache.commons.math3.util.Precision;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
@ -60,7 +60,7 @@ class HistogramTest {
Type dataType = histogramUnderTest.dataType;
Assertions.assertTrue(dataType.isDatetime());
int numBuckets = histogramUnderTest.numBuckets;
int numBuckets = histogramUnderTest.buckets.size();
Assertions.assertEquals(5, numBuckets);
double sampleRate = histogramUnderTest.sampleRate;
@ -69,17 +69,18 @@ class HistogramTest {
List<Bucket> buckets = histogramUnderTest.buckets;
Assertions.assertEquals(5, buckets.size());
LiteralExpr expectedLower = LiteralExpr.create("2022-09-21 17:30:29",
Objects.requireNonNull(Type.fromPrimitiveType(PrimitiveType.DATETIME)));
LiteralExpr expectedUpper = LiteralExpr.create("2022-09-21 22:30:29",
Objects.requireNonNull(Type.fromPrimitiveType(PrimitiveType.DATETIME)));
double expectedLower = LiteralExpr.create("2022-09-21 17:30:29",
Objects.requireNonNull(Type.fromPrimitiveType(PrimitiveType.DATETIME))).getDoubleValue();
double expectedUpper = LiteralExpr.create("2022-09-21 22:30:29",
Objects.requireNonNull(Type.fromPrimitiveType(PrimitiveType.DATETIME))).getDoubleValue();
boolean flag = false;
for (Bucket bucket : buckets) {
LiteralExpr lower = bucket.getLower();
LiteralExpr upper = bucket.getUpper();
if (expectedLower.equals(lower) && expectedUpper.equals(upper)) {
double lower = bucket.getLower();
double upper = bucket.getUpper();
if (Precision.equals(expectedLower, lower, 0.01)
&& Precision.equals(expectedUpper, upper, 0.01)) {
flag = true;
break;
}
@ -94,7 +95,6 @@ class HistogramTest {
JsonObject histogramJson = JsonParser.parseString(json).getAsJsonObject();
String typeStr = histogramJson.get("data_type").getAsString();
Type datatype = Type.fromPrimitiveType(PrimitiveType.valueOf(typeStr));
Assertions.assertEquals("DATETIME", typeStr);
int numBuckets = histogramJson.get("num_buckets").getAsInt();
@ -116,12 +116,14 @@ class HistogramTest {
for (int i = 0; i < jsonArray.size(); i++) {
JsonObject bucketJson = jsonArray.get(i).getAsJsonObject();
LiteralExpr lower = StatisticsUtil.readableValue(datatype, bucketJson.get("lower").getAsString());
LiteralExpr upper = StatisticsUtil.readableValue(datatype, bucketJson.get("upper").getAsString());
double lower = bucketJson.get("lower").getAsDouble();
double upper = bucketJson.get("upper").getAsDouble();
int count = bucketJson.get("count").getAsInt();
int preSum = bucketJson.get("pre_sum").getAsInt();
int ndv = bucketJson.get("ndv").getAsInt();
if (expectedLower.equals(lower) && expectedUpper.equals(upper) && count == 9 && preSum == 0 && ndv == 1) {
if (Precision.equals(expectedLower.getDoubleValue(), lower, 0.01)
&& Precision.equals(expectedUpper.getDoubleValue(), upper, 0.01)
&& count == 9 && preSum == 0 && ndv == 1) {
flag = true;
break;
}
@ -129,65 +131,4 @@ class HistogramTest {
Assertions.assertTrue(flag);
}
@Test
void testFindBucket() throws Exception {
// Setup
LiteralExpr key1 = LiteralExpr.create("2022-09-21 17:30:29",
Objects.requireNonNull(Type.fromPrimitiveType(PrimitiveType.DATETIME)));
LiteralExpr key2 = LiteralExpr.create("2022-09-23 22:30:29",
Objects.requireNonNull(Type.fromPrimitiveType(PrimitiveType.DATETIME)));
// Run the test
Bucket bucket1 = histogramUnderTest.findBucket(key1);
Bucket bucket2 = histogramUnderTest.findBucket(key2);
// Verify the results
Assertions.assertEquals(1, bucket1.getNdv());
Assertions.assertEquals(1, bucket2.getNdv());
Assertions.assertEquals(9, bucket1.getCount());
Assertions.assertEquals(9, bucket2.getCount());
Assertions.assertEquals(0, bucket1.getPreSum());
Assertions.assertEquals(19, bucket2.getPreSum());
LiteralExpr lower1 = LiteralExpr.create("2022-09-21 17:30:29",
Objects.requireNonNull(Type.fromPrimitiveType(PrimitiveType.DATETIME)));
LiteralExpr lower2 = LiteralExpr.create("2022-09-23 17:30:29",
Objects.requireNonNull(Type.fromPrimitiveType(PrimitiveType.DATETIME)));
LiteralExpr upper1 = LiteralExpr.create("2022-09-21 22:30:29",
Objects.requireNonNull(Type.fromPrimitiveType(PrimitiveType.DATETIME)));
LiteralExpr upper2 = LiteralExpr.create("2022-09-23 22:30:29",
Objects.requireNonNull(Type.fromPrimitiveType(PrimitiveType.DATETIME)));
Assertions.assertEquals(lower1, bucket1.getLower());
Assertions.assertEquals(lower2, bucket2.getLower());
Assertions.assertEquals(upper1, bucket1.getUpper());
Assertions.assertEquals(upper2, bucket2.getUpper());
}
@Test
void testRangeCount() throws Exception {
// Setup
LiteralExpr lower = LiteralExpr.create("2022-09-21 17:30:29",
Objects.requireNonNull(Type.fromPrimitiveType(PrimitiveType.DATETIME)));
LiteralExpr upper = LiteralExpr.create("2022-09-23 17:30:29",
Objects.requireNonNull(Type.fromPrimitiveType(PrimitiveType.DATETIME)));
// Run the test
long count1 = histogramUnderTest.rangeCount(lower, true, upper, true);
long count2 = histogramUnderTest.rangeCount(lower, true, upper, false);
long count3 = histogramUnderTest.rangeCount(lower, false, upper, false);
long count4 = histogramUnderTest.rangeCount(lower, false, upper, true);
long count5 = histogramUnderTest.rangeCount(null, true, upper, true);
long count6 = histogramUnderTest.rangeCount(lower, true, null, true);
long count7 = histogramUnderTest.rangeCount(null, true, null, true);
// Verify the results
Assertions.assertEquals(28L, count1);
Assertions.assertEquals(19L, count2);
Assertions.assertEquals(10L, count3);
Assertions.assertEquals(19L, count4);
Assertions.assertEquals(28L, count5);
Assertions.assertEquals(46L, count6);
Assertions.assertEquals(46L, count7);
}
}

View File

@ -27,7 +27,7 @@ public class StatsDeriveResultTest {
public void testUpdateRowCountByLimit() {
StatsDeriveResult stats = new StatsDeriveResult(100);
ColumnStatistic a = new ColumnStatistic(100, 10, 1, 5, 10,
1, 100, 0.5, null, null, false);
1, 100, 0.5, null, null, false, null);
Id id = new Id(1);
stats.addColumnStats(id, a);
StatsDeriveResult res = stats.updateByLimit(0);