[enhancement](Nereids): remove stats derivation in CostAndEnforce job (#24945)
1. Remove stats derivation from the CostAndEnforcer job. 2. Enforce the validity of each column's statistics after estimation.
This commit is contained in:
@ -88,7 +88,7 @@ public class ApplyRuleJob extends Job {
|
||||
} else {
|
||||
// The Join Commute rule preserves the operator's expression and children,
|
||||
// thereby not altering the statistics. Hence, there is no need to derive statistics for it.
|
||||
groupExpression.setStatDerived(true);
|
||||
newGroupExpression.setStatDerived(true);
|
||||
}
|
||||
} else {
|
||||
pushJob(new CostAndEnforcerJob(newGroupExpression, context));
|
||||
@ -100,7 +100,7 @@ public class ApplyRuleJob extends Job {
|
||||
// These implementation rules integrate rules for plan shape transformation.
|
||||
pushJob(new DeriveStatsJob(newGroupExpression, context));
|
||||
} else {
|
||||
groupExpression.setStatDerived(true);
|
||||
newGroupExpression.setStatDerived(true);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -31,7 +31,6 @@ import org.apache.doris.nereids.properties.ChildrenPropertiesRegulator;
|
||||
import org.apache.doris.nereids.properties.EnforceMissingPropertiesHelper;
|
||||
import org.apache.doris.nereids.properties.PhysicalProperties;
|
||||
import org.apache.doris.nereids.properties.RequestPropertyDeriver;
|
||||
import org.apache.doris.nereids.stats.StatsCalculator;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
@ -236,25 +235,13 @@ public class CostAndEnforcerJob extends Job implements Cloneable {
|
||||
PhysicalProperties outputProperty = childOutputPropertyDeriver.getOutputProperties(groupExpression);
|
||||
|
||||
// update current group statistics and re-compute costs.
|
||||
if (groupExpression.children().stream().anyMatch(group -> group.getStatistics() == null)) {
|
||||
// TODO: If it's error, add some warning log at least.
|
||||
if (groupExpression.children().stream().anyMatch(group -> group.getStatistics() == null)
|
||||
&& groupExpression.getOwnerGroup().getStatistics() == null) {
|
||||
// If we get here, the stats calculator has an error that should be fixed.
|
||||
LOG.warn("Nereids try to calculate cost without stats for group expression {}", groupExpression);
|
||||
return false;
|
||||
}
|
||||
|
||||
StatsCalculator statsCalculator = StatsCalculator.estimate(groupExpression,
|
||||
context.getCascadesContext().getConnectContext().getSessionVariable().getForbidUnknownColStats(),
|
||||
context.getCascadesContext().getConnectContext().getTotalColumnStatisticMap(),
|
||||
context.getCascadesContext().getConnectContext().getSessionVariable().isPlayNereidsDump(),
|
||||
context.getCascadesContext());
|
||||
if (!context.getCascadesContext().getConnectContext().getSessionVariable().isPlayNereidsDump()
|
||||
&& context.getCascadesContext().getConnectContext().getSessionVariable().isEnableMinidump()) {
|
||||
context.getCascadesContext().getConnectContext().getTotalColumnStatisticMap()
|
||||
.putAll(statsCalculator.getTotalColumnStatisticMap());
|
||||
context.getCascadesContext().getConnectContext().getTotalHistogramMap()
|
||||
.putAll(statsCalculator.getTotalHistogramMap());
|
||||
}
|
||||
|
||||
// recompute cost after adjusting property
|
||||
curNodeCost = CostCalculator.calculateCost(groupExpression, requestChildrenProperties);
|
||||
groupExpression.setCost(curNodeCost);
|
||||
|
||||
@ -220,6 +220,7 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> {
|
||||
private void estimate() {
|
||||
Plan plan = groupExpression.getPlan();
|
||||
Statistics newStats = plan.accept(this, null);
|
||||
newStats.enforceValid();
|
||||
// We ensure that the rowCount remains unchanged in order to make the cost of each plan comparable.
|
||||
if (groupExpression.getOwnerGroup().getStatistics() == null) {
|
||||
groupExpression.getOwnerGroup().setStatistics(newStats);
|
||||
|
||||
@ -123,6 +123,26 @@ public class Statistics {
|
||||
}
|
||||
}
|
||||
|
||||
public void enforceValid() {
|
||||
for (Entry<Expression, ColumnStatistic> entry : expressionToColumnStats.entrySet()) {
|
||||
ColumnStatistic columnStatistic = entry.getValue();
|
||||
if (!checkColumnStatsValid(columnStatistic)) {
|
||||
double ndv = Math.min(columnStatistic.ndv, rowCount);
|
||||
ColumnStatisticBuilder columnStatisticBuilder = new ColumnStatisticBuilder(columnStatistic);
|
||||
columnStatisticBuilder.setNdv(ndv);
|
||||
columnStatisticBuilder.setNumNulls(Math.min(columnStatistic.numNulls, rowCount - ndv));
|
||||
columnStatisticBuilder.setCount(rowCount);
|
||||
columnStatistic = columnStatisticBuilder.build();
|
||||
}
|
||||
expressionToColumnStats.put(entry.getKey(), columnStatistic);
|
||||
}
|
||||
}
|
||||
|
||||
public boolean checkColumnStatsValid(ColumnStatistic columnStatistic) {
|
||||
return columnStatistic.ndv <= rowCount
|
||||
&& columnStatistic.numNulls <= rowCount - columnStatistic.ndv;
|
||||
}
|
||||
|
||||
public Statistics withSel(double sel) {
|
||||
sel = StatsMathUtil.minNonNaN(sel, 1);
|
||||
return withRowCount(rowCount * sel);
|
||||
|
||||
Reference in New Issue
Block a user