[opt](nereids) adjust column stats in filter estimation (#24973)

TPCDS before (times in ms; columns: query, cold run, hot run 1, hot run 2, best hot run):
query4  9335    8113    8070    8070
query13 3104    1386    1385    1385
query18 1704    1216    1151    1151
query48 840     840     839     839
query61 435     379     383     379
query71 715     570     579     570
query85 2822    2627    2612    2612
query88 1897    1816    1793    1793
Total cold run time: 20852 ms
Total hot run time: 16799 ms

after:
query4  9610    8287    8249    8249
query13 1721    1013    1042    1013
query18 1585    1186    1155    1155
query48 789     777     778     777
query61 384     387     381     381
query71 713     610     584     584
query85 2020    1867    1843    1843
query88 1859    1812    1805    1805
Total cold run time: 18681 ms
Total hot run time: 15807 ms
This commit is contained in:
minghong
2023-09-28 21:34:17 +08:00
committed by GitHub
parent 8eaf0d3a4b
commit 4c94820ff9
19 changed files with 377 additions and 266 deletions

View File

@ -125,7 +125,7 @@ public class DeriveStatsJob extends Job {
// child group's row count unchanged when the parent group expression is a project operation.
double parentRowCount = groupExpression.getOwnerGroup().getStatistics().getRowCount();
groupExpression.children().forEach(g -> g.setStatistics(
g.getStatistics().updateRowCountOnly(parentRowCount))
g.getStatistics().updateRowCountAndColStats(parentRowCount))
);
}
}

View File

@ -49,10 +49,11 @@ import org.apache.doris.statistics.StatisticRange;
import org.apache.doris.statistics.Statistics;
import org.apache.doris.statistics.StatisticsBuilder;
import com.google.common.base.Preconditions;
import com.google.common.collect.Sets;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.function.Predicate;
@ -83,12 +84,14 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
public Statistics estimate(Expression expression, Statistics statistics) {
// For a comparison predicate, we consider it valid only when its left side is a slot and its right side
// is a literal.
return expression.accept(this, new EstimationContext(statistics));
Statistics stats = expression.accept(this, new EstimationContext(statistics));
stats.enforceValid();
return stats;
}
@Override
public Statistics visit(Expression expr, EstimationContext context) {
return context.statistics.withSel(DEFAULT_INEQUALITY_COEFFICIENT);
return context.statistics.withSel(DEFAULT_INEQUALITY_COEFFICIENT, false);
}
@Override
@ -96,35 +99,37 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
Expression leftExpr = predicate.child(0);
Expression rightExpr = predicate.child(1);
Statistics leftStats = leftExpr.accept(this, context);
Statistics andStats = rightExpr.accept(new FilterEstimation(),
Statistics andStats = rightExpr.accept(this,
new EstimationContext(leftStats));
if (predicate instanceof And) {
return andStats;
} else if (predicate instanceof Or) {
Statistics rightStats = rightExpr.accept(this, context);
double rowCount = leftStats.getRowCount() + rightStats.getRowCount() - andStats.getRowCount();
Statistics orStats = context.statistics.withRowCount(rowCount);
for (Map.Entry<Expression, ColumnStatistic> entry : orStats.columnStatistics().entrySet()) {
ColumnStatistic leftColStats = leftStats.findColumnStatistics(entry.getKey());
ColumnStatistic rightColStats = rightStats.findColumnStatistics(entry.getKey());
ColumnStatisticBuilder estimatedColStatsBuilder = new ColumnStatisticBuilder(entry.getValue());
if (leftColStats.minValue <= rightColStats.minValue) {
estimatedColStatsBuilder.setMinValue(leftColStats.minValue);
estimatedColStatsBuilder.setMinExpr(leftColStats.minExpr);
} else {
estimatedColStatsBuilder.setMinValue(rightColStats.minValue);
estimatedColStatsBuilder.setMinExpr(rightColStats.minExpr);
}
if (leftColStats.maxValue >= rightColStats.maxValue) {
estimatedColStatsBuilder.setMaxValue(leftColStats.maxValue);
estimatedColStatsBuilder.setMaxExpr(leftColStats.maxExpr);
} else {
estimatedColStatsBuilder.setMaxValue(rightColStats.maxValue);
estimatedColStatsBuilder.setMaxExpr(rightColStats.maxExpr);
Statistics orStats = context.statistics.setRowCount(rowCount);
Set<Slot> leftInputSlots = leftExpr.getInputSlots();
Set<Slot> rightInputSlots = rightExpr.getInputSlots();
for (Slot slot : context.keyColumns) {
if (leftInputSlots.contains(slot) && rightInputSlots.contains(slot)) {
ColumnStatistic leftColStats = leftStats.findColumnStatistics(slot);
ColumnStatistic rightColStats = rightStats.findColumnStatistics(slot);
StatisticRange leftRange = StatisticRange.from(leftColStats, slot.getDataType());
StatisticRange rightRange = StatisticRange.from(rightColStats, slot.getDataType());
StatisticRange union = leftRange.union(rightRange);
ColumnStatisticBuilder colBuilder = new ColumnStatisticBuilder(
context.statistics.findColumnStatistics(slot));
colBuilder.setMinValue(union.getLow()).setMinExpr(union.getLowExpr())
.setMaxValue(union.getHigh()).setMaxExpr(union.getHighExpr())
.setNdv(union.getDistinctValues());
orStats.addColumnStats(slot, colBuilder.build());
}
}
return orStats;
}
// should not come here
Preconditions.checkArgument(false,
"unsupported compound operator: %s in %s",
predicate.getClass().getName(), predicate.toSql());
return context.statistics;
}
@ -159,7 +164,7 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
double rowCount = context.statistics.getRowCount();
double newRowCount = Math.max(rowCount * DEFAULT_HAVING_COEFFICIENT,
Math.max(statsForLeft.ndv, statsForRight.ndv));
return context.statistics.withRowCount(newRowCount);
return context.statistics.setRowCount(newRowCount);
}
}
if (!left.isConstant() && !right.isConstant()) {
@ -202,7 +207,7 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
private Statistics calculateWhenLiteralRight(ComparisonPredicate cp,
ColumnStatistic statsForLeft, ColumnStatistic statsForRight, EstimationContext context) {
if (statsForLeft.isUnKnown) {
return context.statistics.withSel(DEFAULT_INEQUALITY_COEFFICIENT);
return context.statistics.withSel(DEFAULT_INEQUALITY_COEFFICIENT, false);
}
if (cp instanceof EqualTo || cp instanceof NullSafeEqual) {
@ -236,9 +241,10 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
return estimateEqualToWithHistogram(cp.left(), statsForLeft, val, context);
}
Statistics equalStats = context.statistics.withSel(selectivity);
Statistics equalStats = context.statistics.withSel(selectivity, false);
Expression left = cp.left();
equalStats.addColumnStats(left, statsForRight);
context.addKeyIfSlot(left);
if (!(left instanceof SlotReference)) {
left.accept(new ColumnStatsAdjustVisitor(), equalStats);
}
@ -266,7 +272,7 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
Expression compareExpr = inPredicate.getCompareExpr();
ColumnStatistic compareExprStats = ExpressionEstimation.estimate(compareExpr, context.statistics);
if (compareExprStats.isUnKnown || compareExpr instanceof Function) {
return context.statistics.withSel(DEFAULT_IN_COEFFICIENT);
return context.statistics.withSel(DEFAULT_IN_COEFFICIENT, false);
}
List<Expression> options = inPredicate.getOptions();
// init minOption and maxOption by compareExpr.max and compareExpr.min respectively,
@ -342,10 +348,10 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
}
}
Statistics estimated = new Statistics(context.statistics);
estimated = estimated.withSel(selectivity);
estimated = estimated.withSel(selectivity, false);
estimated.addColumnStats(compareExpr,
compareExprStatsBuilder.build());
context.addKeyIfSlot(compareExpr);
return estimated;
}
@ -359,31 +365,55 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
if (context.statistics.isInputSlotsUnknown(not.getInputSlots())) {
return handleUnknownCase(context);
}
Statistics childStats = new FilterEstimation().estimate(not.child(), context.statistics);
Expression child = not.child();
Statistics childStats = child.accept(this, context);
//if estimated rowCount is 0, adjust to 1 to make upper join reorder reasonable.
double rowCount = Math.max(context.statistics.getRowCount() - childStats.getRowCount(), 1);
StatisticsBuilder statisticsBuilder = new StatisticsBuilder(context.statistics).setRowCount(rowCount);
for (Entry<Expression, ColumnStatistic> entry : context.statistics.columnStatistics().entrySet()) {
Expression expr = entry.getKey();
ColumnStatistic originColStats = entry.getValue();
ColumnStatistic childColStats = childStats.findColumnStatistics(expr);
double originNonNullCount = Math.max(originColStats.count - originColStats.numNulls, 0);
double childNonNullCount = Math.max(childColStats.count - childColStats.numNulls, 0);
double supersetValuesPerDistinctValue = StatsMathUtil.divide(originNonNullCount, originColStats.ndv);
double subsetValuesPerDistinctValue = StatsMathUtil.divide(childNonNullCount, childColStats.ndv);
double ndv;
if (supersetValuesPerDistinctValue <= subsetValuesPerDistinctValue) {
ndv = Math.max(originColStats.ndv - childColStats.ndv, 0);
} else {
ndv = originColStats.ndv;
// update key col stats
for (Slot slot : not.child().getInputSlots()) {
ColumnStatistic originColStats = context.statistics.findColumnStatistics(slot);
ColumnStatistic childColStats = childStats.findColumnStatistics(slot);
if (context.isKeySlot(slot)) {
ColumnStatisticBuilder colBuilder = new ColumnStatisticBuilder(childColStats);
// update column stats for
// 1. not (A=B)
// 2. not A in (...)
// 3. not A is null
// 4. not A like XXX
colBuilder.setNumNulls(0);
Preconditions.checkArgument(
child instanceof EqualTo
|| child instanceof InPredicate
|| child instanceof IsNull
|| child instanceof Like,
"Not-predicate meet unexpected child: %s", child.toSql());
if (child instanceof Like) {
rowCount = context.statistics.getRowCount() - childStats.getRowCount();
colBuilder.setNdv(originColStats.ndv - childColStats.ndv);
} else if (child instanceof InPredicate) {
colBuilder.setNdv(originColStats.ndv - childColStats.ndv);
colBuilder.setMinValue(originColStats.minValue)
.setMinExpr(originColStats.minExpr)
.setMaxValue(originColStats.maxValue)
.setMaxExpr(originColStats.maxExpr);
} else if (child instanceof IsNull) {
colBuilder.setNdv(originColStats.ndv);
colBuilder.setMinValue(originColStats.minValue)
.setMinExpr(originColStats.minExpr)
.setMaxValue(originColStats.maxValue)
.setMaxExpr(originColStats.maxExpr);
} else if (child instanceof EqualTo) {
colBuilder.setNdv(originColStats.ndv - childColStats.ndv);
colBuilder.setMinValue(originColStats.minValue)
.setMinExpr(originColStats.minExpr)
.setMaxValue(originColStats.maxValue)
.setMaxExpr(originColStats.maxExpr);
}
statisticsBuilder.putColumnStatistics(slot, colBuilder.build());
}
double nullCount = Math.max(originColStats.numNulls - childColStats.numNulls, 0);
ColumnStatistic columnStatistic = new ColumnStatisticBuilder(originColStats)
.setNdv(ndv)
.setNumNulls(nullCount)
.build();
statisticsBuilder.putColumnStatistics(expr, columnStatistic);
}
return statisticsBuilder.build();
}
@ -395,20 +425,37 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
}
double outputRowCount = childStats.numNulls;
ColumnStatisticBuilder colBuilder = new ColumnStatisticBuilder(childStats);
// do not modify ndv/min/max to make is-not-null work
colBuilder.setCount(outputRowCount).setNumNulls(outputRowCount);
colBuilder.setCount(outputRowCount).setNumNulls(outputRowCount)
.setMaxValue(Double.POSITIVE_INFINITY)
.setMinValue(Double.NEGATIVE_INFINITY)
.setNdv(0);
StatisticsBuilder builder = new StatisticsBuilder(context.statistics);
builder.putColumnStatistics(isNull.child(), colBuilder.build());
// TODO we do not call updateRowCountOnly() to make is-not-null work. this need refactor
context.addKeyIfSlot(isNull.child());
return builder.build();
}
/**
 * State carried through one filter-estimation pass: the input statistics plus the set of
 * slots that visited predicates have registered as key columns.
 */
static class EstimationContext {
    private final Statistics statistics;

    private final Set<Slot> keyColumns = Sets.newHashSet();

    public EstimationContext(Statistics statistics) {
        this.statistics = statistics;
    }

    /**
     * Registers {@code expr} as a key column when it is a {@link Slot};
     * any other kind of expression is ignored.
     */
    public void addKeyIfSlot(Expression expr) {
        if (!(expr instanceof Slot)) {
            return;
        }
        keyColumns.add((Slot) expr);
    }

    /**
     * Returns true only when {@code expr} is a {@link Slot} that has been registered
     * through {@link #addKeyIfSlot(Expression)}.
     */
    public boolean isKeySlot(Expression expr) {
        return expr instanceof Slot && keyColumns.contains(expr);
    }
}
private Statistics estimateBinaryComparisonFilter(Expression leftExpr, ColumnStatistic leftStats,
@ -421,7 +468,7 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
ColumnStatisticBuilder leftColumnStatisticBuilder;
Statistics updatedStatistics;
if (intersectRange.isEmpty()) {
updatedStatistics = context.statistics.updateRowCountOnly(0);
updatedStatistics = context.statistics.setRowCount(0);
leftColumnStatisticBuilder = new ColumnStatisticBuilder(leftStats)
.setMinValue(Double.NEGATIVE_INFINITY)
.setMinExpr(null)
@ -437,10 +484,11 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
.setMaxExpr(intersectRange.getHighExpr())
.setNdv(intersectRange.getDistinctValues());
double sel = leftRange.overlapPercentWith(rightRange);
updatedStatistics = context.statistics.withSel(sel);
updatedStatistics = context.statistics.withSel(sel, false);
leftColumnStatisticBuilder.setCount(updatedStatistics.getRowCount());
}
updatedStatistics.addColumnStats(leftExpr, leftColumnStatisticBuilder.build());
context.addKeyIfSlot(leftExpr);
leftExpr.accept(new ColumnStatsAdjustVisitor(), updatedStatistics);
return updatedStatistics;
}
@ -450,19 +498,17 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
StatisticRange leftRange = StatisticRange.from(leftStats, leftExpr.getDataType());
StatisticRange rightRange = StatisticRange.from(rightStats, rightExpr.getDataType());
StatisticRange leftIntersectRight = leftRange.intersect(rightRange);
StatisticRange rightIntersectLeft = rightRange.intersect(leftIntersectRight);
ColumnStatisticBuilder leftBuilder = new ColumnStatisticBuilder(leftStats);
leftBuilder.setNdv(leftIntersectRight.getDistinctValues());
leftBuilder.setMinValue(leftIntersectRight.getLow());
leftBuilder.setMaxValue(leftIntersectRight.getHigh());
ColumnStatisticBuilder rightBuilder = new ColumnStatisticBuilder(rightStats);
rightBuilder.setNdv(rightIntersectLeft.getDistinctValues());
rightBuilder.setMinValue(rightIntersectLeft.getLow());
rightBuilder.setMaxValue(rightIntersectLeft.getDistinctValues());
StatisticRange intersect = rightRange.intersect(leftIntersectRight);
ColumnStatisticBuilder intersectBuilder = new ColumnStatisticBuilder(leftStats);
intersectBuilder.setNdv(intersect.getDistinctValues());
intersectBuilder.setMinValue(intersect.getLow());
intersectBuilder.setMaxValue(intersect.getHigh());
double sel = 1 / StatsMathUtil.nonZeroDivisor(Math.max(leftStats.ndv, rightStats.ndv));
Statistics updatedStatistics = context.statistics.withSel(sel);
updatedStatistics.addColumnStats(leftExpr, leftBuilder.build());
updatedStatistics.addColumnStats(rightExpr, rightBuilder.build());
Statistics updatedStatistics = context.statistics.withSel(sel, false);
updatedStatistics.addColumnStats(leftExpr, intersectBuilder.build());
updatedStatistics.addColumnStats(rightExpr, intersectBuilder.build());
context.addKeyIfSlot(leftExpr);
context.addKeyIfSlot(rightExpr);
return updatedStatistics;
}
@ -474,16 +520,18 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
// Left always less than Right
if (leftRange.getHigh() < rightRange.getLow()) {
statistics =
context.statistics.withRowCount(Math.min(context.statistics.getRowCount() - leftStats.numNulls,
context.statistics.setRowCount(Math.min(context.statistics.getRowCount() - leftStats.numNulls,
context.statistics.getRowCount() - rightStats.numNulls));
statistics.addColumnStats(leftExpr, new ColumnStatisticBuilder(leftStats).setNumNulls(0.0).build());
statistics.addColumnStats(rightExpr, new ColumnStatisticBuilder(rightStats).setNumNulls(0.0).build());
context.addKeyIfSlot(leftExpr);
context.addKeyIfSlot(rightExpr);
return statistics;
}
double leftOverlapPercent = leftRange.overlapPercentWith(rightRange);
// Left always greater than right
if (leftOverlapPercent == 0) {
return context.statistics.withRowCount(0.0);
return context.statistics.setRowCount(0.0);
}
StatisticRange leftAlwaysLessThanRightRange = new StatisticRange(leftStats.minValue, leftStats.minExpr,
rightStats.minValue, rightStats.minExpr, Double.NaN, leftExpr.getDataType());
@ -514,7 +562,9 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
double sel = leftAlwaysLessThanRightPercent
+ leftOverlapPercent * rightOverlappingRangeFraction * DEFAULT_INEQUALITY_COEFFICIENT
+ leftOverlapPercent * rightAlwaysGreaterRangeFraction;
return context.statistics.withSel(sel)
context.addKeyIfSlot(leftExpr);
context.addKeyIfSlot(rightExpr);
return context.statistics.withSel(sel, false)
.addColumnStats(leftExpr, leftColumnStatistic)
.addColumnStats(rightExpr, rightColumnStatistic);
}
@ -547,10 +597,11 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
.setMaxValue(numVal)
.setHistogram(new HistogramBuilder(leftHist).setBuckets(updatedBucketList).build())
.build();
return context.statistics.withSel(sel).addColumnStats(leftExpr, columnStatistic);
context.addKeyIfSlot(leftExpr);
return context.statistics.withSel(sel, false).addColumnStats(leftExpr, columnStatistic);
}
}
return context.statistics.withSel(0);
return context.statistics.withSel(0, false);
}
private Statistics estimateGreaterThanLiteralWithHistogram(Expression leftExpr, ColumnStatistic leftStats,
@ -583,10 +634,11 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
.setMaxValue(numVal)
.setHistogram(new HistogramBuilder(leftHist).setBuckets(updatedBucketList).build())
.build();
return context.statistics.withSel(sel).addColumnStats(leftExpr, columnStatistic);
context.addKeyIfSlot(leftExpr);
return context.statistics.withSel(sel, false).addColumnStats(leftExpr, columnStatistic);
}
}
return context.statistics.withSel(0);
return context.statistics.withSel(0, false);
}
private Statistics estimateEqualToWithHistogram(Expression leftExpr, ColumnStatistic leftStats,
@ -610,11 +662,24 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
.setMaxValue(numVal)
.setMinValue(numVal)
.build();
return context.statistics.withSel(sel).addColumnStats(leftExpr, columnStatistic);
context.addKeyIfSlot(leftExpr);
return context.statistics.withSel(sel, false).addColumnStats(leftExpr, columnStatistic);
}
@Override
public Statistics visitLike(Like like, EstimationContext context) {
return context.statistics.withSel(DEFAULT_LIKE_COMPARISON_SELECTIVITY);
StatisticsBuilder statsBuilder = new StatisticsBuilder(context.statistics);
statsBuilder.setRowCount(context.statistics.getRowCount() * DEFAULT_LIKE_COMPARISON_SELECTIVITY);
if (like.left() instanceof Slot) {
ColumnStatistic origin = context.statistics.findColumnStatistics(like.left());
Preconditions.checkArgument(origin != null,
"col stats not found. slot=%s in %s",
like.left().toSql(), like.toSql());
ColumnStatisticBuilder colBuilder = new ColumnStatisticBuilder(origin);
colBuilder.setNdv(origin.ndv * DEFAULT_LIKE_COMPARISON_SELECTIVITY).setNumNulls(0);
statsBuilder.putColumnStatistics(like.left(), colBuilder.build());
context.addKeyIfSlot(like.left());
}
return statsBuilder.build();
}
}

View File

@ -142,7 +142,7 @@ public class JoinEstimation {
outputRowCount = Math.max(1, outputRowCount * ratio.get());
}
}
innerJoinStats = crossJoinStats.updateRowCountOnly(outputRowCount);
innerJoinStats = crossJoinStats.updateRowCountAndColStats(outputRowCount);
return innerJoinStats;
}

View File

@ -690,7 +690,7 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> {
// TODO: for the filter push down window situation, we will prune the row count twice
// because we keep the pushed down filter. And it will be calculated twice, one of them in 'PartitionTopN'
// and the other is in 'Filter'. It's hard to dismiss.
return childStats.updateRowCountOnly(rowCount);
return childStats.updateRowCountAndColStats(rowCount);
}
private Statistics computeLimit(Limit limit) {

View File

@ -44,14 +44,22 @@ public class StatisticRange {
private final DataType dataType;
private final boolean isEmpty;
/**
 * Creates a range that is not flagged as empty; delegates to the private constructor
 * with {@code isEmpty = false}.
 *
 * @param low lower bound as a double
 * @param lowExpr literal expression associated with the lower bound
 * @param high upper bound as a double
 * @param highExpr literal expression associated with the upper bound
 * @param distinctValues number of distinct values recorded for the range
 * @param dataType data type recorded for the range
 */
public StatisticRange(double low, LiteralExpr lowExpr, double high, LiteralExpr highExpr,
double distinctValues, DataType dataType) {
this(low, lowExpr, high, highExpr, distinctValues, dataType, false);
}
/**
 * Canonical constructor: stores every argument in the field of the same name.
 * The {@code isEmpty} flag is kept as an explicit field rather than derived from the bounds.
 */
private StatisticRange(double low, LiteralExpr lowExpr, double high, LiteralExpr highExpr,
double distinctValues, DataType dataType, boolean isEmpty) {
this.low = low;
this.lowExpr = lowExpr;
this.high = high;
this.highExpr = highExpr;
this.distinctValues = distinctValues;
this.dataType = dataType;
this.isEmpty = isEmpty;
}
public LiteralExpr getLowExpr() {
@ -100,17 +108,26 @@ public class StatisticRange {
}
public static StatisticRange empty(DataType dataType) {
return new StatisticRange(Double.NaN, null, Double.NaN, null, 0, dataType);
return new StatisticRange(Double.NEGATIVE_INFINITY, null, Double.POSITIVE_INFINITY,
null, 0, dataType, true);
}
public boolean isEmpty() {
return Double.isNaN(low) && Double.isNaN(high);
return isEmpty;
}
/** Returns true when the low bound and the high bound are both infinite. */
public boolean isBothInfinite() {
    boolean lowUnbounded = Double.isInfinite(low);
    boolean highUnbounded = Double.isInfinite(high);
    return lowUnbounded && highUnbounded;
}
/** Returns true when at least one of the two bounds is infinite. */
public boolean isInfinite() {
    boolean lowUnbounded = Double.isInfinite(low);
    boolean highUnbounded = Double.isInfinite(high);
    return lowUnbounded || highUnbounded;
}
/** Returns true when both bounds are finite, i.e. neither infinite nor NaN. */
public boolean isFinite() {
    boolean lowBounded = Double.isFinite(low);
    boolean highBounded = Double.isFinite(high);
    return lowBounded && highBounded;
}
public static StatisticRange from(ColumnStatistic colStats, DataType dataType) {
return new StatisticRange(colStats.minValue, colStats.minExpr, colStats.maxValue, colStats.maxExpr,
colStats.ndv, dataType);

View File

@ -92,10 +92,14 @@ public class Statistics {
return statistics;
}
/**
 * Returns a new Statistics that carries the given row count and a copy of this
 * object's column-statistics map. Despite the setter-like name, this instance is
 * left unmodified.
 */
public Statistics setRowCount(double rowCount) {
    HashMap<Expression, ColumnStatistic> columnStatsCopy = new HashMap<>(expressionToColumnStats);
    return new Statistics(rowCount, columnStatsCopy);
}
/**
* Update by count.
*/
public Statistics updateRowCountOnly(double rowCount) {
public Statistics updateRowCountAndColStats(double rowCount) {
Statistics statistics = new Statistics(rowCount, expressionToColumnStats);
for (Entry<Expression, ColumnStatistic> entry : expressionToColumnStats.entrySet()) {
ColumnStatistic columnStatistic = entry.getValue();
@ -144,8 +148,21 @@ public class Statistics {
}
/**
 * Applies the selectivity {@code sel} by delegating to
 * {@code withSel(sel, true)}, i.e. with the {@code updateColStats} flag enabled.
 */
public Statistics withSel(double sel) {
return withSel(sel, true);
}
public Statistics withSel(double sel, boolean updateColStats) {
sel = StatsMathUtil.minNonNaN(sel, 1);
return withRowCount(rowCount * sel);
if (Double.isNaN(rowCount)) {
return this;
}
double newCount = rowCount * sel;
double originCount = rowCount;
Statistics statistics = new Statistics(newCount, new HashMap<>(expressionToColumnStats));
if (updateColStats) {
statistics.fix(newCount, StatsMathUtil.nonZeroDivisor(originCount));
}
return statistics;
}
public Statistics addColumnStats(Expression expression, ColumnStatistic columnStatistic) {