[opt](nereids) compare str literal as date literal to compute selectivity (#35610)
this pr improves #34542, when the real data type is date-like type. Some users are likely to define date(datetime) column as Varchar type. when estimating the selectivity of predicate like A>'2020-01-01', if nereids regards A and '2020-01-01' as date type, the sel is more accurate than that as string type.
This commit is contained in:
@ -786,6 +786,10 @@ public class DateLiteral extends LiteralExpr {
|
||||
return getLongValue();
|
||||
}
|
||||
|
||||
public double getDoubleValueAsDateTime() {
|
||||
return (year * 10000 + month * 100 + day) * 1000000L + hour * 10000 + minute * 100 + second;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void toThrift(TExprNode msg) {
|
||||
if (type.isDatetimeV2()) {
|
||||
|
||||
@ -17,7 +17,9 @@
|
||||
|
||||
package org.apache.doris.nereids.stats;
|
||||
|
||||
import org.apache.doris.analysis.DateLiteral;
|
||||
import org.apache.doris.analysis.LiteralExpr;
|
||||
import org.apache.doris.analysis.StringLiteral;
|
||||
import org.apache.doris.nereids.stats.FilterEstimation.EstimationContext;
|
||||
import org.apache.doris.nereids.trees.TreeNode;
|
||||
import org.apache.doris.nereids.trees.expressions.And;
|
||||
@ -39,7 +41,10 @@ import org.apache.doris.nereids.trees.expressions.Slot;
|
||||
import org.apache.doris.nereids.trees.expressions.SlotReference;
|
||||
import org.apache.doris.nereids.trees.expressions.functions.Function;
|
||||
import org.apache.doris.nereids.trees.expressions.literal.Literal;
|
||||
import org.apache.doris.nereids.trees.expressions.literal.StringLikeLiteral;
|
||||
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
|
||||
import org.apache.doris.nereids.types.DataType;
|
||||
import org.apache.doris.nereids.types.DateTimeType;
|
||||
import org.apache.doris.nereids.types.coercion.RangeScalable;
|
||||
import org.apache.doris.statistics.ColumnStatistic;
|
||||
import org.apache.doris.statistics.ColumnStatisticBuilder;
|
||||
@ -50,7 +55,10 @@ import org.apache.doris.statistics.StatisticsBuilder;
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.collect.Sets;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.function.Predicate;
|
||||
|
||||
@ -183,22 +191,22 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
|
||||
}
|
||||
}
|
||||
|
||||
private Statistics updateLessThanLiteral(Expression leftExpr, ColumnStatistic statsForLeft,
|
||||
private Statistics updateLessThanLiteral(Expression leftExpr, DataType dataType, ColumnStatistic statsForLeft,
|
||||
ColumnStatistic statsForRight, EstimationContext context) {
|
||||
StatisticRange rightRange = new StatisticRange(statsForLeft.minValue, statsForLeft.minExpr,
|
||||
statsForRight.maxValue, statsForRight.maxExpr,
|
||||
statsForLeft.ndv, leftExpr.getDataType());
|
||||
return estimateBinaryComparisonFilter(leftExpr,
|
||||
statsForLeft.ndv, dataType);
|
||||
return estimateBinaryComparisonFilter(leftExpr, dataType,
|
||||
statsForLeft,
|
||||
rightRange, context);
|
||||
}
|
||||
|
||||
private Statistics updateGreaterThanLiteral(Expression leftExpr, ColumnStatistic statsForLeft,
|
||||
private Statistics updateGreaterThanLiteral(Expression leftExpr, DataType dataType, ColumnStatistic statsForLeft,
|
||||
ColumnStatistic statsForRight, EstimationContext context) {
|
||||
StatisticRange rightRange = new StatisticRange(statsForRight.minValue, statsForRight.minExpr,
|
||||
statsForLeft.maxValue, statsForLeft.maxExpr,
|
||||
statsForLeft.ndv, leftExpr.getDataType());
|
||||
return estimateBinaryComparisonFilter(leftExpr, statsForLeft, rightRange, context);
|
||||
statsForLeft.ndv, dataType);
|
||||
return estimateBinaryComparisonFilter(leftExpr, dataType, statsForLeft, rightRange, context);
|
||||
}
|
||||
|
||||
private Statistics calculateWhenLiteralRight(ComparisonPredicate cp,
|
||||
@ -210,16 +218,113 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
|
||||
if (cp instanceof EqualPredicate) {
|
||||
return estimateEqualTo(cp, statsForLeft, statsForRight, context);
|
||||
} else {
|
||||
// literal Map used to covert dateLiteral back to stringLiteral
|
||||
Map<DateLiteral, StringLiteral> literalMap = new HashMap<>();
|
||||
DataType compareType = cp.left().getDataType();
|
||||
Optional<ColumnStatistic> statsForLeftMayConvertedOpt =
|
||||
tryConvertStringColStatsToDateColStats(statsForLeft, literalMap);
|
||||
Optional<ColumnStatistic> statsForRightMayConvertedOpt = (statsForLeftMayConvertedOpt.isPresent())
|
||||
? tryConvertStringColStatsToDateColStats(statsForRight, literalMap)
|
||||
: Optional.empty();
|
||||
|
||||
boolean converted = false;
|
||||
ColumnStatistic statsForLeftMayConverted = statsForLeft;
|
||||
ColumnStatistic statsForRightMayConverted = statsForRight;
|
||||
if (statsForLeftMayConvertedOpt.isPresent() && statsForRightMayConvertedOpt.isPresent()
|
||||
&& statsForRightMayConvertedOpt.get().minExpr.getType()
|
||||
== statsForLeftMayConvertedOpt.get().minExpr.getType()) {
|
||||
// string type is converted to date type
|
||||
converted = true;
|
||||
compareType = DateTimeType.INSTANCE;
|
||||
statsForLeftMayConverted = statsForLeftMayConvertedOpt.get();
|
||||
statsForRightMayConverted = statsForRightMayConvertedOpt.get();
|
||||
}
|
||||
Statistics result = null;
|
||||
if (cp instanceof LessThan || cp instanceof LessThanEqual) {
|
||||
return updateLessThanLiteral(cp.left(), statsForLeft, statsForRight, context);
|
||||
result = updateLessThanLiteral(cp.left(), compareType, statsForLeftMayConverted,
|
||||
statsForRightMayConverted, context);
|
||||
} else if (cp instanceof GreaterThan || cp instanceof GreaterThanEqual) {
|
||||
return updateGreaterThanLiteral(cp.left(), statsForLeft, statsForRight, context);
|
||||
result = updateGreaterThanLiteral(cp.left(), compareType, statsForLeftMayConverted,
|
||||
statsForRightMayConverted, context);
|
||||
} else {
|
||||
throw new RuntimeException(String.format("Unexpected expression : %s", cp.toSql()));
|
||||
}
|
||||
if (converted) {
|
||||
// convert min/max of left.colStats back to string type
|
||||
ColumnStatistic newLeftStats = result.findColumnStatistics(cp.left());
|
||||
result.addColumnStats(cp.left(), convertDateColStatsToStringColStats(newLeftStats, literalMap));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
private ColumnStatistic convertDateColStatsToStringColStats(ColumnStatistic colStats,
|
||||
Map<DateLiteral, StringLiteral> literalMap) {
|
||||
if (colStats.minExpr == null && colStats.maxExpr == null) {
|
||||
// when sel=0, minExpr and maxExpr are both null
|
||||
return colStats;
|
||||
}
|
||||
Preconditions.checkArgument(colStats.minExpr instanceof DateLiteral
|
||||
&& colStats.maxExpr instanceof DateLiteral,
|
||||
"cannot convert colStats back to stringType %s", colStats.toString());
|
||||
ColumnStatisticBuilder builder = new ColumnStatisticBuilder(colStats);
|
||||
StringLiteral newMinLiteral = new StringLiteral(colStats.maxExpr.toString());
|
||||
return builder.setMaxExpr(newMinLiteral)
|
||||
.setMaxExpr(literalMap.get(colStats.maxExpr))
|
||||
.setMaxValue(StringLikeLiteral.getDouble(colStats.maxExpr.toString()))
|
||||
.setMinExpr(literalMap.get(colStats.minExpr))
|
||||
.setMinValue(StringLikeLiteral.getDouble(colStats.minExpr.getStringValue()))
|
||||
.build();
|
||||
}
|
||||
|
||||
private Optional<ColumnStatistic> tryConvertStringColStatsToDateColStats(ColumnStatistic colStats,
|
||||
Map<DateLiteral, StringLiteral> literalMap) {
|
||||
if (colStats.minExpr == null || colStats.maxExpr == null) {
|
||||
return Optional.empty();
|
||||
}
|
||||
if (!(colStats.minExpr instanceof StringLiteral) || !(colStats.maxExpr instanceof StringLiteral)) {
|
||||
return Optional.empty();
|
||||
}
|
||||
Optional<DateLiteral> newMinExpr = tryConvertStrLiteralToDateLiteral(colStats.minExpr);
|
||||
if (newMinExpr.isEmpty()) {
|
||||
return Optional.empty();
|
||||
}
|
||||
Optional<DateLiteral> newMaxExpr = tryConvertStrLiteralToDateLiteral(colStats.maxExpr);
|
||||
if (newMaxExpr.isEmpty()) {
|
||||
return Optional.empty();
|
||||
}
|
||||
if (newMaxExpr.get().getType() != newMinExpr.get().getType()) {
|
||||
return Optional.empty();
|
||||
}
|
||||
literalMap.put(newMinExpr.get(), (StringLiteral) colStats.minExpr);
|
||||
literalMap.put(newMaxExpr.get(), (StringLiteral) colStats.maxExpr);
|
||||
|
||||
ColumnStatisticBuilder builder = new ColumnStatisticBuilder(colStats);
|
||||
return Optional.of(builder.setMinValue(newMinExpr.get().getDoubleValueAsDateTime())
|
||||
.setMinExpr(newMinExpr.get())
|
||||
.setMaxValue(newMaxExpr.get().getDoubleValueAsDateTime())
|
||||
.setMaxExpr(newMaxExpr.get())
|
||||
.build());
|
||||
}
|
||||
|
||||
private Optional<DateLiteral> tryConvertStrLiteralToDateLiteral(LiteralExpr literal) {
|
||||
if (literal == null) {
|
||||
return Optional.empty();
|
||||
}
|
||||
if (!(literal instanceof StringLiteral)) {
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
DateLiteral dt = null;
|
||||
try {
|
||||
dt = new DateLiteral(literal.getStringValue());
|
||||
dt.checkValueValid();
|
||||
} catch (Exception e) {
|
||||
// ignore
|
||||
}
|
||||
return dt == null ? Optional.empty() : Optional.of(dt);
|
||||
}
|
||||
|
||||
private Statistics estimateEqualTo(ComparisonPredicate cp, ColumnStatistic statsForLeft,
|
||||
ColumnStatistic statsForRight,
|
||||
EstimationContext context) {
|
||||
@ -467,11 +572,11 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
|
||||
}
|
||||
}
|
||||
|
||||
private Statistics estimateBinaryComparisonFilter(Expression leftExpr, ColumnStatistic leftStats,
|
||||
private Statistics estimateBinaryComparisonFilter(Expression leftExpr, DataType dataType, ColumnStatistic leftStats,
|
||||
StatisticRange rightRange, EstimationContext context) {
|
||||
StatisticRange leftRange =
|
||||
new StatisticRange(leftStats.minValue, leftStats.minExpr, leftStats.maxValue, leftStats.maxExpr,
|
||||
leftStats.ndv, leftExpr.getDataType());
|
||||
leftStats.ndv, dataType);
|
||||
StatisticRange intersectRange = leftRange.cover(rightRange);
|
||||
|
||||
ColumnStatisticBuilder leftColumnStatisticBuilder;
|
||||
@ -495,7 +600,7 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
|
||||
.setNdv(intersectRange.getDistinctValues())
|
||||
.setNumNulls(0);
|
||||
double sel = leftRange.overlapPercentWith(rightRange);
|
||||
if (!(leftExpr.getDataType() instanceof RangeScalable) && (sel != 0.0 && sel != 1.0)) {
|
||||
if (!(dataType instanceof RangeScalable) && (sel != 0.0 && sel != 1.0)) {
|
||||
sel = DEFAULT_INEQUALITY_COEFFICIENT;
|
||||
}
|
||||
sel = getNotNullSelectivity(leftStats, sel);
|
||||
|
||||
@ -39,11 +39,18 @@ public abstract class StringLikeLiteral extends Literal {
|
||||
|
||||
@Override
|
||||
public double getDouble() {
|
||||
return getDouble(value);
|
||||
}
|
||||
|
||||
/**
|
||||
* get double value
|
||||
*/
|
||||
public static double getDouble(String str) {
|
||||
long v = 0;
|
||||
int pos = 0;
|
||||
int len = Math.min(value.length(), 7);
|
||||
int len = Math.min(str.length(), 7);
|
||||
while (pos < len) {
|
||||
v += Byte.toUnsignedLong(value.getBytes()[pos]) << ((6 - pos) * 8);
|
||||
v += Byte.toUnsignedLong(str.getBytes()[pos]) << ((6 - pos) * 8);
|
||||
pos++;
|
||||
}
|
||||
return (double) v;
|
||||
|
||||
Reference in New Issue
Block a user