[feature](nereids) adjust min/max of column stats for cast function (#21772)

For cast(A as date), where A is a string column, the min/max of the resulting column stats should be calculated as follows:
convert A.minExpr (and likewise A.maxExpr) to a date literal, then take the double value of that date literal.

Add "explain memo plan select ..." to print the memo from the MySQL client.

Dump column stats for FileScanNode as well; this is used in data lake scenarios.
This commit is contained in:
minghong
2023-07-14 12:54:04 +08:00
committed by GitHub
parent b013f8006d
commit 62214cd1f4
8 changed files with 49 additions and 7 deletions

View File

@ -251,6 +251,7 @@ MATCH_ANY: 'MATCH_ANY';
MATCH_ALL: 'MATCH_ALL';
MATCH_PHRASE: 'MATCH_PHRASE';
MATCHED: 'MATCHED';
MEMO: 'MEMO';
MERGE: 'MERGE';
MINUTE: 'MINUTE';
MONTH: 'MONTH';

View File

@ -78,6 +78,7 @@ planType
| REWRITTEN | LOGICAL // same type
| OPTIMIZED | PHYSICAL // same type
| SHAPE
| MEMO
| ALL // default type
;

View File

@ -408,6 +408,10 @@ public class NereidsPlanner extends Planner {
return "cost = " + cost + "\n" + optimizedPlan.treeString();
case SHAPE_PLAN:
return optimizedPlan.shape("");
case MEMO_PLAN:
return cascadesContext.getMemo().toString()
+ "\n\n========== OPTIMIZED PLAN ==========\n"
+ optimizedPlan.treeString();
case ALL_PLAN:
return "========== PARSED PLAN ==========\n"
+ parsedPlan.treeString() + "\n\n"

View File

@ -34,6 +34,7 @@ import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.plans.GroupPlan;
import org.apache.doris.nereids.trees.plans.LeafPlan;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.logical.LogicalFileScan;
import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan;
import org.apache.doris.nereids.trees.plans.logical.LogicalPlan;
import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
@ -737,10 +738,12 @@ public class Memo {
builder.append("\n\n").append(group);
builder.append(" stats=").append(group.getStatistics()).append("\n");
Statistics stats = group.getStatistics();
if (stats != null && !group.getLogicalExpressions().isEmpty()
&& group.getLogicalExpressions().get(0).getPlan() instanceof LogicalOlapScan) {
for (Entry e : stats.columnStatistics().entrySet()) {
builder.append(" ").append(e.getKey()).append(":").append(e.getValue()).append("\n");
if (stats != null && !group.getLogicalExpressions().isEmpty()) {
Plan plan = group.getLogicalExpressions().get(0).getPlan();
if (plan instanceof LogicalOlapScan || plan instanceof LogicalFileScan) {
for (Entry e : stats.columnStatistics().entrySet()) {
builder.append(" ").append(e.getKey()).append(":").append(e.getValue()).append("\n");
}
}
}

View File

@ -1901,6 +1901,9 @@ public class LogicalPlanBuilder extends DorisParserBaseVisitor<Object> {
if (planTypeContext.SHAPE() != null) {
return ExplainLevel.SHAPE_PLAN;
}
if (planTypeContext.MEMO() != null) {
return ExplainLevel.MEMO_PLAN;
}
return ExplainLevel.ALL_PLAN;
}

View File

@ -18,6 +18,8 @@
package org.apache.doris.nereids.stats;
import org.apache.doris.analysis.ArithmeticExpr.Operator;
import org.apache.doris.analysis.StringLiteral;
import org.apache.doris.nereids.exceptions.AnalysisException;
import org.apache.doris.nereids.trees.expressions.Add;
import org.apache.doris.nereids.trees.expressions.AggregateExpression;
import org.apache.doris.nereids.trees.expressions.Alias;
@ -84,6 +86,7 @@ import org.apache.doris.nereids.trees.expressions.functions.scalar.Year;
import org.apache.doris.nereids.trees.expressions.functions.scalar.YearsAdd;
import org.apache.doris.nereids.trees.expressions.functions.scalar.YearsDiff;
import org.apache.doris.nereids.trees.expressions.functions.scalar.YearsSub;
import org.apache.doris.nereids.trees.expressions.literal.DateLiteral;
import org.apache.doris.nereids.trees.expressions.literal.Literal;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.DataType;
@ -140,12 +143,38 @@ public class ExpressionEstimation extends ExpressionVisitor<ColumnStatistic, Sta
return columnStat.build();
}
/**
 * Estimates the column statistic of a cast expression.
 *
 * <p>If {@code context} already holds a statistic for the cast expression itself, that statistic
 * is returned as-is; otherwise the estimate is derived by visiting the cast's child expression.
 *
 * @param cast the cast expression being estimated
 * @param context the statistics used to look up and derive column statistics
 * @return the statistic found for the cast, or one derived from its child
 */
@Override
public ColumnStatistic visitCast(Cast cast, Statistics context) {
ColumnStatistic stats = context.findColumnStatistics(cast);
if (stats != null) {
return stats;
}
return cast.child().accept(this, context);
ColumnStatistic childColStats = cast.child().accept(this, context);
return castMinMax(childColStats, cast.getDataType());
}
/**
 * Re-derives the numeric min/max of a column statistic when a string column is cast to a
 * date-like type.
 *
 * <p>When {@code colStats.minExpr} is a {@link StringLiteral} and {@code targetType} is date-like,
 * the string forms of {@code minExpr} and {@code maxExpr} are parsed as {@link DateLiteral}s and
 * the values returned by {@code DateLiteral#getValue()} replace the min/max values. The
 * conversion is all-or-nothing: if either bound cannot be parsed, neither value is changed,
 * so min and max always stay in the same numeric domain.
 *
 * @param colStats statistic of the cast's child expression
 * @param targetType data type the child is cast to
 * @return {@code colStats} itself when no adjustment applies, otherwise a statistic rebuilt from
 *         {@code colStats} (with converted min/max if both bounds parsed)
 */
private ColumnStatistic castMinMax(ColumnStatistic colStats, DataType targetType) {
    // instanceof is false for null, so this guard also covers a missing minExpr.
    if (!(colStats.minExpr instanceof StringLiteral) || !targetType.isDateLikeType()) {
        return colStats;
    }
    ColumnStatisticBuilder builder = new ColumnStatisticBuilder(colStats);
    if (colStats.maxExpr != null) {
        try {
            // Parse both bounds before touching the builder: a failure on the max bound must
            // not leave a converted min next to an unconverted max.
            long min = new DateLiteral(colStats.minExpr.getStringValue()).getValue();
            long max = new DateLiteral(colStats.maxExpr.getStringValue()).getValue();
            builder.setMinValue(min);
            builder.setMaxValue(max);
        } catch (AnalysisException ignored) {
            // Deliberately ignored (presumably raised when a bound is not a valid date string):
            // keep the original min/max rather than failing estimation.
        }
    }
    return builder.build();
}
@Override

View File

@ -46,6 +46,7 @@ public class ExplainCommand extends Command implements NoForward {
REWRITTEN_PLAN(true),
OPTIMIZED_PLAN(true),
SHAPE_PLAN(true),
MEMO_PLAN(true),
ALL_PLAN(true)
;

View File

@ -314,8 +314,8 @@ public class ColumnStatistic {
/**
 * Returns a one-line textual summary of this column statistic.
 *
 * <p>When {@code isUnKnown} is set, a fixed marker string is returned instead of the
 * formatted field values.
 */
@Override
public String toString() {
return isUnKnown ? "unKnown" : String.format("ndv=%.4f, min=%f, max=%f, sel=%f, count=%.4f",
ndv, minValue, maxValue, selectivity, count);
return isUnKnown ? "unknown" : String.format("ndv=%.4f, min=%f(%s), max=%f(%s), count=%.4f",
ndv, minValue, minExpr, maxValue, maxExpr, count);
}
public JSONObject toJson() {