diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java index 5e93864304..3a8a9c183e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java @@ -94,12 +94,15 @@ import org.apache.doris.nereids.trees.expressions.literal.DateLiteral; import org.apache.doris.nereids.trees.expressions.literal.Literal; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.DataType; +import org.apache.doris.qe.ConnectContext; import org.apache.doris.statistics.ColumnStatistic; import org.apache.doris.statistics.ColumnStatisticBuilder; import org.apache.doris.statistics.Statistics; import com.google.common.base.Preconditions; import org.apache.commons.collections.CollectionUtils; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; import java.time.Instant; import java.time.LocalDate; @@ -111,7 +114,7 @@ import java.util.List; * Used to estimate for expressions that not producing boolean value. */ public class ExpressionEstimation extends ExpressionVisitor { - + public static final Logger LOG = LogManager.getLogger(ExpressionEstimation.class); public static final long DAYS_FROM_0_TO_1970 = 719528; public static final long DAYS_FROM_0_TO_9999 = 3652424; private static final ExpressionEstimation INSTANCE = new ExpressionEstimation(); @@ -120,11 +123,20 @@ public class ExpressionEstimation extends ExpressionVisitor { private void estimate() { Plan plan = groupExpression.getPlan(); - Statistics newStats = plan.accept(this, null); + Statistics newStats; + try { + newStats = plan.accept(this, null); + } catch (Exception e) { + // throw exception in debug mode + if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().feDebug) { + throw e; + } + LOG.warn("stats calculation failed, plan " + plan.toString(), e); + // use unknown stats or the first child's stats + if (plan.children().isEmpty() || !(plan.child(0) instanceof GroupPlan)) { + Map columnStatisticMap = new HashMap<>(); + for (Slot slot : plan.getOutput()) { + columnStatisticMap.put(slot, ColumnStatistic.createUnknownByDataType(slot.getDataType())); + } + newStats = new Statistics(1, 1, columnStatisticMap); + } else { + newStats = ((GroupPlan) plan.child(0)).getStats(); + } + } newStats.enforceValid(); // We ensure that the rowCount remains unchanged in order to make the cost of each plan comparable. + final Statistics tmpStats = newStats; if (groupExpression.getOwnerGroup().getStatistics() == null) { boolean isReliable = groupExpression.getPlan().getExpressions().stream() - .noneMatch(e -> newStats.isInputSlotsUnknown(e.getInputSlots())); + .noneMatch(e -> tmpStats.isInputSlotsUnknown(e.getInputSlots())); groupExpression.getOwnerGroup().setStatsReliable(isReliable); groupExpression.getOwnerGroup().setStatistics(newStats); groupExpression.setEstOutputRowCount(newStats.getRowCount()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/GroupPlan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/GroupPlan.java index 49056add39..b45610da53 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/GroupPlan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/GroupPlan.java @@ -62,7 +62,7 @@ public class GroupPlan extends LogicalLeaf implements BlockFuncDepsPropagation { @Override public Statistics getStats() { - throw new IllegalStateException("GroupPlan can not invoke getStats()"); + return group.getStatistics(); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 3f874077f5..ec34b308a7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -2095,6 +2095,12 @@ public class SessionVariable implements Serializable, Writable { "use other health replica when the use_fix_replica meet error" }) public boolean fallbackOtherReplicaWhenFixedCorrupt = false; + public static final String FE_DEBUG = "fe_debug"; + @VariableMgr.VarAttr(name = FE_DEBUG, needForward = true, fuzzy = true, + description = {"when set true, FE will throw exceptions instead swallow them. This is used for test", + "when set true, FE will throw exceptions instead swallow them. This is used for test"}) + public boolean feDebug = false; + @VariableMgr.VarAttr(name = SHOW_ALL_FE_CONNECTION, description = {"when it's true show processlist statement list all fe's connection", "当变量为true时,show processlist命令展示所有fe的连接"}) @@ -2320,6 +2326,7 @@ public class SessionVariable implements Serializable, Writable { public void initFuzzyModeVariables() { Random random = new SecureRandom(); this.parallelExecInstanceNum = random.nextInt(8) + 1; + this.feDebug = true; this.parallelPipelineTaskNum = random.nextInt(8); this.parallelPrepareThreshold = random.nextInt(32) + 1; this.enableCommonExprPushdown = random.nextBoolean(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java index bae830e9ed..c28db1512d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java @@ -22,6 +22,8 @@ import org.apache.doris.catalog.Column; import org.apache.doris.catalog.Type; import org.apache.doris.common.AnalysisException; import org.apache.doris.datasource.InternalCatalog; +import org.apache.doris.nereids.types.DataType; +import org.apache.doris.nereids.types.coercion.CharacterType; import org.apache.doris.statistics.util.StatisticsUtil; import com.google.common.collect.Sets; @@ -48,8 +50,8 @@ public class ColumnStatistic { private static final Logger LOG = LogManager.getLogger(ColumnStatistic.class); - public static ColumnStatistic UNKNOWN = new ColumnStatisticBuilder().setAvgSizeByte(1).setNdv(1) - .setNumNulls(1).setCount(1).setMaxValue(Double.POSITIVE_INFINITY).setMinValue(Double.NEGATIVE_INFINITY) + public static ColumnStatistic UNKNOWN = new ColumnStatisticBuilder().setCount(1).setAvgSizeByte(1).setNdv(1) + .setNumNulls(1).setMaxValue(Double.POSITIVE_INFINITY).setMinValue(Double.NEGATIVE_INFINITY) .setIsUnknown(true).setUpdatedTime("") .build(); @@ -373,4 +375,34 @@ public class ColumnStatistic { public boolean isUnKnown() { return isUnKnown; } + + public ColumnStatistic withAvgSizeByte(double avgSizeByte) { + return new ColumnStatisticBuilder(this).setAvgSizeByte(avgSizeByte).build(); + } + + public static ColumnStatistic createUnknownByDataType(DataType dataType) { + if (dataType instanceof CharacterType) { + return new ColumnStatisticBuilder() + .setCount(1) + .setAvgSizeByte(Math.max(1, Math.min(dataType.width(), CharacterType.DEFAULT_WIDTH))) + .setNdv(1) + .setNumNulls(1) + .setMaxValue(Double.POSITIVE_INFINITY) + .setMinValue(Double.NEGATIVE_INFINITY) + .setIsUnknown(true) + .setUpdatedTime("") + .build(); + } else { + return new ColumnStatisticBuilder() + .setCount(1) + .setAvgSizeByte(dataType.width()) + .setNdv(1) + .setNumNulls(1) + .setMaxValue(Double.POSITIVE_INFINITY) + .setMinValue(Double.NEGATIVE_INFINITY) + .setIsUnknown(true) + .setUpdatedTime("") + .build(); + } + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/ExpressionEstimationTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/ExpressionEstimationTest.java index 32e7504a53..725c1462ca 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/ExpressionEstimationTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/ExpressionEstimationTest.java @@ -447,4 +447,13 @@ class ExpressionEstimationTest { Assertions.assertEquals(est.avgSizeByte, 1); Assertions.assertEquals(est.numNulls, 1); } + + @Test + public void testThrowException() { + SlotReference a = new SlotReference("a", StringType.INSTANCE); + Cast cast = new Cast(a, DateType.INSTANCE); + // do not throw any exception + ColumnStatistic est = ExpressionEstimation.estimate(cast, null); + Assertions.assertTrue(est.isUnKnown()); + } }