branch-2.1 [opt](nereids) catch all exceptions in StatsCalculator (#49415) (#50364)

This commit is contained in:
minghong
2025-05-09 11:24:18 +08:00
committed by GitHub
parent 9422c973af
commit fde8d05f5d
6 changed files with 90 additions and 10 deletions

View File

@ -94,12 +94,15 @@ import org.apache.doris.nereids.trees.expressions.literal.DateLiteral;
import org.apache.doris.nereids.trees.expressions.literal.Literal;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.DataType;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.statistics.ColumnStatistic;
import org.apache.doris.statistics.ColumnStatisticBuilder;
import org.apache.doris.statistics.Statistics;
import com.google.common.base.Preconditions;
import org.apache.commons.collections.CollectionUtils;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import java.time.Instant;
import java.time.LocalDate;
@ -111,7 +114,7 @@ import java.util.List;
* Used to estimate for expressions that not producing boolean value.
*/
public class ExpressionEstimation extends ExpressionVisitor<ColumnStatistic, Statistics> {
public static final Logger LOG = LogManager.getLogger(ExpressionEstimation.class);
public static final long DAYS_FROM_0_TO_1970 = 719528;
public static final long DAYS_FROM_0_TO_9999 = 3652424;
private static final ExpressionEstimation INSTANCE = new ExpressionEstimation();
@ -120,11 +123,20 @@ public class ExpressionEstimation extends ExpressionVisitor<ColumnStatistic, Sta
* returned columnStat is newly created or a copy of stats
*/
public static ColumnStatistic estimate(Expression expression, Statistics stats) {
// NOTE(review): `columnStatistic` is declared both here and inside the try block below, and a
// second `return columnStatistic;` follows the catch. This looks like the previous body listed
// next to its replacement (a diff rendered without +/- markers) - confirm against the real file.
ColumnStatistic columnStatistic = expression.accept(INSTANCE, stats);
if (columnStatistic == null) {
return ColumnStatistic.UNKNOWN;
try {
// Run the visitor over the expression; a null result is replaced by an unknown statistic
// built from the expression's data type.
ColumnStatistic columnStatistic = expression.accept(INSTANCE, stats);
if (columnStatistic == null) {
return ColumnStatistic.createUnknownByDataType(expression.getDataType());
}
return columnStatistic;
} catch (Exception e) {
// in regression test, feDebug is true so that the exception is thrown in order to detect problems.
if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().feDebug) {
throw e;
}
// Without feDebug (or without a ConnectContext) the failure is logged and an unknown
// statistic for the expression's data type is returned instead of propagating the exception.
LOG.warn("ExpressionEstimation failed : " + expression, e);
return ColumnStatistic.createUnknownByDataType(expression.getDataType());
}
return columnStatistic;
}
@Override

View File

@ -245,13 +245,33 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> {
private void estimate() {
Plan plan = groupExpression.getPlan();
Statistics newStats = plan.accept(this, null);
Statistics newStats;
try {
newStats = plan.accept(this, null);
} catch (Exception e) {
// throw exception in debug mode
if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().feDebug) {
throw e;
}
LOG.warn("stats calculation failed, plan " + plan.toString(), e);
// use unknown stats or the first child's stats
if (plan.children().isEmpty() || !(plan.child(0) instanceof GroupPlan)) {
Map<Expression, ColumnStatistic> columnStatisticMap = new HashMap<>();
for (Slot slot : plan.getOutput()) {
columnStatisticMap.put(slot, ColumnStatistic.createUnknownByDataType(slot.getDataType()));
}
newStats = new Statistics(1, 1, columnStatisticMap);
} else {
newStats = ((GroupPlan) plan.child(0)).getStats();
}
}
newStats.enforceValid();
// We ensure that the rowCount remains unchanged in order to make the cost of each plan comparable.
final Statistics tmpStats = newStats;
if (groupExpression.getOwnerGroup().getStatistics() == null) {
boolean isReliable = groupExpression.getPlan().getExpressions().stream()
.noneMatch(e -> newStats.isInputSlotsUnknown(e.getInputSlots()));
.noneMatch(e -> tmpStats.isInputSlotsUnknown(e.getInputSlots()));
groupExpression.getOwnerGroup().setStatsReliable(isReliable);
groupExpression.getOwnerGroup().setStatistics(newStats);
groupExpression.setEstOutputRowCount(newStats.getRowCount());

View File

@ -62,7 +62,7 @@ public class GroupPlan extends LogicalLeaf implements BlockFuncDepsPropagation {
@Override
public Statistics getStats() {
// NOTE(review): a statement placed after an unconditional throw is unreachable. The two lines
// below look like the old body (throw) listed next to its replacement (return the statistics
// held by `group`) in a diff rendered without +/- markers - confirm against the real file.
throw new IllegalStateException("GroupPlan can not invoke getStats()");
return group.getStatistics();
}
@Override

View File

@ -2095,6 +2095,12 @@ public class SessionVariable implements Serializable, Writable {
"use other health replica when the use_fix_replica meet error" })
public boolean fallbackOtherReplicaWhenFixedCorrupt = false;
// Name under which the feDebug session variable is registered.
public static final String FE_DEBUG = "fe_debug";
// Debug switch, off by default. Code that consults it rethrows a caught exception instead of
// logging it and falling back, so that tests surface the failure.
// NOTE(review): both description entries are the same English text, while a neighbouring
// variable supplies a Chinese translation as its second entry - confirm whether one is wanted here.
@VariableMgr.VarAttr(name = FE_DEBUG, needForward = true, fuzzy = true,
description = {"when set true, FE will throw exceptions instead swallow them. This is used for test",
"when set true, FE will throw exceptions instead swallow them. This is used for test"})
public boolean feDebug = false;
@VariableMgr.VarAttr(name = SHOW_ALL_FE_CONNECTION,
description = {"when it's true show processlist statement list all fe's connection",
"当变量为true时,show processlist命令展示所有fe的连接"})
@ -2320,6 +2326,7 @@ public class SessionVariable implements Serializable, Writable {
public void initFuzzyModeVariables() {
Random random = new SecureRandom();
this.parallelExecInstanceNum = random.nextInt(8) + 1;
this.feDebug = true;
this.parallelPipelineTaskNum = random.nextInt(8);
this.parallelPrepareThreshold = random.nextInt(32) + 1;
this.enableCommonExprPushdown = random.nextBoolean();

View File

@ -22,6 +22,8 @@ import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.datasource.InternalCatalog;
import org.apache.doris.nereids.types.DataType;
import org.apache.doris.nereids.types.coercion.CharacterType;
import org.apache.doris.statistics.util.StatisticsUtil;
import com.google.common.collect.Sets;
@ -48,8 +50,8 @@ public class ColumnStatistic {
private static final Logger LOG = LogManager.getLogger(ColumnStatistic.class);
// Shared placeholder statistic flagged as unknown: count, avgSizeByte, ndv and numNulls are 1,
// min/max are -/+ infinity and the updated time is empty.
// NOTE(review): two initializer heads appear below that differ only in the position of
// setCount(1); this looks like the old and new lines of a diff rendered without +/- markers -
// confirm against the real file. The field is also public, static and not final, so any code
// can reassign it - confirm that is intended.
public static ColumnStatistic UNKNOWN = new ColumnStatisticBuilder().setAvgSizeByte(1).setNdv(1)
.setNumNulls(1).setCount(1).setMaxValue(Double.POSITIVE_INFINITY).setMinValue(Double.NEGATIVE_INFINITY)
public static ColumnStatistic UNKNOWN = new ColumnStatisticBuilder().setCount(1).setAvgSizeByte(1).setNdv(1)
.setNumNulls(1).setMaxValue(Double.POSITIVE_INFINITY).setMinValue(Double.NEGATIVE_INFINITY)
.setIsUnknown(true).setUpdatedTime("")
.build();
@ -373,4 +375,34 @@ public class ColumnStatistic {
/**
 * Returns the value of this statistic's {@code isUnKnown} flag.
 *
 * @return the stored flag, unchanged
 */
public boolean isUnKnown() {
    return this.isUnKnown;
}
/**
 * Produces a statistic built from this one with only the average size replaced.
 *
 * @param avgSizeByte average size, in bytes, to store in the result
 * @return the statistic built from a builder seeded with this instance
 */
public ColumnStatistic withAvgSizeByte(double avgSizeByte) {
    ColumnStatisticBuilder resized = new ColumnStatisticBuilder(this).setAvgSizeByte(avgSizeByte);
    return resized.build();
}
/**
 * Builds a placeholder statistic, flagged as unknown, for a value of the given type.
 * Only the average size depends on the type; every other field is a fixed default
 * (count, ndv and numNulls are 1, min/max are -/+ infinity, updated time is empty).
 *
 * @param dataType type of the column or expression the placeholder stands for
 * @return a newly built statistic with its unknown flag set
 */
public static ColumnStatistic createUnknownByDataType(DataType dataType) {
    final double avgSizeByte;
    if (dataType instanceof CharacterType) {
        // Character types: the smaller of the declared width and CharacterType.DEFAULT_WIDTH,
        // raised to at least 1.
        int cappedWidth = Math.min(dataType.width(), CharacterType.DEFAULT_WIDTH);
        avgSizeByte = Math.max(1, cappedWidth);
    } else {
        // Every other type uses its declared width as-is.
        avgSizeByte = dataType.width();
    }
    return new ColumnStatisticBuilder()
            .setCount(1)
            .setAvgSizeByte(avgSizeByte)
            .setNdv(1)
            .setNumNulls(1)
            .setMaxValue(Double.POSITIVE_INFINITY)
            .setMinValue(Double.NEGATIVE_INFINITY)
            .setIsUnknown(true)
            .setUpdatedTime("")
            .build();
}
}

View File

@ -447,4 +447,13 @@ class ExpressionEstimationTest {
Assertions.assertEquals(est.avgSizeByte, 1);
Assertions.assertEquals(est.numNulls, 1);
}
/**
 * Estimating a cast of a string slot to a date with no input statistics (null) must not
 * throw; the call is expected to come back with a statistic flagged as unknown.
 */
@Test
public void testThrowException() {
    Cast stringToDate = new Cast(new SlotReference("a", StringType.INSTANCE), DateType.INSTANCE);
    // Reaching the assertion at all proves that no exception escaped estimate().
    ColumnStatistic result = ExpressionEstimation.estimate(stringToDate, null);
    Assertions.assertTrue(result.isUnKnown());
}
}