Revert "[fix](statistics) Use column update rows to decide min/max stats are valid or not (#34263)"

This reverts commit 8db4d48731688354d6ee3ae22e02041419ca73e0.
This commit is contained in:
yiguolei
2024-05-07 08:09:37 +08:00
parent a33715bc1c
commit 63cd632abe

View File

@ -19,7 +19,6 @@ package org.apache.doris.nereids.stats;
import org.apache.doris.analysis.IntLiteral;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.common.FeConstants;
import org.apache.doris.common.Pair;
@ -125,7 +124,6 @@ import org.apache.doris.nereids.types.DataType;
import org.apache.doris.nereids.util.PlanUtils;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.statistics.AnalysisManager;
import org.apache.doris.statistics.ColStatsMeta;
import org.apache.doris.statistics.ColumnStatistic;
import org.apache.doris.statistics.ColumnStatisticBuilder;
import org.apache.doris.statistics.Histogram;
@ -766,10 +764,10 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> {
Set<SlotReference> slotSet = slotSetBuilder.build();
Map<Expression, ColumnStatisticBuilder> columnStatisticBuilderMap = new HashMap<>();
TableIf table = catalogRelation.getTable();
boolean isOlapTable = table instanceof OlapTable;
AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager();
TableStatsMeta tableMeta = analysisManager.findTableStatsStatus(table.getId());
long tableUpdatedRows = tableMeta == null ? 0 : tableMeta.updatedRows.get();
// rows newly updated after last analyze
long deltaRowCount = tableMeta == null ? 0 : tableMeta.updatedRows.get();
double rowCount = catalogRelation.getTable().getRowCountForNereids();
boolean hasUnknownCol = false;
long idxId = -1;
@ -779,6 +777,10 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> {
idxId = olapScan.getSelectedIndexId();
}
}
if (deltaRowCount > 0 && LOG.isDebugEnabled()) {
LOG.debug("{} is partially analyzed, clear min/max values in column stats",
catalogRelation.getTable().getName());
}
for (SlotReference slotReference : slotSet) {
String colName = slotReference.getColumn().isPresent()
? slotReference.getColumn().get().getName()
@ -788,13 +790,6 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> {
if (colName == null) {
throw new RuntimeException(String.format("Invalid slot: %s", slotReference.getExprId()));
}
long deltaRowCount = 0;
if (isOlapTable) {
OlapTable olapTable = (OlapTable) table;
ColStatsMeta colMeta = tableMeta == null ? null : tableMeta.findColumnStatsMeta(
olapTable.getIndexNameById(idxId == -1 ? olapTable.getBaseIndexId() : idxId), colName);
deltaRowCount = colMeta == null ? 0 : tableUpdatedRows - colMeta.updatedRows;
}
ColumnStatistic cache;
if (!FeConstants.enableInternalSchemaDb
|| shouldIgnoreThisCol) {
@ -812,20 +807,13 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> {
hasUnknownCol = true;
}
if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().enableStats) {
// deltaRowCount > 0 indicates that
// new data is loaded to the table after this column was analyzed last time.
// In this case, need to eliminate min/max value for this column.
if (deltaRowCount > 0) {
// clear min-max to avoid error estimation
// for example, after yesterday data loaded, user send query about yesterday immediately.
// since yesterday data are not analyzed, the max date is before yesterday, and hence optimizer
// estimates the filter result is zero
colStatsBuilder.setMinExpr(null).setMinValue(Double.NEGATIVE_INFINITY)
.setMaxExpr(null).setMaxValue(Double.POSITIVE_INFINITY);
if (LOG.isDebugEnabled()) {
LOG.debug("{}.{} is partially analyzed, clear min/max values in column stats",
table.getName(), colName);
}
.setMaxExpr(null).setMaxValue(Double.POSITIVE_INFINITY);
}
columnStatisticBuilderMap.put(slotReference, colStatsBuilder);
} else {