Return UNKNOWN column stats if ndv is 0. (#31439)

This commit is contained in:
Jibing-Li
2024-02-28 16:36:33 +08:00
committed by yiguolei
parent e8a21b529e
commit 3ca412efe3
6 changed files with 49 additions and 15 deletions

View File

@ -131,6 +131,11 @@ public class ColStatsData {
}
public ColumnStatistic toColumnStatistic() {
// For a non-empty table, return UNKNOWN if the ndv value could not be collected,
// because an inaccurate ndv is very misleading.
if (count > 0 && ndv == 0 && count != nullCount) {
return ColumnStatistic.UNKNOWN;
}
try {
ColumnStatisticBuilder columnStatisticBuilder = new ColumnStatisticBuilder();
columnStatisticBuilder.setCount(count);

View File

@ -25,8 +25,8 @@ public class ColumnStatisticBuilder {
private double avgSizeByte;
private double numNulls;
private double dataSize;
private double minValue;
private double maxValue;
private double minValue = Double.NEGATIVE_INFINITY;
private double maxValue = Double.POSITIVE_INFINITY;
private LiteralExpr minExpr;
private LiteralExpr maxExpr;

View File

@ -37,18 +37,17 @@ public class ColumnStatisticsCacheLoader extends BasicAsyncCacheLoader<Statistic
try {
// Load from statistics table.
columnStatistic = loadFromStatsTable(key);
if (columnStatistic.isPresent()) {
return columnStatistic;
}
// Load from data source metadata
try {
TableIf table = StatisticsUtil.findTable(key.catalogId, key.dbId, key.tableId);
columnStatistic = table.getColumnStatistic(key.colName);
} catch (Exception e) {
if (LOG.isDebugEnabled()) {
LOG.debug(String.format("Exception to get column statistics by metadata."
+ "[Catalog:{}, DB:{}, Table:{}]",
key.catalogId, key.dbId, key.tableId), e);
if (!columnStatistic.isPresent()) {
// Load from data source metadata
try {
TableIf table = StatisticsUtil.findTable(key.catalogId, key.dbId, key.tableId);
columnStatistic = table.getColumnStatistic(key.colName);
} catch (Exception e) {
if (LOG.isDebugEnabled()) {
LOG.debug(String.format("Exception to get column statistics by metadata."
+ "[Catalog:{}, DB:{}, Table:{}]",
key.catalogId, key.dbId, key.tableId), e);
}
}
}
} catch (Throwable t) {
@ -58,6 +57,14 @@ public class ColumnStatisticsCacheLoader extends BasicAsyncCacheLoader<Statistic
LOG.debug(t);
}
}
if (columnStatistic.isPresent()) {
// For a non-empty table, return UNKNOWN if the ndv value could not be collected,
// because an inaccurate ndv is very misleading.
ColumnStatistic stats = columnStatistic.get();
if (stats.count > 0 && stats.ndv == 0 && stats.count != stats.numNulls) {
columnStatistic = Optional.of(ColumnStatistic.UNKNOWN);
}
}
return columnStatistic;
}

View File

@ -173,7 +173,10 @@ public class StatisticsCache {
String colId = statsId.colId;
final StatisticsCacheKey k =
new StatisticsCacheKey(tblId, idxId, colId);
final ColumnStatistic c = ColumnStatistic.fromResultRow(r);
ColumnStatistic c = ColumnStatistic.fromResultRow(r);
if (c.count > 0 && c.ndv == 0 && c.count != c.numNulls) {
c = ColumnStatistic.UNKNOWN;
}
putCache(k, c);
} catch (Throwable t) {
LOG.warn("Error when preheating stats cache", t);