Return UNKNOWN column stats if ndv is 0. (#31439)
This commit is contained in:
@ -131,6 +131,11 @@ public class ColStatsData {
|
||||
}
|
||||
|
||||
public ColumnStatistic toColumnStatistic() {
|
||||
// For a non-empty table, return UNKNOWN if we can't collect the ndv value.
|
||||
// An inaccurate ndv would be very misleading.
|
||||
if (count > 0 && ndv == 0 && count != nullCount) {
|
||||
return ColumnStatistic.UNKNOWN;
|
||||
}
|
||||
try {
|
||||
ColumnStatisticBuilder columnStatisticBuilder = new ColumnStatisticBuilder();
|
||||
columnStatisticBuilder.setCount(count);
|
||||
|
||||
@ -25,8 +25,8 @@ public class ColumnStatisticBuilder {
|
||||
private double avgSizeByte;
|
||||
private double numNulls;
|
||||
private double dataSize;
|
||||
private double minValue;
|
||||
private double maxValue;
|
||||
private double minValue = Double.NEGATIVE_INFINITY;
|
||||
private double maxValue = Double.POSITIVE_INFINITY;
|
||||
private LiteralExpr minExpr;
|
||||
private LiteralExpr maxExpr;
|
||||
|
||||
|
||||
@ -37,18 +37,17 @@ public class ColumnStatisticsCacheLoader extends BasicAsyncCacheLoader<Statistic
|
||||
try {
|
||||
// Load from statistics table.
|
||||
columnStatistic = loadFromStatsTable(key);
|
||||
if (columnStatistic.isPresent()) {
|
||||
return columnStatistic;
|
||||
}
|
||||
// Load from data source metadata
|
||||
try {
|
||||
TableIf table = StatisticsUtil.findTable(key.catalogId, key.dbId, key.tableId);
|
||||
columnStatistic = table.getColumnStatistic(key.colName);
|
||||
} catch (Exception e) {
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug(String.format("Exception to get column statistics by metadata."
|
||||
+ "[Catalog:{}, DB:{}, Table:{}]",
|
||||
key.catalogId, key.dbId, key.tableId), e);
|
||||
if (!columnStatistic.isPresent()) {
|
||||
// Load from data source metadata
|
||||
try {
|
||||
TableIf table = StatisticsUtil.findTable(key.catalogId, key.dbId, key.tableId);
|
||||
columnStatistic = table.getColumnStatistic(key.colName);
|
||||
} catch (Exception e) {
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug(String.format("Exception to get column statistics by metadata."
|
||||
+ "[Catalog:{}, DB:{}, Table:{}]",
|
||||
key.catalogId, key.dbId, key.tableId), e);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (Throwable t) {
|
||||
@ -58,6 +57,14 @@ public class ColumnStatisticsCacheLoader extends BasicAsyncCacheLoader<Statistic
|
||||
LOG.debug(t);
|
||||
}
|
||||
}
|
||||
if (columnStatistic.isPresent()) {
|
||||
// For a non-empty table, return UNKNOWN if we can't collect the ndv value.
|
||||
// An inaccurate ndv would be very misleading.
|
||||
ColumnStatistic stats = columnStatistic.get();
|
||||
if (stats.count > 0 && stats.ndv == 0 && stats.count != stats.numNulls) {
|
||||
columnStatistic = Optional.of(ColumnStatistic.UNKNOWN);
|
||||
}
|
||||
}
|
||||
return columnStatistic;
|
||||
}
|
||||
|
||||
|
||||
@ -173,7 +173,10 @@ public class StatisticsCache {
|
||||
String colId = statsId.colId;
|
||||
final StatisticsCacheKey k =
|
||||
new StatisticsCacheKey(tblId, idxId, colId);
|
||||
final ColumnStatistic c = ColumnStatistic.fromResultRow(r);
|
||||
ColumnStatistic c = ColumnStatistic.fromResultRow(r);
|
||||
if (c.count > 0 && c.ndv == 0 && c.count != c.numNulls) {
|
||||
c = ColumnStatistic.UNKNOWN;
|
||||
}
|
||||
putCache(k, c);
|
||||
} catch (Throwable t) {
|
||||
LOG.warn("Error when preheating stats cache", t);
|
||||
|
||||
Reference in New Issue
Block a user