backport: https://github.com/apache/doris/pull/38829
This commit is contained in:
@ -610,21 +610,13 @@ public class HMSExternalTable extends ExternalTable implements MTMVRelatedTableI
|
||||
continue;
|
||||
}
|
||||
ColumnStatisticsData data = tableStat.getStatsData();
|
||||
try {
|
||||
setStatData(column, data, columnStatisticBuilder, count);
|
||||
} catch (AnalysisException e) {
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug(e);
|
||||
}
|
||||
return Optional.empty();
|
||||
}
|
||||
setStatData(column, data, columnStatisticBuilder, count);
|
||||
}
|
||||
|
||||
return Optional.of(columnStatisticBuilder.build());
|
||||
}
|
||||
|
||||
private void setStatData(Column col, ColumnStatisticsData data, ColumnStatisticBuilder builder, long count)
|
||||
throws AnalysisException {
|
||||
private void setStatData(Column col, ColumnStatisticsData data, ColumnStatisticBuilder builder, long count) {
|
||||
long ndv = 0;
|
||||
long nulls = 0;
|
||||
double colSize = 0;
|
||||
|
||||
@ -2547,7 +2547,14 @@ public class ShowExecutor {
|
||||
if (indexName == null) {
|
||||
continue;
|
||||
}
|
||||
columnStatistics.add(Pair.of(Pair.of(indexName, row.get(5)), ColumnStatistic.fromResultRow(row)));
|
||||
try {
|
||||
columnStatistics.add(Pair.of(Pair.of(indexName, row.get(5)), ColumnStatistic.fromResultRow(row)));
|
||||
} catch (Exception e) {
|
||||
LOG.warn("Failed to deserialize column statistics. reason: [{}]. Row [{}]", e.getMessage(), row);
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -112,99 +112,81 @@ public class ColumnStatistic {
|
||||
public static ColumnStatistic fromResultRow(List<ResultRow> resultRows) {
|
||||
Map<String, ColumnStatistic> partitionIdToColStats = new HashMap<>();
|
||||
ColumnStatistic columnStatistic = null;
|
||||
try {
|
||||
for (ResultRow resultRow : resultRows) {
|
||||
String partId = resultRow.get(6);
|
||||
if (partId == null) {
|
||||
columnStatistic = fromResultRow(resultRow);
|
||||
} else {
|
||||
partitionIdToColStats.put(partId, fromResultRow(resultRow));
|
||||
}
|
||||
for (ResultRow resultRow : resultRows) {
|
||||
String partId = resultRow.get(6);
|
||||
if (partId == null) {
|
||||
columnStatistic = fromResultRow(resultRow);
|
||||
} else {
|
||||
partitionIdToColStats.put(partId, fromResultRow(resultRow));
|
||||
}
|
||||
} catch (Throwable t) {
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Failed to deserialize column stats", t);
|
||||
}
|
||||
return ColumnStatistic.UNKNOWN;
|
||||
}
|
||||
if (columnStatistic == null) {
|
||||
return ColumnStatistic.UNKNOWN;
|
||||
}
|
||||
return columnStatistic;
|
||||
}
|
||||
|
||||
// TODO: use thrift
|
||||
public static ColumnStatistic fromResultRow(ResultRow row) {
|
||||
try {
|
||||
ColumnStatisticBuilder columnStatisticBuilder = new ColumnStatisticBuilder();
|
||||
double count = Double.parseDouble(row.get(7));
|
||||
columnStatisticBuilder.setCount(count);
|
||||
double ndv = Double.parseDouble(row.getWithDefault(8, "0"));
|
||||
columnStatisticBuilder.setNdv(ndv);
|
||||
String nullCount = row.getWithDefault(9, "0");
|
||||
columnStatisticBuilder.setNumNulls(Double.parseDouble(nullCount));
|
||||
columnStatisticBuilder.setDataSize(Double
|
||||
.parseDouble(row.getWithDefault(12, "0")));
|
||||
columnStatisticBuilder.setAvgSizeByte(columnStatisticBuilder.getCount() == 0
|
||||
? 0 : columnStatisticBuilder.getDataSize()
|
||||
/ columnStatisticBuilder.getCount());
|
||||
long catalogId = Long.parseLong(row.get(1));
|
||||
long idxId = Long.parseLong(row.get(4));
|
||||
long dbID = Long.parseLong(row.get(2));
|
||||
long tblId = Long.parseLong(row.get(3));
|
||||
String colName = row.get(5);
|
||||
Column col = StatisticsUtil.findColumn(catalogId, dbID, tblId, idxId, colName);
|
||||
if (col == null) {
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Failed to deserialize column statistics, ctlId: {} dbId: {}"
|
||||
+ "tblId: {} column: {} not exists",
|
||||
catalogId, dbID, tblId, colName);
|
||||
}
|
||||
return ColumnStatistic.UNKNOWN;
|
||||
}
|
||||
String min = row.get(10);
|
||||
String max = row.get(11);
|
||||
if (min != null && !min.equalsIgnoreCase("NULL")) {
|
||||
// Internal catalog get the min/max value using a separate SQL,
|
||||
// and the value is already encoded by base64. Need to handle internal and external catalog separately.
|
||||
if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID && min.equalsIgnoreCase("NULL")) {
|
||||
columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
|
||||
} else {
|
||||
try {
|
||||
columnStatisticBuilder.setMinValue(StatisticsUtil.convertToDouble(col.getType(), min));
|
||||
columnStatisticBuilder.setMinExpr(StatisticsUtil.readableValue(col.getType(), min));
|
||||
} catch (AnalysisException e) {
|
||||
LOG.warn("Failed to deserialize column {} min value {}.", col, min, e);
|
||||
columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
|
||||
}
|
||||
if (max != null && !max.equalsIgnoreCase("NULL")) {
|
||||
if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID && max.equalsIgnoreCase("NULL")) {
|
||||
columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
|
||||
} else {
|
||||
try {
|
||||
columnStatisticBuilder.setMaxValue(StatisticsUtil.convertToDouble(col.getType(), max));
|
||||
columnStatisticBuilder.setMaxExpr(StatisticsUtil.readableValue(col.getType(), max));
|
||||
} catch (AnalysisException e) {
|
||||
LOG.warn("Failed to deserialize column {} max value {}.", col, max, e);
|
||||
columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
|
||||
}
|
||||
columnStatisticBuilder.setUpdatedTime(row.get(13));
|
||||
return columnStatisticBuilder.build();
|
||||
} catch (Exception e) {
|
||||
LOG.warn("Failed to deserialize column statistics. reason: [{}]. Row [{}]", e.getMessage(), row);
|
||||
ColumnStatisticBuilder columnStatisticBuilder = new ColumnStatisticBuilder();
|
||||
double count = Double.parseDouble(row.get(7));
|
||||
columnStatisticBuilder.setCount(count);
|
||||
double ndv = Double.parseDouble(row.getWithDefault(8, "0"));
|
||||
columnStatisticBuilder.setNdv(ndv);
|
||||
String nullCount = row.getWithDefault(9, "0");
|
||||
columnStatisticBuilder.setNumNulls(Double.parseDouble(nullCount));
|
||||
columnStatisticBuilder.setDataSize(Double
|
||||
.parseDouble(row.getWithDefault(12, "0")));
|
||||
columnStatisticBuilder.setAvgSizeByte(columnStatisticBuilder.getCount() == 0
|
||||
? 0 : columnStatisticBuilder.getDataSize()
|
||||
/ columnStatisticBuilder.getCount());
|
||||
long catalogId = Long.parseLong(row.get(1));
|
||||
long idxId = Long.parseLong(row.get(4));
|
||||
long dbID = Long.parseLong(row.get(2));
|
||||
long tblId = Long.parseLong(row.get(3));
|
||||
String colName = row.get(5);
|
||||
Column col = StatisticsUtil.findColumn(catalogId, dbID, tblId, idxId, colName);
|
||||
if (col == null) {
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug(e);
|
||||
LOG.debug("Failed to deserialize column statistics, ctlId: {} dbId: {}"
|
||||
+ "tblId: {} column: {} not exists",
|
||||
catalogId, dbID, tblId, colName);
|
||||
}
|
||||
return ColumnStatistic.UNKNOWN;
|
||||
}
|
||||
String min = row.get(10);
|
||||
String max = row.get(11);
|
||||
if (min != null && !min.equalsIgnoreCase("NULL")) {
|
||||
// Internal catalog get the min/max value using a separate SQL,
|
||||
// and the value is already encoded by base64. Need to handle internal and external catalog separately.
|
||||
if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID && min.equalsIgnoreCase("NULL")) {
|
||||
columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
|
||||
} else {
|
||||
try {
|
||||
columnStatisticBuilder.setMinValue(StatisticsUtil.convertToDouble(col.getType(), min));
|
||||
columnStatisticBuilder.setMinExpr(StatisticsUtil.readableValue(col.getType(), min));
|
||||
} catch (AnalysisException e) {
|
||||
LOG.warn("Failed to deserialize column {} min value {}.", col, min, e);
|
||||
columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
|
||||
}
|
||||
if (max != null && !max.equalsIgnoreCase("NULL")) {
|
||||
if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID && max.equalsIgnoreCase("NULL")) {
|
||||
columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
|
||||
} else {
|
||||
try {
|
||||
columnStatisticBuilder.setMaxValue(StatisticsUtil.convertToDouble(col.getType(), max));
|
||||
columnStatisticBuilder.setMaxExpr(StatisticsUtil.readableValue(col.getType(), max));
|
||||
} catch (AnalysisException e) {
|
||||
LOG.warn("Failed to deserialize column {} max value {}.", col, max, e);
|
||||
columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
|
||||
}
|
||||
columnStatisticBuilder.setUpdatedTime(row.get(13));
|
||||
return columnStatisticBuilder.build();
|
||||
}
|
||||
|
||||
public static boolean isAlmostUnique(double ndv, double rowCount) {
|
||||
|
||||
@ -18,7 +18,6 @@
|
||||
package org.apache.doris.statistics;
|
||||
|
||||
import org.apache.doris.catalog.TableIf;
|
||||
import org.apache.doris.qe.InternalQueryExecutionException;
|
||||
import org.apache.doris.statistics.util.StatisticsUtil;
|
||||
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
@ -33,22 +32,14 @@ public class ColumnStatisticsCacheLoader extends BasicAsyncCacheLoader<Statistic
|
||||
|
||||
@Override
|
||||
protected Optional<ColumnStatistic> doLoad(StatisticsCacheKey key) {
|
||||
Optional<ColumnStatistic> columnStatistic = Optional.empty();
|
||||
Optional<ColumnStatistic> columnStatistic;
|
||||
try {
|
||||
// Load from statistics table.
|
||||
columnStatistic = loadFromStatsTable(key);
|
||||
if (!columnStatistic.isPresent()) {
|
||||
// Load from data source metadata
|
||||
try {
|
||||
TableIf table = StatisticsUtil.findTable(key.catalogId, key.dbId, key.tableId);
|
||||
columnStatistic = table.getColumnStatistic(key.colName);
|
||||
} catch (Exception e) {
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug(String.format("Exception to get column statistics by metadata."
|
||||
+ "[Catalog:{}, DB:{}, Table:{}]",
|
||||
key.catalogId, key.dbId, key.tableId), e);
|
||||
}
|
||||
}
|
||||
TableIf table = StatisticsUtil.findTable(key.catalogId, key.dbId, key.tableId);
|
||||
columnStatistic = table.getColumnStatistic(key.colName);
|
||||
}
|
||||
} catch (Throwable t) {
|
||||
LOG.warn("Failed to load stats for column [Catalog:{}, DB:{}, Table:{}, Column:{}], Reason: {}",
|
||||
@ -56,6 +47,7 @@ public class ColumnStatisticsCacheLoader extends BasicAsyncCacheLoader<Statistic
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug(t);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
if (columnStatistic.isPresent()) {
|
||||
// For non-empty table, return UNKNOWN if we can't collect ndv value.
|
||||
@ -69,22 +61,9 @@ public class ColumnStatisticsCacheLoader extends BasicAsyncCacheLoader<Statistic
|
||||
}
|
||||
|
||||
private Optional<ColumnStatistic> loadFromStatsTable(StatisticsCacheKey key) {
|
||||
List<ResultRow> columnResults = null;
|
||||
try {
|
||||
columnResults = StatisticsRepository.loadColStats(
|
||||
List<ResultRow> columnResults = StatisticsRepository.loadColStats(
|
||||
key.catalogId, key.dbId, key.tableId, key.idxId, key.colName);
|
||||
} catch (InternalQueryExecutionException e) {
|
||||
LOG.info("Failed to load stats for table {} column {}. Reason:{}",
|
||||
key.tableId, key.colName, e.getMessage());
|
||||
return Optional.empty();
|
||||
}
|
||||
ColumnStatistic columnStatistics;
|
||||
try {
|
||||
columnStatistics = StatisticsUtil.deserializeToColumnStatistics(columnResults);
|
||||
} catch (Exception e) {
|
||||
LOG.warn("Exception to deserialize column statistics", e);
|
||||
return Optional.empty();
|
||||
}
|
||||
ColumnStatistic columnStatistics = StatisticsUtil.deserializeToColumnStatistics(columnResults);
|
||||
if (columnStatistics == null) {
|
||||
return Optional.empty();
|
||||
} else {
|
||||
|
||||
@ -111,11 +111,20 @@ public class StatisticsRepository {
|
||||
|
||||
public static ColumnStatistic queryColumnStatisticsByName(
|
||||
long ctlId, long dbId, long tableId, long indexId, String colName) {
|
||||
ColumnStatistic columnStatistic = ColumnStatistic.UNKNOWN;
|
||||
ResultRow resultRow = queryColumnStatisticById(ctlId, dbId, tableId, indexId, colName);
|
||||
if (resultRow == null) {
|
||||
return ColumnStatistic.UNKNOWN;
|
||||
return columnStatistic;
|
||||
}
|
||||
return ColumnStatistic.fromResultRow(resultRow);
|
||||
try {
|
||||
columnStatistic = ColumnStatistic.fromResultRow(resultRow);
|
||||
} catch (Exception e) {
|
||||
LOG.warn("Failed to deserialize column statistics. reason: [{}]. Row [{}]", e.getMessage(), resultRow);
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug(e);
|
||||
}
|
||||
}
|
||||
return columnStatistic;
|
||||
}
|
||||
|
||||
public static List<ColumnStatistic> queryColumnStatisticsByPartitions(TableName tableName, String colName,
|
||||
|
||||
@ -163,8 +163,7 @@ public class StatisticsUtil {
|
||||
}
|
||||
}
|
||||
|
||||
public static ColumnStatistic deserializeToColumnStatistics(List<ResultRow> resultBatches)
|
||||
throws Exception {
|
||||
public static ColumnStatistic deserializeToColumnStatistics(List<ResultRow> resultBatches) {
|
||||
if (CollectionUtils.isEmpty(resultBatches)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user