From 01090cf61f728702436baeab1ee1158aa5ac3e9b Mon Sep 17 00:00:00 2001 From: Jibing-Li <64681310+Jibing-Li@users.noreply.github.com> Date: Thu, 15 Aug 2024 17:01:24 +0800 Subject: [PATCH] [improvement](statistics)Improve statistics cache loading logic. (#38829) (#39410) backport: https://github.com/apache/doris/pull/38829 --- .../datasource/hive/HMSExternalTable.java | 12 +- .../org/apache/doris/qe/ShowExecutor.java | 9 +- .../doris/statistics/ColumnStatistic.java | 146 ++++++++---------- .../ColumnStatisticsCacheLoader.java | 33 +--- .../statistics/StatisticsRepository.java | 13 +- .../doris/statistics/util/StatisticsUtil.java | 3 +- .../apache/doris/statistics/CacheTest.java | 44 ++++++ 7 files changed, 136 insertions(+), 124 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java index 8b0fee92ad..401509049e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java @@ -610,21 +610,13 @@ public class HMSExternalTable extends ExternalTable implements MTMVRelatedTableI continue; } ColumnStatisticsData data = tableStat.getStatsData(); - try { - setStatData(column, data, columnStatisticBuilder, count); - } catch (AnalysisException e) { - if (LOG.isDebugEnabled()) { - LOG.debug(e); - } - return Optional.empty(); - } + setStatData(column, data, columnStatisticBuilder, count); } return Optional.of(columnStatisticBuilder.build()); } - private void setStatData(Column col, ColumnStatisticsData data, ColumnStatisticBuilder builder, long count) - throws AnalysisException { + private void setStatData(Column col, ColumnStatisticsData data, ColumnStatisticBuilder builder, long count) { long ndv = 0; long nulls = 0; double colSize = 0; diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java index e693f0aa35..5529f3a6a9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java @@ -2547,7 +2547,14 @@ public class ShowExecutor { if (indexName == null) { continue; } - columnStatistics.add(Pair.of(Pair.of(indexName, row.get(5)), ColumnStatistic.fromResultRow(row))); + try { + columnStatistics.add(Pair.of(Pair.of(indexName, row.get(5)), ColumnStatistic.fromResultRow(row))); + } catch (Exception e) { + LOG.warn("Failed to deserialize column statistics. reason: [{}]. Row [{}]", e.getMessage(), row); + if (LOG.isDebugEnabled()) { + LOG.debug(e); + } + } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java index c932bceac1..bae830e9ed 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java @@ -112,99 +112,81 @@ public class ColumnStatistic { public static ColumnStatistic fromResultRow(List resultRows) { Map partitionIdToColStats = new HashMap<>(); ColumnStatistic columnStatistic = null; - try { - for (ResultRow resultRow : resultRows) { - String partId = resultRow.get(6); - if (partId == null) { - columnStatistic = fromResultRow(resultRow); - } else { - partitionIdToColStats.put(partId, fromResultRow(resultRow)); - } + for (ResultRow resultRow : resultRows) { + String partId = resultRow.get(6); + if (partId == null) { + columnStatistic = fromResultRow(resultRow); + } else { + partitionIdToColStats.put(partId, fromResultRow(resultRow)); } - } catch (Throwable t) { - if (LOG.isDebugEnabled()) { - LOG.debug("Failed to deserialize column stats", t); - } - return ColumnStatistic.UNKNOWN; - } - if (columnStatistic == null) { - return ColumnStatistic.UNKNOWN; } return columnStatistic; } // TODO: use thrift public static ColumnStatistic fromResultRow(ResultRow row) { - try { - ColumnStatisticBuilder columnStatisticBuilder = new ColumnStatisticBuilder(); - double count = Double.parseDouble(row.get(7)); - columnStatisticBuilder.setCount(count); - double ndv = Double.parseDouble(row.getWithDefault(8, "0")); - columnStatisticBuilder.setNdv(ndv); - String nullCount = row.getWithDefault(9, "0"); - columnStatisticBuilder.setNumNulls(Double.parseDouble(nullCount)); - columnStatisticBuilder.setDataSize(Double - .parseDouble(row.getWithDefault(12, "0"))); - columnStatisticBuilder.setAvgSizeByte(columnStatisticBuilder.getCount() == 0 - ? 0 : columnStatisticBuilder.getDataSize() - / columnStatisticBuilder.getCount()); - long catalogId = Long.parseLong(row.get(1)); - long idxId = Long.parseLong(row.get(4)); - long dbID = Long.parseLong(row.get(2)); - long tblId = Long.parseLong(row.get(3)); - String colName = row.get(5); - Column col = StatisticsUtil.findColumn(catalogId, dbID, tblId, idxId, colName); - if (col == null) { - if (LOG.isDebugEnabled()) { - LOG.debug("Failed to deserialize column statistics, ctlId: {} dbId: {}" - + "tblId: {} column: {} not exists", - catalogId, dbID, tblId, colName); - } - return ColumnStatistic.UNKNOWN; - } - String min = row.get(10); - String max = row.get(11); - if (min != null && !min.equalsIgnoreCase("NULL")) { - // Internal catalog get the min/max value using a separate SQL, - // and the value is already encoded by base64. Need to handle internal and external catalog separately. - if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID && min.equalsIgnoreCase("NULL")) { - columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY); - } else { - try { - columnStatisticBuilder.setMinValue(StatisticsUtil.convertToDouble(col.getType(), min)); - columnStatisticBuilder.setMinExpr(StatisticsUtil.readableValue(col.getType(), min)); - } catch (AnalysisException e) { - LOG.warn("Failed to deserialize column {} min value {}.", col, min, e); - columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY); - } - } - } else { - columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY); - } - if (max != null && !max.equalsIgnoreCase("NULL")) { - if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID && max.equalsIgnoreCase("NULL")) { - columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY); - } else { - try { - columnStatisticBuilder.setMaxValue(StatisticsUtil.convertToDouble(col.getType(), max)); - columnStatisticBuilder.setMaxExpr(StatisticsUtil.readableValue(col.getType(), max)); - } catch (AnalysisException e) { - LOG.warn("Failed to deserialize column {} max value {}.", col, max, e); - columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY); - } - } - } else { - columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY); - } - columnStatisticBuilder.setUpdatedTime(row.get(13)); - return columnStatisticBuilder.build(); - } catch (Exception e) { - LOG.warn("Failed to deserialize column statistics. reason: [{}]. Row [{}]", e.getMessage(), row); + ColumnStatisticBuilder columnStatisticBuilder = new ColumnStatisticBuilder(); + double count = Double.parseDouble(row.get(7)); + columnStatisticBuilder.setCount(count); + double ndv = Double.parseDouble(row.getWithDefault(8, "0")); + columnStatisticBuilder.setNdv(ndv); + String nullCount = row.getWithDefault(9, "0"); + columnStatisticBuilder.setNumNulls(Double.parseDouble(nullCount)); + columnStatisticBuilder.setDataSize(Double + .parseDouble(row.getWithDefault(12, "0"))); + columnStatisticBuilder.setAvgSizeByte(columnStatisticBuilder.getCount() == 0 + ? 0 : columnStatisticBuilder.getDataSize() + / columnStatisticBuilder.getCount()); + long catalogId = Long.parseLong(row.get(1)); + long idxId = Long.parseLong(row.get(4)); + long dbID = Long.parseLong(row.get(2)); + long tblId = Long.parseLong(row.get(3)); + String colName = row.get(5); + Column col = StatisticsUtil.findColumn(catalogId, dbID, tblId, idxId, colName); + if (col == null) { if (LOG.isDebugEnabled()) { - LOG.debug(e); + LOG.debug("Failed to deserialize column statistics, ctlId: {} dbId: {}" + + "tblId: {} column: {} not exists", + catalogId, dbID, tblId, colName); } return ColumnStatistic.UNKNOWN; } + String min = row.get(10); + String max = row.get(11); + if (min != null && !min.equalsIgnoreCase("NULL")) { + // Internal catalog get the min/max value using a separate SQL, + // and the value is already encoded by base64. Need to handle internal and external catalog separately. + if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID && min.equalsIgnoreCase("NULL")) { + columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY); + } else { + try { + columnStatisticBuilder.setMinValue(StatisticsUtil.convertToDouble(col.getType(), min)); + columnStatisticBuilder.setMinExpr(StatisticsUtil.readableValue(col.getType(), min)); + } catch (AnalysisException e) { + LOG.warn("Failed to deserialize column {} min value {}.", col, min, e); + columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY); + } + } + } else { + columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY); + } + if (max != null && !max.equalsIgnoreCase("NULL")) { + if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID && max.equalsIgnoreCase("NULL")) { + columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY); + } else { + try { + columnStatisticBuilder.setMaxValue(StatisticsUtil.convertToDouble(col.getType(), max)); + columnStatisticBuilder.setMaxExpr(StatisticsUtil.readableValue(col.getType(), max)); + } catch (AnalysisException e) { + LOG.warn("Failed to deserialize column {} max value {}.", col, max, e); + columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY); + } + } + } else { + columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY); + } + columnStatisticBuilder.setUpdatedTime(row.get(13)); + return columnStatisticBuilder.build(); } public static boolean isAlmostUnique(double ndv, double rowCount) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java index 056ed7bcee..cca5e34717 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java @@ -18,7 +18,6 @@ package org.apache.doris.statistics; import org.apache.doris.catalog.TableIf; -import org.apache.doris.qe.InternalQueryExecutionException; import org.apache.doris.statistics.util.StatisticsUtil; import org.apache.logging.log4j.LogManager; @@ -33,22 +32,14 @@ public class ColumnStatisticsCacheLoader extends BasicAsyncCacheLoader doLoad(StatisticsCacheKey key) { - Optional columnStatistic = Optional.empty(); + Optional columnStatistic; try { // Load from statistics table. columnStatistic = loadFromStatsTable(key); if (!columnStatistic.isPresent()) { // Load from data source metadata - try { - TableIf table = StatisticsUtil.findTable(key.catalogId, key.dbId, key.tableId); - columnStatistic = table.getColumnStatistic(key.colName); - } catch (Exception e) { - if (LOG.isDebugEnabled()) { - LOG.debug(String.format("Exception to get column statistics by metadata." - + "[Catalog:{}, DB:{}, Table:{}]", - key.catalogId, key.dbId, key.tableId), e); - } - } + TableIf table = StatisticsUtil.findTable(key.catalogId, key.dbId, key.tableId); + columnStatistic = table.getColumnStatistic(key.colName); } } catch (Throwable t) { LOG.warn("Failed to load stats for column [Catalog:{}, DB:{}, Table:{}, Column:{}], Reason: {}", @@ -56,6 +47,7 @@ public class ColumnStatisticsCacheLoader extends BasicAsyncCacheLoader loadFromStatsTable(StatisticsCacheKey key) { - List columnResults = null; - try { - columnResults = StatisticsRepository.loadColStats( + List columnResults = StatisticsRepository.loadColStats( key.catalogId, key.dbId, key.tableId, key.idxId, key.colName); - } catch (InternalQueryExecutionException e) { - LOG.info("Failed to load stats for table {} column {}. Reason:{}", - key.tableId, key.colName, e.getMessage()); - return Optional.empty(); - } - ColumnStatistic columnStatistics; - try { - columnStatistics = StatisticsUtil.deserializeToColumnStatistics(columnResults); - } catch (Exception e) { - LOG.warn("Exception to deserialize column statistics", e); - return Optional.empty(); - } + ColumnStatistic columnStatistics = StatisticsUtil.deserializeToColumnStatistics(columnResults); if (columnStatistics == null) { return Optional.empty(); } else { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java index 4d2aede413..9eaf80ea89 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java @@ -111,11 +111,20 @@ public class StatisticsRepository { public static ColumnStatistic queryColumnStatisticsByName( long ctlId, long dbId, long tableId, long indexId, String colName) { + ColumnStatistic columnStatistic = ColumnStatistic.UNKNOWN; ResultRow resultRow = queryColumnStatisticById(ctlId, dbId, tableId, indexId, colName); if (resultRow == null) { - return ColumnStatistic.UNKNOWN; + return columnStatistic; } - return ColumnStatistic.fromResultRow(resultRow); + try { + columnStatistic = ColumnStatistic.fromResultRow(resultRow); + } catch (Exception e) { + LOG.warn("Failed to deserialize column statistics. reason: [{}]. Row [{}]", e.getMessage(), resultRow); + if (LOG.isDebugEnabled()) { + LOG.debug(e); + } + } + return columnStatistic; } public static List queryColumnStatisticsByPartitions(TableName tableName, String colName, diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java index 2be007b141..c2c7de48a9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java @@ -163,8 +163,7 @@ public class StatisticsUtil { } } - public static ColumnStatistic deserializeToColumnStatistics(List resultBatches) - throws Exception { + public static ColumnStatistic deserializeToColumnStatistics(List resultBatches) { if (CollectionUtils.isEmpty(resultBatches)) { return null; } diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java index 729291d532..9c0b7fff33 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java @@ -388,4 +388,48 @@ public class CacheTest extends TestWithFeService { Thread.sleep(100); Assertions.assertEquals(1, columnStatisticsCache.synchronous().asMap().size()); } + + @Test + public void testLoadWithException() throws Exception { + new MockUp() { + @Mock + protected Optional doLoad(StatisticsCacheKey key) { + return null; + } + }; + StatisticsCache statisticsCache = new StatisticsCache(); + ColumnStatistic columnStatistic = statisticsCache.getColumnStatistics(1, 1, 1, -1, "col"); + Thread.sleep(3000); + Assertions.assertTrue(columnStatistic.isUnKnown); + + new MockUp() { + @Mock + protected Optional doLoad(StatisticsCacheKey key) { + return Optional.of(new ColumnStatistic(1, 2, + null, 3, 4, 5, 6, 7, + null, null, false, + new Date().toString())); + } + }; + columnStatistic = statisticsCache.getColumnStatistics(1, 1, 1, -1, "col"); + for (int i = 0; i < 60; i++) { + columnStatistic = statisticsCache.getColumnStatistics(1, 1, 1, -1, "col"); + if (columnStatistic != ColumnStatistic.UNKNOWN) { + break; + } + System.out.println("Not ready yet."); + Thread.sleep(1000); + } + if (columnStatistic != ColumnStatistic.UNKNOWN) { + Assertions.assertEquals(1, columnStatistic.count); + Assertions.assertEquals(2, columnStatistic.ndv); + Assertions.assertEquals(3, columnStatistic.avgSizeByte); + Assertions.assertEquals(4, columnStatistic.numNulls); + Assertions.assertEquals(5, columnStatistic.dataSize); + Assertions.assertEquals(6, columnStatistic.minValue); + Assertions.assertEquals(7, columnStatistic.maxValue); + } else { + Assertions.fail("Column stats is still unknown"); + } + } }