From 4cac07be30553412a223217bbadc81bc993c3cf2 Mon Sep 17 00:00:00 2001 From: Jibing-Li <64681310+Jibing-Li@users.noreply.github.com> Date: Thu, 7 Dec 2023 10:16:52 +0800 Subject: [PATCH] [improvement](statistics)Analyze empty table. #28077 Analyze a table even when it's empty. The result should be like this: mysql> show column stats nation; +-------------+-------+------+----------+-----------+---------------+------+------+--------+--------------+---------+-------------+---------------------+ | column_name | count | ndv | num_null | data_size | avg_size_byte | min | max | method | type | trigger | query_times | updated_time | +-------------+-------+------+----------+-----------+---------------+------+------+--------+--------------+---------+-------------+---------------------+ | n_comment | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | N/A | N/A | FULL | FUNDAMENTALS | MANUAL | 0 | 2023-12-06 19:22:09 | | n_nationkey | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | N/A | N/A | FULL | FUNDAMENTALS | MANUAL | 0 | 2023-12-06 19:22:09 | | n_regionkey | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | N/A | N/A | FULL | FUNDAMENTALS | MANUAL | 0 | 2023-12-06 19:22:09 | | n_name | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | N/A | N/A | FULL | FUNDAMENTALS | MANUAL | 0 | 2023-12-06 19:22:09 | +-------------+-------+------+----------+-----------+---------------+------+------+--------+--------------+---------+---- --- .../org/apache/doris/catalog/OlapTable.java | 6 +- .../apache/doris/statistics/ColStatsData.java | 13 ++ .../doris/statistics/OlapAnalysisTask.java | 17 +- .../org/apache/doris/statistics/StatsId.java | 10 ++ .../doris/statistics/util/StatisticsUtil.java | 4 + .../suites/statistics/analyze_stats.groovy | 163 +++++++++++++++++- 6 files changed, 202 insertions(+), 11 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java index 744e6cf9ad..2284164363 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java @@ -1155,11 +1155,6 @@ public class OlapTable extends Table { if (tblStats == null) { return true; } - long rowCount = getRowCount(); - // TODO: Do we need to analyze an empty table? - if (rowCount == 0) { - return false; - } if (!tblStats.analyzeColumns().containsAll(getBaseSchema() .stream() .filter(c -> !StatisticsUtil.isUnsupportedType(c.getType())) @@ -1167,6 +1162,7 @@ public class OlapTable extends Table { .collect(Collectors.toSet()))) { return true; } + long rowCount = getRowCount(); long updateRows = tblStats.updatedRows.get(); int tblHealth = StatisticsUtil.getTableHealth(rowCount, updateRows); return tblHealth < StatisticsUtil.getTableStatsHealthThreshold(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java index 7878a06548..460475198e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java @@ -21,6 +21,8 @@ import org.apache.doris.statistics.util.StatisticsUtil; import com.google.common.annotations.VisibleForTesting; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; import java.util.StringJoiner; /** @@ -66,6 +68,17 @@ public class ColStatsData { updateTime = null; } + public ColStatsData(StatsId statsId) { + this.statsId = statsId; + count = 0; + ndv = 0; + nullCount = 0; + minLit = null; + maxLit = null; + dataSizeInBytes = 0; + updateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")); + } + public ColStatsData(ResultRow row) { this.statsId = new StatsId(row); this.count = (long) Double.parseDouble(row.get(7)); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java index 89ed24e790..4d414563eb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java @@ -33,6 +33,7 @@ import org.apache.commons.text.StringSubstitutor; import java.security.SecureRandom; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; @@ -61,9 +62,9 @@ public class OlapAnalysisTask extends BaseAnalysisTask { public void doExecute() throws Exception { Set partitionNames = info.colToPartitions.get(info.colName); if (partitionNames.isEmpty()) { - LOG.debug("Skip empty empty partition task for column {} in {}.{}.{}", - info.catalogId, info.dbId, info.tblId, info.colName); - job.appendBuf(this, Collections.emptyList()); + StatsId statsId = new StatsId(concatColumnStatsId(), info.catalogId, info.dbId, + info.tblId, info.indexId, info.colName, null); + job.appendBuf(this, Arrays.asList(new ColStatsData(statsId))); return; } if (tableSample != null) { @@ -308,4 +309,14 @@ public class OlapAnalysisTask extends BaseAnalysisTask { && keysNum == 1 && (keysType.equals(KeysType.UNIQUE_KEYS) || keysType.equals(KeysType.AGG_KEYS)); } + + protected String concatColumnStatsId() { + StringBuilder stringBuilder = new StringBuilder(); + stringBuilder.append(info.tblId); + stringBuilder.append("-"); + stringBuilder.append(info.indexId); + stringBuilder.append("-"); + stringBuilder.append(info.colName); + return stringBuilder.toString(); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsId.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsId.java index 7cd8817a1a..a636054cc0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsId.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsId.java @@ -57,6 +57,16 @@ public class StatsId { this.partId = row.get(6); } + public StatsId(String id, long catalogId, long dbId, long tblId, long idxId, String colId, String partId) { + this.id = id; + this.catalogId = catalogId; + this.dbId = dbId; + this.tblId = tblId; + this.idxId = idxId; + this.colId = colId; + this.partId = partId; + } + public String toSQL() { StringJoiner sj = new StringJoiner(","); sj.add(StatisticsUtil.quote(id)); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java index d5bdf8bf05..d8e10c0b99 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java @@ -527,6 +527,10 @@ public class StatisticsUtil { * @return Health, the value range is [0, 100], the larger the value, the healthier the statistics of the table. */ public static int getTableHealth(long totalRows, long updatedRows) { + // Avoid analyze empty table every time. + if (totalRows == 0 && updatedRows == 0) { + return 100; + } if (updatedRows >= totalRows) { return 0; } else { diff --git a/regression-test/suites/statistics/analyze_stats.groovy b/regression-test/suites/statistics/analyze_stats.groovy index 9a06ed772b..f9a5ede91b 100644 --- a/regression-test/suites/statistics/analyze_stats.groovy +++ b/regression-test/suites/statistics/analyze_stats.groovy @@ -1244,7 +1244,6 @@ PARTITION `p599` VALUES IN (599) assert all_finished(show_result) - // Test truncate table will drop table stats too. sql """ANALYZE TABLE ${tbl} WITH SYNC""" def result_before_truncate = sql """show column stats ${tbl}""" @@ -1255,8 +1254,6 @@ PARTITION `p599` VALUES IN (599) result_after_truncate = sql """show column cached stats ${tbl}""" assertEquals(0, result_after_truncate.size()) - - sql """ delete from ${tbl} where analyzetestlimitedk3 >= -2147483648 """ @@ -1277,4 +1274,164 @@ PARTITION `p599` VALUES IN (599) assert "1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111" == truncate_test_result[0][6].substring(1, 1025) assert "1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111" == truncate_test_result[0][7].substring(1, 1025) + sql """TRUNCATE TABLE ${tbl}""" + result_after_truncate = sql """show column stats ${tbl}""" + assertEquals(0, result_after_truncate.size()) + sql """ANALYZE TABLE ${tbl} WITH SYNC""" + result_after_truncate = sql """show column stats ${tbl}""" + assertEquals(14, result_after_truncate.size()) + + result = sql """show column stats ${tbl}(analyzetestlimitedk0);""" + assertEquals(1, result.size()) + assertEquals("analyzetestlimitedk0", result[0][0]) + assertEquals("0.0", result[0][1]) + assertEquals("0.0", result[0][2]) + assertEquals("0.0", result[0][3]) + assertEquals("0.0", result[0][4]) + assertEquals("0.0", result[0][5]) + assertEquals("N/A", result[0][6]) + assertEquals("N/A", result[0][7]) + + result = sql """show column stats ${tbl}(analyzetestlimitedk1);""" + assertEquals(1, result.size()) + assertEquals("analyzetestlimitedk1", result[0][0]) + assertEquals("0.0", result[0][1]) + assertEquals("0.0", result[0][2]) + assertEquals("0.0", result[0][3]) + assertEquals("0.0", result[0][4]) + assertEquals("0.0", result[0][5]) + assertEquals("N/A", result[0][6]) + assertEquals("N/A", result[0][7]) + + result = sql """show column stats ${tbl}(analyzetestlimitedk2);""" + assertEquals(1, result.size()) + assertEquals("analyzetestlimitedk2", result[0][0]) + assertEquals("0.0", result[0][1]) + assertEquals("0.0", result[0][2]) + assertEquals("0.0", result[0][3]) + assertEquals("0.0", result[0][4]) + assertEquals("0.0", result[0][5]) + assertEquals("N/A", result[0][6]) + assertEquals("N/A", result[0][7]) + + result = sql """show column stats ${tbl}(analyzetestlimitedk3);""" + assertEquals(1, result.size()) + assertEquals("analyzetestlimitedk3", result[0][0]) + assertEquals("0.0", result[0][1]) + assertEquals("0.0", result[0][2]) + assertEquals("0.0", result[0][3]) + assertEquals("0.0", result[0][4]) + assertEquals("0.0", result[0][5]) + assertEquals("N/A", result[0][6]) + assertEquals("N/A", result[0][7]) + + result = sql """show column stats ${tbl}(analyzetestlimitedk4);""" + assertEquals(1, result.size()) + assertEquals("analyzetestlimitedk4", result[0][0]) + assertEquals("0.0", result[0][1]) + assertEquals("0.0", result[0][2]) + assertEquals("0.0", result[0][3]) + assertEquals("0.0", result[0][4]) + assertEquals("0.0", result[0][5]) + assertEquals("N/A", result[0][6]) + assertEquals("N/A", result[0][7]) + + result = sql """show column stats ${tbl}(analyzetestlimitedk5);""" + assertEquals(1, result.size()) + assertEquals("analyzetestlimitedk5", result[0][0]) + assertEquals("0.0", result[0][1]) + assertEquals("0.0", result[0][2]) + assertEquals("0.0", result[0][3]) + assertEquals("0.0", result[0][4]) + assertEquals("0.0", result[0][5]) + assertEquals("N/A", result[0][6]) + assertEquals("N/A", result[0][7]) + + result = sql """show column stats ${tbl}(analyzetestlimitedk6);""" + assertEquals(1, result.size()) + assertEquals("analyzetestlimitedk6", result[0][0]) + assertEquals("0.0", result[0][1]) + assertEquals("0.0", result[0][2]) + assertEquals("0.0", result[0][3]) + assertEquals("0.0", result[0][4]) + assertEquals("0.0", result[0][5]) + assertEquals("N/A", result[0][6]) + assertEquals("N/A", result[0][7]) + + result = sql """show column stats ${tbl}(analyzetestlimitedk7);""" + assertEquals(1, result.size()) + assertEquals("analyzetestlimitedk7", result[0][0]) + assertEquals("0.0", result[0][1]) + assertEquals("0.0", result[0][2]) + assertEquals("0.0", result[0][3]) + assertEquals("0.0", result[0][4]) + assertEquals("0.0", result[0][5]) + assertEquals("N/A", result[0][6]) + assertEquals("N/A", result[0][7]) + + result = sql """show column stats ${tbl}(analyzetestlimitedk8);""" + assertEquals(1, result.size()) + assertEquals("analyzetestlimitedk8", result[0][0]) + assertEquals("0.0", result[0][1]) + assertEquals("0.0", result[0][2]) + assertEquals("0.0", result[0][3]) + assertEquals("0.0", result[0][4]) + assertEquals("0.0", result[0][5]) + assertEquals("N/A", result[0][6]) + assertEquals("N/A", result[0][7]) + + result = sql """show column stats ${tbl}(analyzetestlimitedk9);""" + assertEquals(1, result.size()) + assertEquals("analyzetestlimitedk9", result[0][0]) + assertEquals("0.0", result[0][1]) + assertEquals("0.0", result[0][2]) + assertEquals("0.0", result[0][3]) + assertEquals("0.0", result[0][4]) + assertEquals("0.0", result[0][5]) + assertEquals("N/A", result[0][6]) + assertEquals("N/A", result[0][7]) + + result = sql """show column stats ${tbl}(analyzetestlimitedk10);""" + assertEquals(1, result.size()) + assertEquals("analyzetestlimitedk10", result[0][0]) + assertEquals("0.0", result[0][1]) + assertEquals("0.0", result[0][2]) + assertEquals("0.0", result[0][3]) + assertEquals("0.0", result[0][4]) + assertEquals("0.0", result[0][5]) + assertEquals("N/A", result[0][6]) + assertEquals("N/A", result[0][7]) + + result = sql """show column stats ${tbl}(analyzetestlimitedk11);""" + assertEquals(1, result.size()) + assertEquals("analyzetestlimitedk11", result[0][0]) + assertEquals("0.0", result[0][1]) + assertEquals("0.0", result[0][2]) + assertEquals("0.0", result[0][3]) + assertEquals("0.0", result[0][4]) + assertEquals("0.0", result[0][5]) + assertEquals("N/A", result[0][6]) + assertEquals("N/A", result[0][7]) + + result = sql """show column stats ${tbl}(analyzetestlimitedk12);""" + assertEquals(1, result.size()) + assertEquals("analyzetestlimitedk12", result[0][0]) + assertEquals("0.0", result[0][1]) + assertEquals("0.0", result[0][2]) + assertEquals("0.0", result[0][3]) + assertEquals("0.0", result[0][4]) + assertEquals("0.0", result[0][5]) + assertEquals("N/A", result[0][6]) + assertEquals("N/A", result[0][7]) + + result = sql """show column stats ${tbl}(analyzetestlimitedk13);""" + assertEquals(1, result.size()) + assertEquals("analyzetestlimitedk13", result[0][0]) + assertEquals("0.0", result[0][1]) + assertEquals("0.0", result[0][2]) + assertEquals("0.0", result[0][3]) + assertEquals("0.0", result[0][4]) + assertEquals("0.0", result[0][5]) + assertEquals("N/A", result[0][6]) + assertEquals("N/A", result[0][7]) }