[improvement](statistics)Analyze empty table. #28077

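Analyze a table even when it is empty, so that zero-valued column statistics are recorded for it. For an empty table, `show column stats` should return a result like this: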

mysql> show column stats nation;
+-------------+-------+------+----------+-----------+---------------+------+------+--------+--------------+---------+-------------+---------------------+
| column_name | count | ndv  | num_null | data_size | avg_size_byte | min  | max  | method | type         | trigger | query_times | updated_time        |
+-------------+-------+------+----------+-----------+---------------+------+------+--------+--------------+---------+-------------+---------------------+
| n_comment   | 0.0   | 0.0  | 0.0      | 0.0       | 0.0           | N/A  | N/A  | FULL   | FUNDAMENTALS | MANUAL  | 0           | 2023-12-06 19:22:09 |
| n_nationkey | 0.0   | 0.0  | 0.0      | 0.0       | 0.0           | N/A  | N/A  | FULL   | FUNDAMENTALS | MANUAL  | 0           | 2023-12-06 19:22:09 |
| n_regionkey | 0.0   | 0.0  | 0.0      | 0.0       | 0.0           | N/A  | N/A  | FULL   | FUNDAMENTALS | MANUAL  | 0           | 2023-12-06 19:22:09 |
| n_name      | 0.0   | 0.0  | 0.0      | 0.0       | 0.0           | N/A  | N/A  | FULL   | FUNDAMENTALS | MANUAL  | 0           | 2023-12-06 19:22:09 |
+-------------+-------+------+----------+-----------+---------------+------+------+--------+--------------+---------+-------------+---------------------+
This commit is contained in:
Jibing-Li
2023-12-07 10:16:52 +08:00
committed by GitHub
parent 42b3dd35bb
commit 4cac07be30
6 changed files with 202 additions and 11 deletions

View File

@ -1155,11 +1155,6 @@ public class OlapTable extends Table {
if (tblStats == null) {
return true;
}
long rowCount = getRowCount();
// TODO: Do we need to analyze an empty table?
if (rowCount == 0) {
return false;
}
if (!tblStats.analyzeColumns().containsAll(getBaseSchema()
.stream()
.filter(c -> !StatisticsUtil.isUnsupportedType(c.getType()))
@ -1167,6 +1162,7 @@ public class OlapTable extends Table {
.collect(Collectors.toSet()))) {
return true;
}
long rowCount = getRowCount();
long updateRows = tblStats.updatedRows.get();
int tblHealth = StatisticsUtil.getTableHealth(rowCount, updateRows);
return tblHealth < StatisticsUtil.getTableStatsHealthThreshold();

View File

@ -21,6 +21,8 @@ import org.apache.doris.statistics.util.StatisticsUtil;
import com.google.common.annotations.VisibleForTesting;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.StringJoiner;
/**
@ -66,6 +68,17 @@ public class ColStatsData {
updateTime = null;
}
public ColStatsData(StatsId statsId) {
this.statsId = statsId;
count = 0;
ndv = 0;
nullCount = 0;
minLit = null;
maxLit = null;
dataSizeInBytes = 0;
updateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"));
}
public ColStatsData(ResultRow row) {
this.statsId = new StatsId(row);
this.count = (long) Double.parseDouble(row.get(7));

View File

@ -33,6 +33,7 @@ import org.apache.commons.text.StringSubstitutor;
import java.security.SecureRandom;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
@ -61,9 +62,9 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
public void doExecute() throws Exception {
Set<String> partitionNames = info.colToPartitions.get(info.colName);
if (partitionNames.isEmpty()) {
LOG.debug("Skip empty empty partition task for column {} in {}.{}.{}",
info.catalogId, info.dbId, info.tblId, info.colName);
job.appendBuf(this, Collections.emptyList());
StatsId statsId = new StatsId(concatColumnStatsId(), info.catalogId, info.dbId,
info.tblId, info.indexId, info.colName, null);
job.appendBuf(this, Arrays.asList(new ColStatsData(statsId)));
return;
}
if (tableSample != null) {
@ -308,4 +309,14 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
&& keysNum == 1
&& (keysType.equals(KeysType.UNIQUE_KEYS) || keysType.equals(KeysType.AGG_KEYS));
}
/**
 * Builds the statistics id string for the column this task analyzes.
 *
 * @return {@code info.tblId}, {@code info.indexId} and {@code info.colName} joined with "-"
 */
protected String concatColumnStatsId() {
    return info.tblId + "-" + info.indexId + "-" + info.colName;
}
}

View File

@ -57,6 +57,16 @@ public class StatsId {
this.partId = row.get(6);
}
/**
 * Creates a stats id directly from its individual components; each argument is stored
 * unchanged in the field it corresponds to.
 *
 * @param id        value stored in {@code id}
 * @param catalogId value stored in {@code catalogId}
 * @param dbId      value stored in {@code dbId}
 * @param tblId     value stored in {@code tblId}
 * @param idxId     value stored in {@code idxId}
 * @param colId     value stored in {@code colId}
 * @param partId    value stored in {@code partId}
 */
public StatsId(String id, long catalogId, long dbId, long tblId, long idxId, String colId, String partId) {
    // String-valued components.
    this.id = id;
    this.colId = colId;
    this.partId = partId;

    // Numeric components.
    this.catalogId = catalogId;
    this.dbId = dbId;
    this.tblId = tblId;
    this.idxId = idxId;
}
public String toSQL() {
StringJoiner sj = new StringJoiner(",");
sj.add(StatisticsUtil.quote(id));

View File

@ -527,6 +527,10 @@ public class StatisticsUtil {
* @return Health, the value range is [0, 100], the larger the value, the healthier the statistics of the table.
*/
public static int getTableHealth(long totalRows, long updatedRows) {
// Avoid analyze empty table every time.
if (totalRows == 0 && updatedRows == 0) {
return 100;
}
if (updatedRows >= totalRows) {
return 0;
} else {

View File

@ -1244,7 +1244,6 @@ PARTITION `p599` VALUES IN (599)
assert all_finished(show_result)
// Test truncate table will drop table stats too.
sql """ANALYZE TABLE ${tbl} WITH SYNC"""
def result_before_truncate = sql """show column stats ${tbl}"""
@ -1255,8 +1254,6 @@ PARTITION `p599` VALUES IN (599)
result_after_truncate = sql """show column cached stats ${tbl}"""
assertEquals(0, result_after_truncate.size())
sql """
delete from ${tbl} where analyzetestlimitedk3 >= -2147483648
"""
@ -1277,4 +1274,164 @@ PARTITION `p599` VALUES IN (599)
assert "1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111" == truncate_test_result[0][6].substring(1, 1025)
assert "1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111" == truncate_test_result[0][7].substring(1, 1025)
sql """TRUNCATE TABLE ${tbl}"""
result_after_truncate = sql """show column stats ${tbl}"""
assertEquals(0, result_after_truncate.size())
sql """ANALYZE TABLE ${tbl} WITH SYNC"""
result_after_truncate = sql """show column stats ${tbl}"""
assertEquals(14, result_after_truncate.size())
result = sql """show column stats ${tbl}(analyzetestlimitedk0);"""
assertEquals(1, result.size())
assertEquals("analyzetestlimitedk0", result[0][0])
assertEquals("0.0", result[0][1])
assertEquals("0.0", result[0][2])
assertEquals("0.0", result[0][3])
assertEquals("0.0", result[0][4])
assertEquals("0.0", result[0][5])
assertEquals("N/A", result[0][6])
assertEquals("N/A", result[0][7])
result = sql """show column stats ${tbl}(analyzetestlimitedk1);"""
assertEquals(1, result.size())
assertEquals("analyzetestlimitedk1", result[0][0])
assertEquals("0.0", result[0][1])
assertEquals("0.0", result[0][2])
assertEquals("0.0", result[0][3])
assertEquals("0.0", result[0][4])
assertEquals("0.0", result[0][5])
assertEquals("N/A", result[0][6])
assertEquals("N/A", result[0][7])
result = sql """show column stats ${tbl}(analyzetestlimitedk2);"""
assertEquals(1, result.size())
assertEquals("analyzetestlimitedk2", result[0][0])
assertEquals("0.0", result[0][1])
assertEquals("0.0", result[0][2])
assertEquals("0.0", result[0][3])
assertEquals("0.0", result[0][4])
assertEquals("0.0", result[0][5])
assertEquals("N/A", result[0][6])
assertEquals("N/A", result[0][7])
result = sql """show column stats ${tbl}(analyzetestlimitedk3);"""
assertEquals(1, result.size())
assertEquals("analyzetestlimitedk3", result[0][0])
assertEquals("0.0", result[0][1])
assertEquals("0.0", result[0][2])
assertEquals("0.0", result[0][3])
assertEquals("0.0", result[0][4])
assertEquals("0.0", result[0][5])
assertEquals("N/A", result[0][6])
assertEquals("N/A", result[0][7])
result = sql """show column stats ${tbl}(analyzetestlimitedk4);"""
assertEquals(1, result.size())
assertEquals("analyzetestlimitedk4", result[0][0])
assertEquals("0.0", result[0][1])
assertEquals("0.0", result[0][2])
assertEquals("0.0", result[0][3])
assertEquals("0.0", result[0][4])
assertEquals("0.0", result[0][5])
assertEquals("N/A", result[0][6])
assertEquals("N/A", result[0][7])
result = sql """show column stats ${tbl}(analyzetestlimitedk5);"""
assertEquals(1, result.size())
assertEquals("analyzetestlimitedk5", result[0][0])
assertEquals("0.0", result[0][1])
assertEquals("0.0", result[0][2])
assertEquals("0.0", result[0][3])
assertEquals("0.0", result[0][4])
assertEquals("0.0", result[0][5])
assertEquals("N/A", result[0][6])
assertEquals("N/A", result[0][7])
result = sql """show column stats ${tbl}(analyzetestlimitedk6);"""
assertEquals(1, result.size())
assertEquals("analyzetestlimitedk6", result[0][0])
assertEquals("0.0", result[0][1])
assertEquals("0.0", result[0][2])
assertEquals("0.0", result[0][3])
assertEquals("0.0", result[0][4])
assertEquals("0.0", result[0][5])
assertEquals("N/A", result[0][6])
assertEquals("N/A", result[0][7])
result = sql """show column stats ${tbl}(analyzetestlimitedk7);"""
assertEquals(1, result.size())
assertEquals("analyzetestlimitedk7", result[0][0])
assertEquals("0.0", result[0][1])
assertEquals("0.0", result[0][2])
assertEquals("0.0", result[0][3])
assertEquals("0.0", result[0][4])
assertEquals("0.0", result[0][5])
assertEquals("N/A", result[0][6])
assertEquals("N/A", result[0][7])
result = sql """show column stats ${tbl}(analyzetestlimitedk8);"""
assertEquals(1, result.size())
assertEquals("analyzetestlimitedk8", result[0][0])
assertEquals("0.0", result[0][1])
assertEquals("0.0", result[0][2])
assertEquals("0.0", result[0][3])
assertEquals("0.0", result[0][4])
assertEquals("0.0", result[0][5])
assertEquals("N/A", result[0][6])
assertEquals("N/A", result[0][7])
result = sql """show column stats ${tbl}(analyzetestlimitedk9);"""
assertEquals(1, result.size())
assertEquals("analyzetestlimitedk9", result[0][0])
assertEquals("0.0", result[0][1])
assertEquals("0.0", result[0][2])
assertEquals("0.0", result[0][3])
assertEquals("0.0", result[0][4])
assertEquals("0.0", result[0][5])
assertEquals("N/A", result[0][6])
assertEquals("N/A", result[0][7])
result = sql """show column stats ${tbl}(analyzetestlimitedk10);"""
assertEquals(1, result.size())
assertEquals("analyzetestlimitedk10", result[0][0])
assertEquals("0.0", result[0][1])
assertEquals("0.0", result[0][2])
assertEquals("0.0", result[0][3])
assertEquals("0.0", result[0][4])
assertEquals("0.0", result[0][5])
assertEquals("N/A", result[0][6])
assertEquals("N/A", result[0][7])
result = sql """show column stats ${tbl}(analyzetestlimitedk11);"""
assertEquals(1, result.size())
assertEquals("analyzetestlimitedk11", result[0][0])
assertEquals("0.0", result[0][1])
assertEquals("0.0", result[0][2])
assertEquals("0.0", result[0][3])
assertEquals("0.0", result[0][4])
assertEquals("0.0", result[0][5])
assertEquals("N/A", result[0][6])
assertEquals("N/A", result[0][7])
result = sql """show column stats ${tbl}(analyzetestlimitedk12);"""
assertEquals(1, result.size())
assertEquals("analyzetestlimitedk12", result[0][0])
assertEquals("0.0", result[0][1])
assertEquals("0.0", result[0][2])
assertEquals("0.0", result[0][3])
assertEquals("0.0", result[0][4])
assertEquals("0.0", result[0][5])
assertEquals("N/A", result[0][6])
assertEquals("N/A", result[0][7])
result = sql """show column stats ${tbl}(analyzetestlimitedk13);"""
assertEquals(1, result.size())
assertEquals("analyzetestlimitedk13", result[0][0])
assertEquals("0.0", result[0][1])
assertEquals("0.0", result[0][2])
assertEquals("0.0", result[0][3])
assertEquals("0.0", result[0][4])
assertEquals("0.0", result[0][5])
assertEquals("N/A", result[0][6])
assertEquals("N/A", result[0][7])
}