[improvement](statistics) Analyze empty table. #28077
Analyze a table even when it is empty. The result should look like this:

mysql> show column stats nation;
+-------------+-------+------+----------+-----------+---------------+------+------+--------+--------------+---------+-------------+---------------------+
| column_name | count | ndv  | num_null | data_size | avg_size_byte | min  | max  | method | type         | trigger | query_times | updated_time        |
+-------------+-------+------+----------+-----------+---------------+------+------+--------+--------------+---------+-------------+---------------------+
| n_comment   | 0.0   | 0.0  | 0.0      | 0.0       | 0.0           | N/A  | N/A  | FULL   | FUNDAMENTALS | MANUAL  | 0           | 2023-12-06 19:22:09 |
| n_nationkey | 0.0   | 0.0  | 0.0      | 0.0       | 0.0           | N/A  | N/A  | FULL   | FUNDAMENTALS | MANUAL  | 0           | 2023-12-06 19:22:09 |
| n_regionkey | 0.0   | 0.0  | 0.0      | 0.0       | 0.0           | N/A  | N/A  | FULL   | FUNDAMENTALS | MANUAL  | 0           | 2023-12-06 19:22:09 |
| n_name      | 0.0   | 0.0  | 0.0      | 0.0       | 0.0           | N/A  | N/A  | FULL   | FUNDAMENTALS | MANUAL  | 0           | 2023-12-06 19:22:09 |
+-------------+-------+------+----------+-----------+---------------+------+------+--------+--------------+---------+-------------+---------------------+
This commit is contained in:
@ -1155,11 +1155,6 @@ public class OlapTable extends Table {
|
||||
if (tblStats == null) {
|
||||
return true;
|
||||
}
|
||||
long rowCount = getRowCount();
|
||||
// TODO: Do we need to analyze an empty table?
|
||||
if (rowCount == 0) {
|
||||
return false;
|
||||
}
|
||||
if (!tblStats.analyzeColumns().containsAll(getBaseSchema()
|
||||
.stream()
|
||||
.filter(c -> !StatisticsUtil.isUnsupportedType(c.getType()))
|
||||
@ -1167,6 +1162,7 @@ public class OlapTable extends Table {
|
||||
.collect(Collectors.toSet()))) {
|
||||
return true;
|
||||
}
|
||||
long rowCount = getRowCount();
|
||||
long updateRows = tblStats.updatedRows.get();
|
||||
int tblHealth = StatisticsUtil.getTableHealth(rowCount, updateRows);
|
||||
return tblHealth < StatisticsUtil.getTableStatsHealthThreshold();
|
||||
|
||||
@ -21,6 +21,8 @@ import org.apache.doris.statistics.util.StatisticsUtil;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.StringJoiner;
|
||||
|
||||
/**
|
||||
@ -66,6 +68,17 @@ public class ColStatsData {
|
||||
updateTime = null;
|
||||
}
|
||||
|
||||
public ColStatsData(StatsId statsId) {
|
||||
this.statsId = statsId;
|
||||
count = 0;
|
||||
ndv = 0;
|
||||
nullCount = 0;
|
||||
minLit = null;
|
||||
maxLit = null;
|
||||
dataSizeInBytes = 0;
|
||||
updateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"));
|
||||
}
|
||||
|
||||
public ColStatsData(ResultRow row) {
|
||||
this.statsId = new StatsId(row);
|
||||
this.count = (long) Double.parseDouble(row.get(7));
|
||||
|
||||
@ -33,6 +33,7 @@ import org.apache.commons.text.StringSubstitutor;
|
||||
|
||||
import java.security.SecureRandom;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
@ -61,9 +62,9 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
|
||||
public void doExecute() throws Exception {
|
||||
Set<String> partitionNames = info.colToPartitions.get(info.colName);
|
||||
if (partitionNames.isEmpty()) {
|
||||
LOG.debug("Skip empty empty partition task for column {} in {}.{}.{}",
|
||||
info.catalogId, info.dbId, info.tblId, info.colName);
|
||||
job.appendBuf(this, Collections.emptyList());
|
||||
StatsId statsId = new StatsId(concatColumnStatsId(), info.catalogId, info.dbId,
|
||||
info.tblId, info.indexId, info.colName, null);
|
||||
job.appendBuf(this, Arrays.asList(new ColStatsData(statsId)));
|
||||
return;
|
||||
}
|
||||
if (tableSample != null) {
|
||||
@ -308,4 +309,14 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
|
||||
&& keysNum == 1
|
||||
&& (keysType.equals(KeysType.UNIQUE_KEYS) || keysType.equals(KeysType.AGG_KEYS));
|
||||
}
|
||||
|
||||
protected String concatColumnStatsId() {
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
stringBuilder.append(info.tblId);
|
||||
stringBuilder.append("-");
|
||||
stringBuilder.append(info.indexId);
|
||||
stringBuilder.append("-");
|
||||
stringBuilder.append(info.colName);
|
||||
return stringBuilder.toString();
|
||||
}
|
||||
}
|
||||
|
||||
@ -57,6 +57,16 @@ public class StatsId {
|
||||
this.partId = row.get(6);
|
||||
}
|
||||
|
||||
/**
 * Creates a stats id directly from its individual components.
 *
 * <p>Each argument is stored unchanged in the field of the same name; nothing is validated here.
 *
 * @param id        value stored in {@code id}
 * @param catalogId value stored in {@code catalogId}
 * @param dbId      value stored in {@code dbId}
 * @param tblId     value stored in {@code tblId}
 * @param idxId     value stored in {@code idxId}
 * @param colId     value stored in {@code colId}
 * @param partId    value stored in {@code partId}; not checked for {@code null}
 */
public StatsId(String id, long catalogId, long dbId, long tblId, long idxId, String colId, String partId) {
    // Numeric identifiers.
    this.catalogId = catalogId;
    this.dbId = dbId;
    this.tblId = tblId;
    this.idxId = idxId;

    // String identifiers.
    this.id = id;
    this.colId = colId;
    this.partId = partId;
}
|
||||
|
||||
public String toSQL() {
|
||||
StringJoiner sj = new StringJoiner(",");
|
||||
sj.add(StatisticsUtil.quote(id));
|
||||
|
||||
@ -527,6 +527,10 @@ public class StatisticsUtil {
|
||||
* @return Health, the value range is [0, 100], the larger the value, the healthier the statistics of the table.
|
||||
*/
|
||||
public static int getTableHealth(long totalRows, long updatedRows) {
|
||||
// Avoid analyze empty table every time.
|
||||
if (totalRows == 0 && updatedRows == 0) {
|
||||
return 100;
|
||||
}
|
||||
if (updatedRows >= totalRows) {
|
||||
return 0;
|
||||
} else {
|
||||
|
||||
@ -1244,7 +1244,6 @@ PARTITION `p599` VALUES IN (599)
|
||||
|
||||
assert all_finished(show_result)
|
||||
|
||||
|
||||
// Test truncate table will drop table stats too.
|
||||
sql """ANALYZE TABLE ${tbl} WITH SYNC"""
|
||||
def result_before_truncate = sql """show column stats ${tbl}"""
|
||||
@ -1255,8 +1254,6 @@ PARTITION `p599` VALUES IN (599)
|
||||
result_after_truncate = sql """show column cached stats ${tbl}"""
|
||||
assertEquals(0, result_after_truncate.size())
|
||||
|
||||
|
||||
|
||||
sql """
|
||||
delete from ${tbl} where analyzetestlimitedk3 >= -2147483648
|
||||
"""
|
||||
@ -1277,4 +1274,164 @@ PARTITION `p599` VALUES IN (599)
|
||||
asserttruncate_test_result[0][6].substring(1, 1025)
|
||||
asserttruncate_test_result[0][7].substring(1, 1025)
|
||||
|
||||
sql """TRUNCATE TABLE ${tbl}"""
|
||||
result_after_truncate = sql """show column stats ${tbl}"""
|
||||
assertEquals(0, result_after_truncate.size())
|
||||
sql """ANALYZE TABLE ${tbl} WITH SYNC"""
|
||||
result_after_truncate = sql """show column stats ${tbl}"""
|
||||
assertEquals(14, result_after_truncate.size())
|
||||
|
||||
result = sql """show column stats ${tbl}(analyzetestlimitedk0);"""
|
||||
assertEquals(1, result.size())
|
||||
assertEquals("analyzetestlimitedk0", result[0][0])
|
||||
assertEquals("0.0", result[0][1])
|
||||
assertEquals("0.0", result[0][2])
|
||||
assertEquals("0.0", result[0][3])
|
||||
assertEquals("0.0", result[0][4])
|
||||
assertEquals("0.0", result[0][5])
|
||||
assertEquals("N/A", result[0][6])
|
||||
assertEquals("N/A", result[0][7])
|
||||
|
||||
result = sql """show column stats ${tbl}(analyzetestlimitedk1);"""
|
||||
assertEquals(1, result.size())
|
||||
assertEquals("analyzetestlimitedk1", result[0][0])
|
||||
assertEquals("0.0", result[0][1])
|
||||
assertEquals("0.0", result[0][2])
|
||||
assertEquals("0.0", result[0][3])
|
||||
assertEquals("0.0", result[0][4])
|
||||
assertEquals("0.0", result[0][5])
|
||||
assertEquals("N/A", result[0][6])
|
||||
assertEquals("N/A", result[0][7])
|
||||
|
||||
result = sql """show column stats ${tbl}(analyzetestlimitedk2);"""
|
||||
assertEquals(1, result.size())
|
||||
assertEquals("analyzetestlimitedk2", result[0][0])
|
||||
assertEquals("0.0", result[0][1])
|
||||
assertEquals("0.0", result[0][2])
|
||||
assertEquals("0.0", result[0][3])
|
||||
assertEquals("0.0", result[0][4])
|
||||
assertEquals("0.0", result[0][5])
|
||||
assertEquals("N/A", result[0][6])
|
||||
assertEquals("N/A", result[0][7])
|
||||
|
||||
result = sql """show column stats ${tbl}(analyzetestlimitedk3);"""
|
||||
assertEquals(1, result.size())
|
||||
assertEquals("analyzetestlimitedk3", result[0][0])
|
||||
assertEquals("0.0", result[0][1])
|
||||
assertEquals("0.0", result[0][2])
|
||||
assertEquals("0.0", result[0][3])
|
||||
assertEquals("0.0", result[0][4])
|
||||
assertEquals("0.0", result[0][5])
|
||||
assertEquals("N/A", result[0][6])
|
||||
assertEquals("N/A", result[0][7])
|
||||
|
||||
result = sql """show column stats ${tbl}(analyzetestlimitedk4);"""
|
||||
assertEquals(1, result.size())
|
||||
assertEquals("analyzetestlimitedk4", result[0][0])
|
||||
assertEquals("0.0", result[0][1])
|
||||
assertEquals("0.0", result[0][2])
|
||||
assertEquals("0.0", result[0][3])
|
||||
assertEquals("0.0", result[0][4])
|
||||
assertEquals("0.0", result[0][5])
|
||||
assertEquals("N/A", result[0][6])
|
||||
assertEquals("N/A", result[0][7])
|
||||
|
||||
result = sql """show column stats ${tbl}(analyzetestlimitedk5);"""
|
||||
assertEquals(1, result.size())
|
||||
assertEquals("analyzetestlimitedk5", result[0][0])
|
||||
assertEquals("0.0", result[0][1])
|
||||
assertEquals("0.0", result[0][2])
|
||||
assertEquals("0.0", result[0][3])
|
||||
assertEquals("0.0", result[0][4])
|
||||
assertEquals("0.0", result[0][5])
|
||||
assertEquals("N/A", result[0][6])
|
||||
assertEquals("N/A", result[0][7])
|
||||
|
||||
result = sql """show column stats ${tbl}(analyzetestlimitedk6);"""
|
||||
assertEquals(1, result.size())
|
||||
assertEquals("analyzetestlimitedk6", result[0][0])
|
||||
assertEquals("0.0", result[0][1])
|
||||
assertEquals("0.0", result[0][2])
|
||||
assertEquals("0.0", result[0][3])
|
||||
assertEquals("0.0", result[0][4])
|
||||
assertEquals("0.0", result[0][5])
|
||||
assertEquals("N/A", result[0][6])
|
||||
assertEquals("N/A", result[0][7])
|
||||
|
||||
result = sql """show column stats ${tbl}(analyzetestlimitedk7);"""
|
||||
assertEquals(1, result.size())
|
||||
assertEquals("analyzetestlimitedk7", result[0][0])
|
||||
assertEquals("0.0", result[0][1])
|
||||
assertEquals("0.0", result[0][2])
|
||||
assertEquals("0.0", result[0][3])
|
||||
assertEquals("0.0", result[0][4])
|
||||
assertEquals("0.0", result[0][5])
|
||||
assertEquals("N/A", result[0][6])
|
||||
assertEquals("N/A", result[0][7])
|
||||
|
||||
result = sql """show column stats ${tbl}(analyzetestlimitedk8);"""
|
||||
assertEquals(1, result.size())
|
||||
assertEquals("analyzetestlimitedk8", result[0][0])
|
||||
assertEquals("0.0", result[0][1])
|
||||
assertEquals("0.0", result[0][2])
|
||||
assertEquals("0.0", result[0][3])
|
||||
assertEquals("0.0", result[0][4])
|
||||
assertEquals("0.0", result[0][5])
|
||||
assertEquals("N/A", result[0][6])
|
||||
assertEquals("N/A", result[0][7])
|
||||
|
||||
result = sql """show column stats ${tbl}(analyzetestlimitedk9);"""
|
||||
assertEquals(1, result.size())
|
||||
assertEquals("analyzetestlimitedk9", result[0][0])
|
||||
assertEquals("0.0", result[0][1])
|
||||
assertEquals("0.0", result[0][2])
|
||||
assertEquals("0.0", result[0][3])
|
||||
assertEquals("0.0", result[0][4])
|
||||
assertEquals("0.0", result[0][5])
|
||||
assertEquals("N/A", result[0][6])
|
||||
assertEquals("N/A", result[0][7])
|
||||
|
||||
result = sql """show column stats ${tbl}(analyzetestlimitedk10);"""
|
||||
assertEquals(1, result.size())
|
||||
assertEquals("analyzetestlimitedk10", result[0][0])
|
||||
assertEquals("0.0", result[0][1])
|
||||
assertEquals("0.0", result[0][2])
|
||||
assertEquals("0.0", result[0][3])
|
||||
assertEquals("0.0", result[0][4])
|
||||
assertEquals("0.0", result[0][5])
|
||||
assertEquals("N/A", result[0][6])
|
||||
assertEquals("N/A", result[0][7])
|
||||
|
||||
result = sql """show column stats ${tbl}(analyzetestlimitedk11);"""
|
||||
assertEquals(1, result.size())
|
||||
assertEquals("analyzetestlimitedk11", result[0][0])
|
||||
assertEquals("0.0", result[0][1])
|
||||
assertEquals("0.0", result[0][2])
|
||||
assertEquals("0.0", result[0][3])
|
||||
assertEquals("0.0", result[0][4])
|
||||
assertEquals("0.0", result[0][5])
|
||||
assertEquals("N/A", result[0][6])
|
||||
assertEquals("N/A", result[0][7])
|
||||
|
||||
result = sql """show column stats ${tbl}(analyzetestlimitedk12);"""
|
||||
assertEquals(1, result.size())
|
||||
assertEquals("analyzetestlimitedk12", result[0][0])
|
||||
assertEquals("0.0", result[0][1])
|
||||
assertEquals("0.0", result[0][2])
|
||||
assertEquals("0.0", result[0][3])
|
||||
assertEquals("0.0", result[0][4])
|
||||
assertEquals("0.0", result[0][5])
|
||||
assertEquals("N/A", result[0][6])
|
||||
assertEquals("N/A", result[0][7])
|
||||
|
||||
result = sql """show column stats ${tbl}(analyzetestlimitedk13);"""
|
||||
assertEquals(1, result.size())
|
||||
assertEquals("analyzetestlimitedk13", result[0][0])
|
||||
assertEquals("0.0", result[0][1])
|
||||
assertEquals("0.0", result[0][2])
|
||||
assertEquals("0.0", result[0][3])
|
||||
assertEquals("0.0", result[0][4])
|
||||
assertEquals("0.0", result[0][5])
|
||||
assertEquals("N/A", result[0][6])
|
||||
assertEquals("N/A", result[0][7])
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user