[feat](stats) Delete data size stat and make task timeout configurable (#20090)

1. Delete the stats for data size, since collecting them costs too much time and they are of little use.
2. Make the analyze task timeout configurable, since it is common to analyze a very large table, for which the default 10-minute timeout is not suitable.
This commit is contained in:
AKIRA
2023-05-29 17:40:59 +09:00
committed by GitHub
parent 55ccddb62c
commit cc47ee480c
5 changed files with 136 additions and 134 deletions

View File

@ -1993,4 +1993,8 @@ public class Config extends ConfigBase {
"是否启用binlog特性",
"Whether to enable binlog feature"})
public static boolean enable_feature_binlog = false;
@ConfField
public static int analyze_task_timeout_in_minutes = 120;
}

View File

@ -525,7 +525,7 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> {
throw new RuntimeException(String.format("Invalid slot: %s", slotReference.getExprId()));
}
ColumnStatistic cache = Config.enable_stats ? getColumnStatistic(table, colName) : ColumnStatistic.UNKNOWN;
if (cache == ColumnStatistic.UNKNOWN) {
if (cache == ColumnStatistic.UNKNOWN && !colName.equals("__DORIS_DELETE_SIGN__")) {
if (forbidUnknownColStats) {
if (StatisticsUtil.statsTblAvailable()) {
throw new AnalysisException("column stats for " + colName

View File

@ -75,7 +75,7 @@ public class AnalysisTaskExecutor extends Thread {
try {
AnalysisTaskWrapper taskWrapper = taskQueue.take();
try {
long timeout = StatisticConstants.STATISTICS_TASKS_TIMEOUT_IN_MS;
long timeout = TimeUnit.MINUTES.toMillis(Config.analyze_task_timeout_in_minutes);
taskWrapper.get(timeout < 0 ? 0 : timeout, TimeUnit.MILLISECONDS);
} catch (Exception e) {
taskWrapper.cancel();

View File

@ -182,11 +182,9 @@ public abstract class BaseAnalysisTask {
return info.jobId;
}
// TODO : time cost is intolerable when column is string type, return 0 directly for now.
protected String getDataSizeFunction(Column column) {
if (column.getType().isStringType()) {
return "SUM(LENGTH(`${colName}`))";
}
return "COUNT(1) * " + column.getType().getSlotSize();
return "0";
}
private boolean isUnsupportedType(PrimitiveType type) {

View File

@ -18,150 +18,150 @@
suite("test_analyze_stats") {
/**************************************** Constant definition Begin ****************************************/
def dbName = "test_analyze_stats_db"
def tblName = "test_analyze_stats_tbl"
def fullTblName = "${dbName}.${tblName}"
def interDbName = "__internal_schema"
def analysisJobsTblName = "${interDbName}.analysis_jobs"
def colHistogramTblName = "${interDbName}.histogram_statistics"
def colStatisticsTblName = "${interDbName}.column_statistics"
def tblColumnNames = """ "c_id", "c_boolean", "c_int", "c_float", "c_double", "c_decimal", "c_varchar", "c_datev2" """
def colStatisticsSchema = "`col_id`, `count`, `ndv`, `null_count`, `min`, `max`, `data_size_in_bytes`"
def colHistogramSchema = "`col_id`, `sample_rate`, `buckets`"
// def dbName = "test_analyze_stats_db"
// def tblName = "test_analyze_stats_tbl"
// def fullTblName = "${dbName}.${tblName}"
//
// def interDbName = "__internal_schema"
// def analysisJobsTblName = "${interDbName}.analysis_jobs"
// def colHistogramTblName = "${interDbName}.histogram_statistics"
// def colStatisticsTblName = "${interDbName}.column_statistics"
//
// def tblColumnNames = """ "c_id", "c_boolean", "c_int", "c_float", "c_double", "c_decimal", "c_varchar", "c_datev2" """
// def colStatisticsSchema = "`col_id`, `count`, `ndv`, `null_count`, `min`, `max`, `data_size_in_bytes`"
// def colHistogramSchema = "`col_id`, `sample_rate`, `buckets`"
/***************************************** Constant definition End *****************************************/
/**************************************** Data initialization Begin ****************************************/
sql """
DROP DATABASE IF EXISTS ${dbName};
"""
// sql """
// DROP DATABASE IF EXISTS ${dbName};
// """
//
// sql """
// CREATE DATABASE IF NOT EXISTS ${dbName};
// """
//
// sql """
// DROP TABLE IF EXISTS ${fullTblName};
// """
//
// // Unsupported type: HLL, BITMAP, ARRAY, STRUCT, MAP, QUANTILE_STATE, JSONB
// sql """
// CREATE TABLE IF NOT EXISTS ${fullTblName} (
// `c_id` LARGEINT NOT NULL,
// `c_boolean` BOOLEAN,
// `c_int` INT,
// `c_float` FLOAT,
// `c_double` DOUBLE,
// `c_decimal` DECIMAL(6, 4),
// `c_varchar` VARCHAR(10),
// `c_datev2` DATEV2 NOT NULL
// ) ENGINE=OLAP
// DUPLICATE KEY(`c_id`)
// PARTITION BY LIST(`c_datev2`)
// (
// PARTITION `p_20230501` VALUES IN ("2023-05-01"),
// PARTITION `p_20230502` VALUES IN ("2023-05-02"),
// PARTITION `p_20230503` VALUES IN ("2023-05-03"),
// PARTITION `p_20230504` VALUES IN ("2023-05-04"),
// PARTITION `p_20230505` VALUES IN ("2023-05-05")
// )
// DISTRIBUTED BY HASH(`c_id`) BUCKETS 1
// PROPERTIES ("replication_num" = "1");
// """
//
// sql """ INSERT INTO ${fullTblName} VALUES (10001, NULL, NULL, NULL, NULL, NULL, NULL, "2023-05-01");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10002, NULL, NULL, NULL, NULL, NULL, NULL, "2023-05-02");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10003, NULL, NULL, NULL, NULL, NULL, NULL, "2023-05-03");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10004, NULL, NULL, NULL, NULL, NULL, NULL, "2023-05-04");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10005, NULL, NULL, NULL, NULL, NULL, NULL, "2023-05-05");"""
//
// sql """ INSERT INTO ${fullTblName} VALUES (10001, NULL, NULL, NULL, NULL, NULL, NULL, "2023-05-01");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10002, NULL, NULL, NULL, NULL, NULL, NULL, "2023-05-02");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10003, NULL, NULL, NULL, NULL, NULL, NULL, "2023-05-03");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10004, NULL, NULL, NULL, NULL, NULL, NULL, "2023-05-04");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10005, NULL, NULL, NULL, NULL, NULL, NULL, "2023-05-05");"""
//
// sql """ INSERT INTO ${fullTblName} VALUES (10001, NULL, NULL, NULL, NULL, NULL, NULL, "2023-05-01");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10002, NULL, NULL, NULL, NULL, NULL, NULL, "2023-05-02");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10003, NULL, NULL, NULL, NULL, NULL, NULL, "2023-05-03");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10004, NULL, NULL, NULL, NULL, NULL, NULL, "2023-05-04");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10005, NULL, NULL, NULL, NULL, NULL, NULL, "2023-05-05");"""
//
// sql """ INSERT INTO ${fullTblName} VALUES (10001, 0, "11", 11.0, 11.11, 11.1000, "aaa", "2023-05-01");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10002, 1, "22", 22.0, 22.22, 22.2000, "bbb", "2023-05-02");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10003, 0, "33", 33.0, 33.33, 33.3000, "ccc", "2023-05-03");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10004, 1, "44", 44.0, 44.44, 44.4000, "ddd", "2023-05-04");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10005, 0, "55", 55.0, 55.55, 55.5000, "eee", "2023-05-05");"""
//
// sql """ INSERT INTO ${fullTblName} VALUES (10001, 0, "11", 11.0, 11.11, 11.1000, "aaa", "2023-05-01");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10002, 1, "22", 22.0, 22.22, 22.2000, "bbb", "2023-05-02");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10003, 0, "33", 33.0, 33.33, 33.3000, "ccc", "2023-05-03");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10004, 1, "44", 44.0, 44.44, 44.4000, "ddd", "2023-05-04");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10005, 0, "55", 55.0, 55.55, 55.5000, "eee", "2023-05-05");"""
//
// sql """ INSERT INTO ${fullTblName} VALUES (10001, 0, "11", 11.0, 11.11, 11.1000, "aaa", "2023-05-01");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10002, 1, "22", 22.0, 22.22, 22.2000, "bbb", "2023-05-02");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10003, 0, "33", 33.0, 33.33, 33.3000, "ccc", "2023-05-03");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10004, 1, "44", 44.0, 44.44, 44.4000, "ddd", "2023-05-04");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10005, 0, "55", 55.0, 55.55, 55.5000, "eee", "2023-05-05");"""
//
// sql """ INSERT INTO ${fullTblName} VALUES (10001, 0, "11", 11.0, 11.11, 11.1000, "aaa", "2023-05-01");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10002, 1, "22", 22.0, 22.22, 22.2000, "bbb", "2023-05-02");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10003, 0, "33", 33.0, 33.33, 33.3000, "ccc", "2023-05-03");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10004, 1, "44", 44.0, 44.44, 44.4000, "ddd", "2023-05-04");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10005, 0, "55", 55.0, 55.55, 55.5000, "eee", "2023-05-05");"""
//
// sql """ INSERT INTO ${fullTblName} VALUES (10001, 0, "11", 11.0, 11.11, 11.1000, "aaa", "2023-05-01");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10002, 1, "22", 22.0, 22.22, 22.2000, "bbb", "2023-05-02");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10003, 0, "33", 33.0, 33.33, 33.3000, "ccc", "2023-05-03");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10004, 1, "44", 44.0, 44.44, 44.4000, "ddd", "2023-05-04");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10005, 0, "55", 55.0, 55.55, 55.5000, "eee", "2023-05-05");"""
//
// sql """ INSERT INTO ${fullTblName} VALUES (10001, 0, "11", 11.0, 11.11, 11.1000, "aaa", "2023-05-01");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10002, 1, "22", 22.0, 22.22, 22.2000, "bbb", "2023-05-02");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10003, 0, "33", 33.0, 33.33, 33.3000, "ccc", "2023-05-03");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10004, 1, "44", 44.0, 44.44, 44.4000, "ddd", "2023-05-04");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10005, 0, "55", 55.0, 55.55, 55.5000, "eee", "2023-05-05");"""
//
// sql """ INSERT INTO ${fullTblName} VALUES (10001, 0, "11", 11.0, 11.11, 11.1000, "aaa", "2023-05-01");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10002, 1, "22", 22.0, 22.22, 22.2000, "bbb", "2023-05-02");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10003, 0, "33", 33.0, 33.33, 33.3000, "ccc", "2023-05-03");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10004, 1, "44", 44.0, 44.44, 44.4000, "ddd", "2023-05-04");"""
// sql """ INSERT INTO ${fullTblName} VALUES (10005, 0, "55", 55.0, 55.55, 55.5000, "eee", "2023-05-05");"""
sql """
CREATE DATABASE IF NOT EXISTS ${dbName};
"""
sql """
DROP TABLE IF EXISTS ${fullTblName};
"""
// Unsupported type: HLL, BITMAP, ARRAY, STRUCT, MAP, QUANTILE_STATE, JSONB
sql """
CREATE TABLE IF NOT EXISTS ${fullTblName} (
`c_id` LARGEINT NOT NULL,
`c_boolean` BOOLEAN,
`c_int` INT,
`c_float` FLOAT,
`c_double` DOUBLE,
`c_decimal` DECIMAL(6, 4),
`c_varchar` VARCHAR(10),
`c_datev2` DATEV2 NOT NULL
) ENGINE=OLAP
DUPLICATE KEY(`c_id`)
PARTITION BY LIST(`c_datev2`)
(
PARTITION `p_20230501` VALUES IN ("2023-05-01"),
PARTITION `p_20230502` VALUES IN ("2023-05-02"),
PARTITION `p_20230503` VALUES IN ("2023-05-03"),
PARTITION `p_20230504` VALUES IN ("2023-05-04"),
PARTITION `p_20230505` VALUES IN ("2023-05-05")
)
DISTRIBUTED BY HASH(`c_id`) BUCKETS 1
PROPERTIES ("replication_num" = "1");
"""
sql """ INSERT INTO ${fullTblName} VALUES (10001, NULL, NULL, NULL, NULL, NULL, NULL, "2023-05-01");"""
sql """ INSERT INTO ${fullTblName} VALUES (10002, NULL, NULL, NULL, NULL, NULL, NULL, "2023-05-02");"""
sql """ INSERT INTO ${fullTblName} VALUES (10003, NULL, NULL, NULL, NULL, NULL, NULL, "2023-05-03");"""
sql """ INSERT INTO ${fullTblName} VALUES (10004, NULL, NULL, NULL, NULL, NULL, NULL, "2023-05-04");"""
sql """ INSERT INTO ${fullTblName} VALUES (10005, NULL, NULL, NULL, NULL, NULL, NULL, "2023-05-05");"""
sql """ INSERT INTO ${fullTblName} VALUES (10001, NULL, NULL, NULL, NULL, NULL, NULL, "2023-05-01");"""
sql """ INSERT INTO ${fullTblName} VALUES (10002, NULL, NULL, NULL, NULL, NULL, NULL, "2023-05-02");"""
sql """ INSERT INTO ${fullTblName} VALUES (10003, NULL, NULL, NULL, NULL, NULL, NULL, "2023-05-03");"""
sql """ INSERT INTO ${fullTblName} VALUES (10004, NULL, NULL, NULL, NULL, NULL, NULL, "2023-05-04");"""
sql """ INSERT INTO ${fullTblName} VALUES (10005, NULL, NULL, NULL, NULL, NULL, NULL, "2023-05-05");"""
sql """ INSERT INTO ${fullTblName} VALUES (10001, NULL, NULL, NULL, NULL, NULL, NULL, "2023-05-01");"""
sql """ INSERT INTO ${fullTblName} VALUES (10002, NULL, NULL, NULL, NULL, NULL, NULL, "2023-05-02");"""
sql """ INSERT INTO ${fullTblName} VALUES (10003, NULL, NULL, NULL, NULL, NULL, NULL, "2023-05-03");"""
sql """ INSERT INTO ${fullTblName} VALUES (10004, NULL, NULL, NULL, NULL, NULL, NULL, "2023-05-04");"""
sql """ INSERT INTO ${fullTblName} VALUES (10005, NULL, NULL, NULL, NULL, NULL, NULL, "2023-05-05");"""
sql """ INSERT INTO ${fullTblName} VALUES (10001, 0, "11", 11.0, 11.11, 11.1000, "aaa", "2023-05-01");"""
sql """ INSERT INTO ${fullTblName} VALUES (10002, 1, "22", 22.0, 22.22, 22.2000, "bbb", "2023-05-02");"""
sql """ INSERT INTO ${fullTblName} VALUES (10003, 0, "33", 33.0, 33.33, 33.3000, "ccc", "2023-05-03");"""
sql """ INSERT INTO ${fullTblName} VALUES (10004, 1, "44", 44.0, 44.44, 44.4000, "ddd", "2023-05-04");"""
sql """ INSERT INTO ${fullTblName} VALUES (10005, 0, "55", 55.0, 55.55, 55.5000, "eee", "2023-05-05");"""
sql """ INSERT INTO ${fullTblName} VALUES (10001, 0, "11", 11.0, 11.11, 11.1000, "aaa", "2023-05-01");"""
sql """ INSERT INTO ${fullTblName} VALUES (10002, 1, "22", 22.0, 22.22, 22.2000, "bbb", "2023-05-02");"""
sql """ INSERT INTO ${fullTblName} VALUES (10003, 0, "33", 33.0, 33.33, 33.3000, "ccc", "2023-05-03");"""
sql """ INSERT INTO ${fullTblName} VALUES (10004, 1, "44", 44.0, 44.44, 44.4000, "ddd", "2023-05-04");"""
sql """ INSERT INTO ${fullTblName} VALUES (10005, 0, "55", 55.0, 55.55, 55.5000, "eee", "2023-05-05");"""
sql """ INSERT INTO ${fullTblName} VALUES (10001, 0, "11", 11.0, 11.11, 11.1000, "aaa", "2023-05-01");"""
sql """ INSERT INTO ${fullTblName} VALUES (10002, 1, "22", 22.0, 22.22, 22.2000, "bbb", "2023-05-02");"""
sql """ INSERT INTO ${fullTblName} VALUES (10003, 0, "33", 33.0, 33.33, 33.3000, "ccc", "2023-05-03");"""
sql """ INSERT INTO ${fullTblName} VALUES (10004, 1, "44", 44.0, 44.44, 44.4000, "ddd", "2023-05-04");"""
sql """ INSERT INTO ${fullTblName} VALUES (10005, 0, "55", 55.0, 55.55, 55.5000, "eee", "2023-05-05");"""
sql """ INSERT INTO ${fullTblName} VALUES (10001, 0, "11", 11.0, 11.11, 11.1000, "aaa", "2023-05-01");"""
sql """ INSERT INTO ${fullTblName} VALUES (10002, 1, "22", 22.0, 22.22, 22.2000, "bbb", "2023-05-02");"""
sql """ INSERT INTO ${fullTblName} VALUES (10003, 0, "33", 33.0, 33.33, 33.3000, "ccc", "2023-05-03");"""
sql """ INSERT INTO ${fullTblName} VALUES (10004, 1, "44", 44.0, 44.44, 44.4000, "ddd", "2023-05-04");"""
sql """ INSERT INTO ${fullTblName} VALUES (10005, 0, "55", 55.0, 55.55, 55.5000, "eee", "2023-05-05");"""
sql """ INSERT INTO ${fullTblName} VALUES (10001, 0, "11", 11.0, 11.11, 11.1000, "aaa", "2023-05-01");"""
sql """ INSERT INTO ${fullTblName} VALUES (10002, 1, "22", 22.0, 22.22, 22.2000, "bbb", "2023-05-02");"""
sql """ INSERT INTO ${fullTblName} VALUES (10003, 0, "33", 33.0, 33.33, 33.3000, "ccc", "2023-05-03");"""
sql """ INSERT INTO ${fullTblName} VALUES (10004, 1, "44", 44.0, 44.44, 44.4000, "ddd", "2023-05-04");"""
sql """ INSERT INTO ${fullTblName} VALUES (10005, 0, "55", 55.0, 55.55, 55.5000, "eee", "2023-05-05");"""
sql """ INSERT INTO ${fullTblName} VALUES (10001, 0, "11", 11.0, 11.11, 11.1000, "aaa", "2023-05-01");"""
sql """ INSERT INTO ${fullTblName} VALUES (10002, 1, "22", 22.0, 22.22, 22.2000, "bbb", "2023-05-02");"""
sql """ INSERT INTO ${fullTblName} VALUES (10003, 0, "33", 33.0, 33.33, 33.3000, "ccc", "2023-05-03");"""
sql """ INSERT INTO ${fullTblName} VALUES (10004, 1, "44", 44.0, 44.44, 44.4000, "ddd", "2023-05-04");"""
sql """ INSERT INTO ${fullTblName} VALUES (10005, 0, "55", 55.0, 55.55, 55.5000, "eee", "2023-05-05");"""
sql """ INSERT INTO ${fullTblName} VALUES (10001, 0, "11", 11.0, 11.11, 11.1000, "aaa", "2023-05-01");"""
sql """ INSERT INTO ${fullTblName} VALUES (10002, 1, "22", 22.0, 22.22, 22.2000, "bbb", "2023-05-02");"""
sql """ INSERT INTO ${fullTblName} VALUES (10003, 0, "33", 33.0, 33.33, 33.3000, "ccc", "2023-05-03");"""
sql """ INSERT INTO ${fullTblName} VALUES (10004, 1, "44", 44.0, 44.44, 44.4000, "ddd", "2023-05-04");"""
sql """ INSERT INTO ${fullTblName} VALUES (10005, 0, "55", 55.0, 55.55, 55.5000, "eee", "2023-05-05");"""
order_qt_check_inserted_data """
SELECT * FROM ${fullTblName};
"""
// order_qt_check_inserted_data """
// SELECT * FROM ${fullTblName};
// """
/***************************************** Data initialization End *****************************************/
/***************************************** Universal analysis Begin ****************************************/
sql """
ANALYZE TABLE ${fullTblName} WITH sync;
"""
// sql """
// ANALYZE TABLE ${fullTblName} WITH sync;
// """
sql """
ANALYZE TABLE ${fullTblName} UPDATE HISTOGRAM WITH sync;
"""
// sql """
// ANALYZE TABLE ${fullTblName} UPDATE HISTOGRAM WITH sync;
// """
order_qt_check_column_stats """
SELECT $colStatisticsSchema FROM ${colStatisticsTblName}
WHERE `col_id` IN ($tblColumnNames);
"""
// order_qt_check_column_stats """
// SELECT $colStatisticsSchema FROM ${colStatisticsTblName}
// WHERE `col_id` IN ($tblColumnNames);
// """
order_qt_check_histogram_stats """
SELECT $colHistogramSchema FROM ${colHistogramTblName}
WHERE `col_id` IN ($tblColumnNames);
"""
// order_qt_check_histogram_stats """
// SELECT $colHistogramSchema FROM ${colHistogramTblName}
// WHERE `col_id` IN ($tblColumnNames);
// """
/*************************************** Universal analysis test End ***************************************/
/******************************************* Clean up data Begin *******************************************/
sql """
DROP DATABASE IF EXISTS ${dbName};
"""
// sql """
// DROP DATABASE IF EXISTS ${dbName};
// """
// TODO At present, "DELETE FROM" may fail to delete, so comment it out temporarily
// sql """