diff --git a/docs/en/docs/query-acceleration/statistics.md b/docs/en/docs/query-acceleration/statistics.md index c7a5827758..4cb0891172 100644 --- a/docs/en/docs/query-acceleration/statistics.md +++ b/docs/en/docs/query-acceleration/statistics.md @@ -295,8 +295,8 @@ mysql> KILL ANALYZE 52357; |auto_analyze_end_time|End time for automatic statistics collection|23:59:59| |enable_auto_analyze|Enable automatic collection functionality|true| |huge_table_default_sample_rows|Sampling rows for large tables|4194304| -|huge_table_lower_bound_size_in_bytes|Tables with size greater than this value will be automatically sampled during collection of statistics|5368709120| -|huge_table_auto_analyze_interval_in_millis|Controls the minimum time interval for automatic ANALYZE on large tables. Tables with sizes greater than `huge_table_lower_bound_size_in_bytes * 5` will be ANALYZEed only once within this time interval.|43200000| +|huge_table_lower_bound_size_in_bytes|Tables with size greater than this value will be automatically sampled during collection of statistics|0| +|huge_table_auto_analyze_interval_in_millis|Controls the minimum time interval for automatic ANALYZE on large tables. Tables with sizes greater than `huge_table_lower_bound_size_in_bytes * 5` will be analyzed only once within this time interval.|0| |table_stats_health_threshold|Ranges from 0 to 100. If data updates since the last statistics collection exceed `(100 - table_stats_health_threshold)%`, the table's statistics are considered outdated.|60| |analyze_timeout|Controls the timeout for synchronous ANALYZE in seconds|43200| |auto_analyze_table_width_threshold|Controls the maximum width of table that will be auto analyzed. 
Table with more columns than this value will not be auto analyzed.|70| diff --git a/docs/zh-CN/docs/query-acceleration/statistics.md b/docs/zh-CN/docs/query-acceleration/statistics.md index 20b535e357..bff100fa98 100644 --- a/docs/zh-CN/docs/query-acceleration/statistics.md +++ b/docs/zh-CN/docs/query-acceleration/statistics.md @@ -299,8 +299,8 @@ mysql> KILL ANALYZE 52357; |auto_analyze_end_time|自动统计信息收集结束时间|23:59:59| |enable_auto_analyze|开启自动收集功能|true| |huge_table_default_sample_rows|对大表的采样行数|4194304| -|huge_table_lower_bound_size_in_bytes|大小超过该值的的表,在自动收集时将会自动通过采样收集统计信息|5368709120| -|huge_table_auto_analyze_interval_in_millis|控制对大表的自动ANALYZE的最小时间间隔,在该时间间隔内大小超过huge_table_lower_bound_size_in_bytes * 5的表仅ANALYZE一次|43200000| +|huge_table_lower_bound_size_in_bytes|大小超过该值的表,在自动收集时将会自动通过采样收集统计信息|0| +|huge_table_auto_analyze_interval_in_millis|控制对大表的自动ANALYZE的最小时间间隔,在该时间间隔内大小超过huge_table_lower_bound_size_in_bytes * 5的表仅ANALYZE一次|0| |table_stats_health_threshold|取值在0-100之间,当自上次统计信息收集操作之后,数据更新量达到 (100 - table_stats_health_threshold)% ,认为该表的统计信息已过时|60| |analyze_timeout|控制ANALYZE超时时间,单位为秒|43200| |auto_analyze_table_width_threshold|控制自动统计信息收集处理的最大表宽度,列数大于该值的表不会参与自动统计信息收集|70| diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 3554505990..c1ea2f29ff 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -1450,7 +1450,7 @@ public class SessionVariable implements Serializable, Writable { "This controls the minimum time interval for automatic ANALYZE on large tables." 
+ "Within this interval," + "tables larger than huge_table_lower_bound_size_in_bytes are analyzed only once."}) - public long hugeTableAutoAnalyzeIntervalInMillis = TimeUnit.HOURS.toMillis(12); + public long hugeTableAutoAnalyzeIntervalInMillis = TimeUnit.HOURS.toMillis(0); @VariableMgr.VarAttr(name = EXTERNAL_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS, flag = VariableMgr.GLOBAL, description = {"控制对外表的自动ANALYZE的最小时间间隔,在该时间间隔内的外表仅ANALYZE一次", diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java index 65bb4a5dd9..aaff9e5992 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java @@ -188,6 +188,9 @@ public class AnalysisInfo implements Writable { @SerializedName("endTime") public long endTime; + + @SerializedName("emptyJob") + public final boolean emptyJob; /** * * Used to store the newest partition version of tbl when creating this job. 
@@ -202,7 +205,7 @@ public class AnalysisInfo implements Writable { long lastExecTimeInMs, long timeCostInMs, AnalysisState state, ScheduleType scheduleType, boolean isExternalTableLevelTask, boolean partitionOnly, boolean samplingPartition, boolean isAllPartition, long partitionCount, CronExpression cronExpression, boolean forceFull, - boolean usingSqlForPartitionColumn, long tblUpdateTime) { + boolean usingSqlForPartitionColumn, long tblUpdateTime, boolean emptyJob) { this.jobId = jobId; this.taskId = taskId; this.taskIds = taskIds; @@ -238,6 +241,7 @@ public class AnalysisInfo implements Writable { this.forceFull = forceFull; this.usingSqlForPartitionColumn = usingSqlForPartitionColumn; this.tblUpdateTime = tblUpdateTime; + this.emptyJob = emptyJob; } @Override @@ -279,6 +283,7 @@ public class AnalysisInfo implements Writable { } sj.add("forceFull: " + forceFull); sj.add("usingSqlForPartitionColumn: " + usingSqlForPartitionColumn); + sj.add("emptyJob: " + emptyJob); return sj.toString(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java index 204aba6d0f..310b7816ec 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java @@ -61,8 +61,8 @@ public class AnalysisInfoBuilder { private CronExpression cronExpression; private boolean forceFull; private boolean usingSqlForPartitionColumn; - private long tblUpdateTime; + private boolean emptyJob; public AnalysisInfoBuilder() { } @@ -100,6 +100,7 @@ public class AnalysisInfoBuilder { forceFull = info.forceFull; usingSqlForPartitionColumn = info.usingSqlForPartitionColumn; tblUpdateTime = info.tblUpdateTime; + emptyJob = info.emptyJob; } public AnalysisInfoBuilder setJobId(long jobId) { @@ -262,12 +263,17 @@ public class AnalysisInfoBuilder { return this; } + public AnalysisInfoBuilder 
setEmptyJob(boolean emptyJob) { + this.emptyJob = emptyJob; + return this; + } + public AnalysisInfo build() { return new AnalysisInfo(jobId, taskId, taskIds, catalogId, dbId, tblId, colToPartitions, partitionNames, colName, indexId, jobType, analysisMode, analysisMethod, analysisType, samplePercent, sampleRows, maxBucketNum, periodTimeInMs, message, lastExecTimeInMs, timeCostInMs, state, scheduleType, externalTableLevelTask, partitionOnly, samplingPartition, isAllPartition, partitionCount, - cronExpression, forceFull, usingSqlForPartitionColumn, tblUpdateTime); + cronExpression, forceFull, usingSqlForPartitionColumn, tblUpdateTime, emptyJob); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index 0bf24e0c28..39ae191d45 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -519,6 +519,7 @@ public class AnalysisManager implements Writable { infoBuilder.setColToPartitions(colToPartitions); infoBuilder.setTaskIds(Lists.newArrayList()); infoBuilder.setTblUpdateTime(table.getUpdateTime()); + infoBuilder.setEmptyJob(table instanceof OlapTable && table.getRowCount() == 0); return infoBuilder.build(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java index e062e4eef8..81348c1f94 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java @@ -61,7 +61,8 @@ public class OlapAnalysisTask extends BaseAnalysisTask { public void doExecute() throws Exception { Set partitionNames = info.colToPartitions.get(info.colName); - if (partitionNames == null || partitionNames.isEmpty()) { + if ((info.emptyJob && 
info.analysisMethod.equals(AnalysisInfo.AnalysisMethod.SAMPLE)) + || partitionNames == null || partitionNames.isEmpty()) { if (partitionNames == null) { LOG.warn("Table {}.{}.{}, partitionNames for column {} is null. ColToPartitions:[{}]", info.catalogId, info.dbId, info.tblId, info.colName, info.colToPartitions); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java index 3d6d2fe52a..857a50e234 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java @@ -88,7 +88,7 @@ public class StatisticConstants { public static final long HUGE_TABLE_DEFAULT_SAMPLE_ROWS = 4194304; public static final long HUGE_TABLE_LOWER_BOUND_SIZE_IN_BYTES = 0; - public static final long HUGE_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS = TimeUnit.HOURS.toMillis(12); + public static final long HUGE_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS = TimeUnit.HOURS.toMillis(0); public static final long EXTERNAL_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS = TimeUnit.HOURS.toMillis(24); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java index ee50471175..f799da5620 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java @@ -170,6 +170,7 @@ public class StatisticsAutoCollector extends StatisticsCollector { .setLastExecTimeInMs(System.currentTimeMillis()) .setJobType(JobType.SYSTEM) .setTblUpdateTime(table.getUpdateTime()) + .setEmptyJob(table instanceof OlapTable && table.getRowCount() == 0) .build(); analysisInfos.add(jobInfo); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java index f500ab09f0..eb6672ffe1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java @@ -149,7 +149,7 @@ public class TableStatsMeta implements Writable { if (tableIf instanceof OlapTable) { rowCount = tableIf.getRowCount(); } - if (analyzedJob.colToPartitions.keySet() + if (!analyzedJob.emptyJob && analyzedJob.colToPartitions.keySet() .containsAll(tableIf.getBaseSchema().stream() .filter(c -> !StatisticsUtil.isUnsupportedType(c.getType())) .map(Column::getName).collect(Collectors.toSet()))) { diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java index 0b4b2203d0..87342202fb 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java @@ -299,7 +299,7 @@ public class StatisticsAutoCollectorTest { // A very huge table has been updated recently, so we should skip it this time stats.updatedTime = System.currentTimeMillis() - 1000; StatisticsAutoCollector autoCollector = new StatisticsAutoCollector(); - Assertions.assertTrue(autoCollector.skip(olapTable)); + Assertions.assertFalse(autoCollector.skip(olapTable)); // The update of this huge table is long time ago, so we shouldn't skip it this time stats.updatedTime = System.currentTimeMillis() - StatisticsUtil.getHugeTableAutoAnalyzeIntervalInMillis() - 10000; diff --git a/regression-test/suites/statistics/analyze_stats.groovy b/regression-test/suites/statistics/analyze_stats.groovy index e7e89f858f..64967280ce 100644 --- a/regression-test/suites/statistics/analyze_stats.groovy +++ b/regression-test/suites/statistics/analyze_stats.groovy @@ -1168,7 +1168,7 @@ PARTITION `p599` VALUES 
IN (599) sql """ INSERT INTO test_updated_rows SELECT * FROM test_updated_rows """ sql """ANALYZE TABLE test_updated_rows WITH SYNC""" def cnt2 = sql """ SHOW TABLE STATS test_updated_rows """ - assertEquals(Integer.valueOf(cnt2[0][0]), 0) + assertTrue(Integer.valueOf(cnt2[0][0]) == 0 || Integer.valueOf(cnt2[0][0]) == 8) // test analyze specific column sql """CREATE TABLE test_analyze_specific_column (col1 varchar(11451) not null, col2 int not null, col3 int not null)