From e4707154fa5867e362733b210882cbd1c6213891 Mon Sep 17 00:00:00 2001 From: Jibing-Li <64681310+Jibing-Li@users.noreply.github.com> Date: Tue, 9 Jan 2024 16:06:08 +0800 Subject: [PATCH] [opt](statistics) create or update table stats after alter column stats. Create or update table stats after altering column stats. Set a flag to disable auto analyze for the table after the user injects column stats. --- .../doris/analysis/ShowTableStatsStmt.java | 2 ++ .../apache/doris/statistics/AnalysisInfo.java | 5 +++- .../doris/statistics/AnalysisInfoBuilder.java | 9 +++++- .../doris/statistics/AnalysisManager.java | 12 ++++++++ .../statistics/StatisticsAutoCollector.java | 3 ++ .../statistics/StatisticsRepository.java | 8 +++++ .../doris/statistics/TableStatsMeta.java | 7 ++++- .../StatisticsAutoCollectorTest.java | 9 ++++++ .../suites/statistics/analyze_stats.groovy | 29 +++++++++++++++++++ 9 files changed, 81 insertions(+), 3 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java index 284b6248b8..95d36867da 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java @@ -55,6 +55,7 @@ public class ShowTableStatsStmt extends ShowStmt { .add("columns") .add("trigger") .add("new_partition") + .add("user_inject") .build(); private final TableName tableName; @@ -151,6 +152,7 @@ public class ShowTableStatsStmt extends ShowStmt { row.add(tableStatistic.analyzeColumns().toString()); row.add(tableStatistic.jobType.toString()); row.add(String.valueOf(tableStatistic.newPartitionLoaded.get())); + row.add(String.valueOf(tableStatistic.userInjected)); result.add(row); return new ShowResultSet(getMetaData(), result); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java index aaff9e5992..97788174e6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java @@ -198,6 +198,8 @@ public class AnalysisInfo implements Writable { */ public final long tblUpdateTime; + public final boolean userInject; + public AnalysisInfo(long jobId, long taskId, List taskIds, long catalogId, long dbId, long tblId, Map> colToPartitions, Set partitionNames, String colName, Long indexId, JobType jobType, AnalysisMode analysisMode, AnalysisMethod analysisMethod, AnalysisType analysisType, @@ -205,7 +207,7 @@ public class AnalysisInfo implements Writable { long lastExecTimeInMs, long timeCostInMs, AnalysisState state, ScheduleType scheduleType, boolean isExternalTableLevelTask, boolean partitionOnly, boolean samplingPartition, boolean isAllPartition, long partitionCount, CronExpression cronExpression, boolean forceFull, - boolean usingSqlForPartitionColumn, long tblUpdateTime, boolean emptyJob) { + boolean usingSqlForPartitionColumn, long tblUpdateTime, boolean emptyJob, boolean userInject) { this.jobId = jobId; this.taskId = taskId; this.taskIds = taskIds; @@ -242,6 +244,7 @@ public class AnalysisInfo implements Writable { this.usingSqlForPartitionColumn = usingSqlForPartitionColumn; this.tblUpdateTime = tblUpdateTime; this.emptyJob = emptyJob; + this.userInject = userInject; } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java index 310b7816ec..22f3d22b3c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java @@ -63,6 +63,7 @@ public class AnalysisInfoBuilder { private boolean usingSqlForPartitionColumn; private long tblUpdateTime; private 
boolean emptyJob; + private boolean userInject; public AnalysisInfoBuilder() { } @@ -101,6 +102,7 @@ public class AnalysisInfoBuilder { usingSqlForPartitionColumn = info.usingSqlForPartitionColumn; tblUpdateTime = info.tblUpdateTime; emptyJob = info.emptyJob; + userInject = info.userInject; } public AnalysisInfoBuilder setJobId(long jobId) { @@ -268,12 +270,17 @@ public class AnalysisInfoBuilder { return this; } + public AnalysisInfoBuilder setUserInject(boolean userInject) { + this.userInject = userInject; + return this; + } + public AnalysisInfo build() { return new AnalysisInfo(jobId, taskId, taskIds, catalogId, dbId, tblId, colToPartitions, partitionNames, colName, indexId, jobType, analysisMode, analysisMethod, analysisType, samplePercent, sampleRows, maxBucketNum, periodTimeInMs, message, lastExecTimeInMs, timeCostInMs, state, scheduleType, externalTableLevelTask, partitionOnly, samplingPartition, isAllPartition, partitionCount, - cronExpression, forceFull, usingSqlForPartitionColumn, tblUpdateTime, emptyJob); + cronExpression, forceFull, usingSqlForPartitionColumn, tblUpdateTime, emptyJob, userInject); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index 8db50e667f..f56e800e83 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -560,6 +560,17 @@ public class AnalysisManager implements Writable { } } + @VisibleForTesting + public void updateTableStatsForAlterStats(AnalysisInfo jobInfo, TableIf tbl) { + TableStatsMeta tableStats = findTableStatsStatus(tbl.getId()); + if (tableStats == null) { + updateTableStatsStatus(new TableStatsMeta(0, jobInfo, tbl)); + } else { + tableStats.update(jobInfo, tbl); + logCreateTableStats(tableStats); + } + } + public List showAnalysisJob(ShowAnalyzeStmt stmt) { return 
findShowAnalyzeResult(analysisJobInfoMap.values(), stmt); } @@ -654,6 +665,7 @@ public class AnalysisManager implements Writable { } tableStats.updatedTime = 0; } + tableStats.userInjected = false; logCreateTableStats(tableStats); StatisticsRepository.dropStatistics(tblId, cols); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java index fe90a2a59d..0f29d00da1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java @@ -158,6 +158,9 @@ public class StatisticsAutoCollector extends StatisticsCollector { if (tableStats == null || tableStats.newPartitionLoaded.get()) { return false; } + if (tableStats.userInjected) { + return true; + } return System.currentTimeMillis() - tableStats.updatedTime < StatisticsUtil.getHugeTableAutoAnalyzeIntervalInMillis(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java index 0ffc818b1a..b406faa4be 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java @@ -301,6 +301,14 @@ public class StatisticsRepository { StatisticsUtil.execUpdate(INSERT_INTO_COLUMN_STATISTICS, params); Env.getCurrentEnv().getStatisticsCache() .updateColStatsCache(objects.table.getId(), -1, colName, columnStatistic); + AnalysisInfo mockedJobInfo = new AnalysisInfoBuilder() + .setTblUpdateTime(System.currentTimeMillis()) + .setColName("") + .setColToPartitions(Maps.newHashMap()) + .setUserInject(true) + .setJobType(AnalysisInfo.JobType.MANUAL) + .build(); + Env.getCurrentEnv().getAnalysisManager().updateTableStatsForAlterStats(mockedJobInfo, objects.table); } else { // update 
partition granularity statistics for (Long partitionId : partitionIds) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java index 00878adcc4..926194a725 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java @@ -72,6 +72,9 @@ public class TableStatsMeta implements Writable { @SerializedName("newPartitionLoaded") public AtomicBoolean newPartitionLoaded = new AtomicBoolean(false); + @SerializedName("userInjected") + public boolean userInjected; + @VisibleForTesting public TableStatsMeta() { tblId = 0; @@ -130,13 +133,15 @@ public class TableStatsMeta implements Writable { public void update(AnalysisInfo analyzedJob, TableIf tableIf) { updatedTime = analyzedJob.tblUpdateTime; + userInjected = analyzedJob.userInject; String colNameStr = analyzedJob.colName; // colName field AnalyzeJob's format likes: "[col1, col2]", we need to remove brackets here // TODO: Refactor this later if (analyzedJob.colName.startsWith("[") && analyzedJob.colName.endsWith("]")) { colNameStr = colNameStr.substring(1, colNameStr.length() - 1); } - List cols = Arrays.stream(colNameStr.split(",")).map(String::trim).collect(Collectors.toList()); + List cols = Arrays.stream(colNameStr.split(",")) + .map(String::trim).filter(s -> !s.isEmpty()).collect(Collectors.toList()); for (String col : cols) { ColStatsMeta colStatsMeta = colNameToColStatsMeta.get(col); if (colStatsMeta == null) { diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java index f6def60908..68f83fa3e3 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java 
@@ -318,6 +318,15 @@ public class StatisticsAutoCollectorTest { }; // can't find table stats meta, which means this table never get analyzed, so we shouldn't skip it this time Assertions.assertFalse(autoCollector.skip(olapTable)); + new MockUp() { + + @Mock + public TableStatsMeta findTableStatsStatus(long tblId) { + return stats; + } + }; + stats.userInjected = true; + Assertions.assertTrue(autoCollector.skip(olapTable)); // this is not olap table nor external table, so we should skip it this time Assertions.assertTrue(autoCollector.skip(anyOtherTable)); } diff --git a/regression-test/suites/statistics/analyze_stats.groovy b/regression-test/suites/statistics/analyze_stats.groovy index 1559b5350b..40fc4f9e59 100644 --- a/regression-test/suites/statistics/analyze_stats.groovy +++ b/regression-test/suites/statistics/analyze_stats.groovy @@ -2654,6 +2654,35 @@ PARTITION `p599` VALUES IN (599) contains "pushAggOp=MINMAX" } + // Test alter + sql """ + CREATE TABLE alter_test( + `id` int NOT NULL, + `name` VARCHAR(25) NOT NULL + )ENGINE=OLAP + DUPLICATE KEY(`id`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ); + """ + sql """ANALYZE TABLE alter_test WITH SYNC""" + def alter_result = sql """show table stats alter_test""" + assertEquals("false", alter_result[0][7]) + sql """alter table alter_test modify column id set stats ('row_count'='2.0E7', 'ndv'='3927659.0', 'num_nulls'='0.0', 'data_size'='2.69975443E8', 'min_value'='1', 'max_value'='2');""" + alter_result = sql """show table stats alter_test""" + assertEquals("true", alter_result[0][7]) + sql """ANALYZE TABLE alter_test WITH SYNC""" + alter_result = sql """show table stats alter_test""" + assertEquals("false", alter_result[0][7]) + sql """alter table alter_test modify column id set stats ('row_count'='2.0E7', 'ndv'='3927659.0', 'num_nulls'='0.0', 'data_size'='2.69975443E8', 'min_value'='1', 'max_value'='2');""" + alter_result = sql """show table stats alter_test""" 
+ assertEquals("true", alter_result[0][7]) + sql """drop stats alter_test""" + alter_result = sql """show table stats alter_test""" + assertEquals("false", alter_result[0][7]) + // Test trigger type. sql """DROP DATABASE IF EXISTS trigger""" sql """CREATE DATABASE IF NOT EXISTS trigger"""