From 5505fa3755fdc7aa2e88899943ad8f614dfbdf80 Mon Sep 17 00:00:00 2001 From: Jibing-Li <64681310+Jibing-Li@users.noreply.github.com> Date: Sun, 24 Dec 2023 01:49:49 +0800 Subject: [PATCH] [Fix](statistics) Fix partition name NPE and sample for all table during auto analyze (#28916) Fix the partition name NPE and use sampling for all tables during auto analyze. All tables are sampled because the size reported by getDataSize may lag behind the real table size, which may cause a full analyze of a huge table and use too many resources. Sampling every table avoids this. The strategy will be improved later. --- .../src/main/java/org/apache/doris/qe/SessionVariable.java | 2 +- .../java/org/apache/doris/statistics/OlapAnalysisTask.java | 6 +++++- .../org/apache/doris/statistics/StatisticConstants.java | 2 +- .../apache/doris/statistics/StatisticsAutoCollector.java | 2 +- .../org/apache/doris/statistics/HMSAnalysisTaskTest.java | 2 +- .../org/apache/doris/statistics/OlapAnalysisTaskTest.java | 2 +- .../doris/statistics/StatisticsAutoCollectorTest.java | 2 +- 7 files changed, 11 insertions(+), 7 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 7d7fdb1404..b9658bf34a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -1422,7 +1422,7 @@ public class SessionVariable implements Serializable, Writable { + "When enable_auto_sample is enabled, tables" + "larger than this value will automatically collect " + "statistics through sampling"}) - public long hugeTableLowerBoundSizeInBytes = 5L * 1024 * 1024 * 1024; + public long hugeTableLowerBoundSizeInBytes = 0; @VariableMgr.VarAttr(name = HUGE_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS, flag = VariableMgr.GLOBAL, description = {"控制对大表的自动ANALYZE的最小时间间隔," diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java index 50042e4610..e062e4eef8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java @@ -61,7 +61,11 @@ public class OlapAnalysisTask extends BaseAnalysisTask { public void doExecute() throws Exception { Set partitionNames = info.colToPartitions.get(info.colName); - if (partitionNames.isEmpty()) { + if (partitionNames == null || partitionNames.isEmpty()) { + if (partitionNames == null) { + LOG.warn("Table {}.{}.{}, partitionNames for column {} is null. ColToPartitions:[{}]", + info.catalogId, info.dbId, info.tblId, info.colName, info.colToPartitions); + } StatsId statsId = new StatsId(concatColumnStatsId(), info.catalogId, info.dbId, info.tblId, info.indexId, info.colName, null); job.appendBuf(this, Arrays.asList(new ColStatsData(statsId))); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java index 111305b03e..3d6d2fe52a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java @@ -86,7 +86,7 @@ public class StatisticConstants { public static final int INSERT_MERGE_ITEM_COUNT = 200; public static final long HUGE_TABLE_DEFAULT_SAMPLE_ROWS = 4194304; - public static final long HUGE_TABLE_LOWER_BOUND_SIZE_IN_BYTES = 5L * 1024 * 1024 * 1024; + public static final long HUGE_TABLE_LOWER_BOUND_SIZE_IN_BYTES = 0; public static final long HUGE_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS = TimeUnit.HOURS.toMillis(12); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java index 244b1059d7..ee50471175 100644 --- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java @@ -148,7 +148,7 @@ public class StatisticsAutoCollector extends StatisticsCollector { protected void createAnalyzeJobForTbl(DatabaseIf db, List analysisInfos, TableIf table) { - AnalysisMethod analysisMethod = table.getDataSize(true) > StatisticsUtil.getHugeTableLowerBoundSizeInBytes() + AnalysisMethod analysisMethod = table.getDataSize(true) >= StatisticsUtil.getHugeTableLowerBoundSizeInBytes() ? AnalysisMethod.SAMPLE : AnalysisMethod.FULL; AnalysisInfo jobInfo = new AnalysisInfoBuilder() .setJobId(Env.getCurrentEnv().getNextId()) diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java index a569a5cb06..12a1a9c046 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java @@ -86,7 +86,7 @@ public class HMSAnalysisTaskTest { new MockUp() { @Mock public long getDataSize(boolean singleReplica) { - return 1000; + return StatisticsUtil.getHugeTableLowerBoundSizeInBytes() - 1; } }; HMSAnalysisTask task = new HMSAnalysisTask(); diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java index 5b27c79c86..e0b5a4b047 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java @@ -101,7 +101,7 @@ public class OlapAnalysisTaskTest { @Mock public long getDataSize(boolean singleReplica) { - return 1000; + return StatisticsUtil.getHugeTableLowerBoundSizeInBytes() - 1; } }; diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java index 9f6d3db74b..0b4b2203d0 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java @@ -346,7 +346,7 @@ public class StatisticsAutoCollectorTest { @Mock public long getDataSize(boolean singleReplica) { - return 1000; + return StatisticsUtil.getHugeTableLowerBoundSizeInBytes() - 1; } @Mock