From 964ac4e601d16cfd99348b9556ab24deccfb52d6 Mon Sep 17 00:00:00 2001 From: AKIRA <33112463+Kikyou1997@users.noreply.github.com> Date: Wed, 26 Jul 2023 17:16:56 +0800 Subject: [PATCH] [opt](nereids) Retry when async analyze task failed (#21889) Retry at most 5 times when async analyze task execution failed --- .../java/org/apache/doris/common/Config.java | 2 +- fe/fe-core/pom.xml | 2 +- .../doris/statistics/AnalysisManager.java | 2 +- .../doris/statistics/AnalysisTaskExecutor.java | 2 +- .../doris/statistics/BaseAnalysisTask.java | 17 ++++++++++++++++- .../doris/statistics/HMSAnalysisTask.java | 2 +- .../apache/doris/statistics/HistogramTask.java | 2 +- .../apache/doris/statistics/MVAnalysisTask.java | 2 +- .../doris/statistics/OlapAnalysisTask.java | 2 +- .../doris/statistics/StatisticConstants.java | 2 ++ .../doris/statistics/util/StatisticsUtil.java | 3 +++ .../doris/statistics/AnalysisJobTest.java | 2 +- .../statistics/AnalysisTaskExecutorTest.java | 2 +- .../doris/statistics/HistogramTaskTest.java | 2 +- 14 files changed, 32 insertions(+), 12 deletions(-) diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index 636fd8a46f..711bc5ef00 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -2018,7 +2018,7 @@ public class Config extends ConfigBase { public static boolean enable_feature_binlog = false; @ConfField - public static int analyze_task_timeout_in_minutes = 120; + public static int analyze_task_timeout_in_hours = 12; @ConfField(mutable = true, masterOnly = true, description = { "是否禁止使用 WITH REOSOURCE 语句创建 Catalog。", diff --git a/fe/fe-core/pom.xml b/fe/fe-core/pom.xml index 45ab1391a6..3e1ad92c28 100644 --- a/fe/fe-core/pom.xml +++ b/fe/fe-core/pom.xml @@ -820,7 +820,7 @@ under the License. false false - -javaagent:${settings.localRepository}/org/jmockit/jmockit/${jmockit.version}/jmockit-${jmockit.version}.jar + -Xmx512m -javaagent:${settings.localRepository}/org/jmockit/jmockit/${jmockit.version}/jmockit-${jmockit.version}.jar diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index 63bb387992..576fcf440e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -816,7 +816,7 @@ public class AnalysisManager extends Daemon implements Writable { continue; } try { - task.execute(); + task.doExecute(); updateSyncTaskStatus(task, AnalysisState.FINISHED); } catch (Throwable t) { colNames.add(task.info.colName); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskExecutor.java index 00e2b9fc76..11f244afd8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskExecutor.java @@ -71,7 +71,7 @@ public class AnalysisTaskExecutor extends Thread { try { AnalysisTaskWrapper taskWrapper = taskQueue.take(); try { - long timeout = TimeUnit.MINUTES.toMillis(Config.analyze_task_timeout_in_minutes) + long timeout = TimeUnit.HOURS.toMillis(Config.analyze_task_timeout_in_hours) - (System.currentTimeMillis() - taskWrapper.getStartTime()); taskWrapper.get(timeout < 0 ? 0 : timeout, TimeUnit.MILLISECONDS); } catch (Exception e) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java index 7197fc4442..c486bf36d1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java @@ -160,7 +160,22 @@ public abstract class BaseAnalysisTask { } - public abstract void execute() throws Exception; + public void execute() { + int retriedTimes = 0; + while (retriedTimes <= StatisticConstants.ANALYZE_TASK_RETRY_TIMES) { + if (killed) { + break; + } + try { + doExecute(); + break; + } catch (Throwable t) { + LOG.warn("Failed to execute analysis task, retried times: {}", retriedTimes++, t); + } + } + } + + public abstract void doExecute() throws Exception; public void cancel() { killed = true; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java index 4483f738a7..87e7bbe1b5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java @@ -104,7 +104,7 @@ public class HMSAnalysisTask extends BaseAnalysisTask { table = (HMSExternalTable) tbl; } - public void execute() throws Exception { + public void doExecute() throws Exception { if (isTableLevelTask) { getTableStats(); } else { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java index fdadff6c05..a0b6e6d9a5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java @@ -58,7 +58,7 @@ public class HistogramTask extends BaseAnalysisTask { } @Override - public void execute() throws Exception { + public void doExecute() throws Exception { Map params = new HashMap<>(); params.put("internalDB", FeConstants.INTERNAL_DB_NAME); params.put("histogramStatTbl", StatisticConstants.HISTOGRAM_TBL_NAME); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/MVAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/MVAnalysisTask.java index 701f3109b8..31b3b76d5d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/MVAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/MVAnalysisTask.java @@ -86,7 +86,7 @@ public class MVAnalysisTask extends BaseAnalysisTask { } @Override - public void execute() throws Exception { + public void doExecute() throws Exception { for (Column column : meta.getSchema()) { SelectStmt selectOne = (SelectStmt) selectStmt.clone(); TableRef tableRef = selectOne.getTableRefs().get(0); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java index 1f378d21c7..fab9f68197 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java @@ -59,7 +59,7 @@ public class OlapAnalysisTask extends BaseAnalysisTask { super(info); } - public void execute() throws Exception { + public void doExecute() throws Exception { Map params = new HashMap<>(); params.put("internalDB", FeConstants.INTERNAL_DB_NAME); params.put("columnStatTbl", StatisticConstants.STATISTIC_TBL_NAME); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java index eb9572bb74..e9bccff5b9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java @@ -84,6 +84,8 @@ public class StatisticConstants { public static List STATISTICS_DB_BLACK_LIST = new ArrayList<>(); + public static int ANALYZE_TASK_RETRY_TIMES = 5; + static { STATISTICS_DB_BLACK_LIST.add(SystemInfoService.DEFAULT_CLUSTER + ClusterNamespace.CLUSTER_DELIMITER + FeConstants.INTERNAL_DB_NAME); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java index b118a7d02f..046576c5a8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java @@ -121,6 +121,9 @@ public class StatisticsUtil { } public static List execStatisticQuery(String sql) { + if (FeConstants.disableInternalSchemaDb) { + return Collections.emptyList(); + } try (AutoCloseConnectContext r = StatisticsUtil.buildConnectContext()) { StmtExecutor stmtExecutor = new StmtExecutor(r.connectContext, sql); r.connectContext.setExecutor(stmtExecutor); diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java index 86c3d63463..ca145f07f2 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java @@ -130,7 +130,7 @@ public class AnalysisJobTest extends TestWithFeService { .setAnalysisType(AnalysisType.FUNDAMENTALS) .setColToPartitions(colToPartitions) .build(); - new OlapAnalysisTask(analysisJobInfo).execute(); + new OlapAnalysisTask(analysisJobInfo).doExecute(); } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisTaskExecutorTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisTaskExecutorTest.java index 264015dd9e..c75c5e8f9e 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisTaskExecutorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisTaskExecutorTest.java @@ -127,7 +127,7 @@ public class AnalysisTaskExecutorTest extends TestWithFeService { }; new Expectations() { { - task.execute(); + task.doExecute(); times = 1; } }; diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTaskTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTaskTest.java index 0c6b507b13..320e9f855a 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTaskTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTaskTest.java @@ -120,7 +120,7 @@ public class HistogramTaskTest extends TestWithFeService { }; new Expectations() { { - task.execute(); + task.doExecute(); times = 1; } };