From 0ae3a6df7e863385ce53bcf5939d77debe412e08 Mon Sep 17 00:00:00 2001 From: Lei Zhang <27994433+SWJTU-ZhangLei@users.noreply.github.com> Date: Fri, 21 Apr 2023 20:58:42 +0800 Subject: [PATCH] [bug](bdbje) Add retry for reSetupBdbEnvironment() `restore.execute()` (#18777) * In reSetupBdbEnvironment(), `restore.execute()` may throw a NullPointerException; retry `restore.execute()` up to RETRY_TIME times, sleeping 5 seconds after each failed attempt --- .../doris/journal/bdbje/BDBJEJournal.java | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java index 5cc1ac6e09..20cb6b0070 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java +++ b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java @@ -375,10 +375,25 @@ public class BDBJEJournal implements Journal { // CHECKSTYLE IGNORE THIS LINE: B // ATTN: here we use `getServingEnv()`, because only serving catalog has // helper nodes.
HostInfo helperNode = Env.getServingEnv().getHelperNode(); - NetworkRestore restore = new NetworkRestore(); - NetworkRestoreConfig config = new NetworkRestoreConfig(); - config.setRetainLogFiles(false); - restore.execute(insufficientLogEx, config); + + for (int i = 0; i < RETRY_TIME; i++) { + try { + NetworkRestore restore = new NetworkRestore(); + NetworkRestoreConfig config = new NetworkRestoreConfig(); + config.setRetainLogFiles(false); + restore.execute(insufficientLogEx, config); + break; + } catch (Exception e) { + LOG.warn("retry={}, reSetupBdbEnvironment exception:", i, e); + try { + Thread.sleep(5 * 1000); + LOG.warn("after sleep insufficientLogEx:", insufficientLogEx); + } catch (InterruptedException e1) { + LOG.warn("InterruptedException", e1); + } + } + } + bdbEnvironment.close(); bdbEnvironment.setup(new File(environmentPath), selfNodeName, selfNodeHostPort, helperNode.getIp() + ":" + helperNode.getPort(), Env.getServingEnv().isElectable());