From 7ef482457582f8bc9cbb7be5bc01792c3e4fa69f Mon Sep 17 00:00:00 2001 From: Lei Zhang Date: Fri, 20 Jun 2025 14:01:43 +0800 Subject: [PATCH] Pick [enhancement](fe-meta) Support skip specified journal to avoid FE can't be start due to a damaged journal (#35783) (#51877) If fe meta has damaged journals, fe can't be started successfully. Adding a configuration to skip the damaged journals. ### What problem does this PR solve? Issue Number: close #xxx Related PR: #xxx Problem Summary: ### Release note None ### Check List (For Author) - Test - [ ] Regression test - [ ] Unit Test - [ ] Manual test (add detailed scripts or steps below) - [ ] No need to test or manual test. Explain why: - [ ] This is a refactor/code format and no logic has been changed. - [x] Previous test can cover this change. - [ ] No code files have been changed. - [ ] Other reason - Behavior changed: - [x] No. - [ ] Yes. - Does this need documentation? - [x] No. - [ ] Yes. ### Check List (For Reviewer who merge this PR) - [ ] Confirm the release note - [ ] Confirm test cases - [ ] Confirm document - [ ] Add branch pick label Co-authored-by: zxealous --- .../java/org/apache/doris/common/Config.java | 6 ++++++ .../java/org/apache/doris/catalog/Env.java | 21 ++++++++++++++++++- .../doris/journal/bdbje/BDBJournalCursor.java | 4 ++++ 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index 5a5548d3dd..fe97f368cb 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -1277,6 +1277,12 @@ public class Config extends ConfigBase { @ConfField(mutable = true, masterOnly = true) public static boolean force_do_metadata_checkpoint = false; + /** + * If some joural is wrong, and FE can't start, we can use this to skip it. + */ + @ConfField(mutable = false, masterOnly = false) + public static String[] force_skip_journal_ids = {}; + /** * Decide how often to check dynamic partition */ diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java index 5b972b8c52..67172e95d1 100755 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java @@ -314,6 +314,7 @@ import java.io.IOException; import java.net.HttpURLConnection; import java.net.InetSocketAddress; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.Comparator; import java.util.HashMap; @@ -564,6 +565,8 @@ public class Env { private final Map> configtoThreads = ImmutableMap .of("dynamic_partition_check_interval_seconds", this::getDynamicPartitionScheduler); + private final List forceSkipJournalIds = Arrays.asList(Config.force_skip_journal_ids); + public List getFrontendInfos() { List res = new ArrayList<>(); @@ -976,6 +979,10 @@ public class Env { return dnsCache; } + public List getForceSkipJournalIds() { + return forceSkipJournalIds; + } + // Use tryLock to avoid potential dead lock private boolean tryLock(boolean mustLock) { while (true) { @@ -2849,7 +2856,19 @@ public class Env { Long logId = kv.first; JournalEntity entity = kv.second; if (entity == null) { - break; + if (logId != null && forceSkipJournalIds.contains(String.valueOf(logId))) { + replayedJournalId.incrementAndGet(); + String msg = "journal " + replayedJournalId + " has skipped by config force_skip_journal_id"; + LOG.info(msg); + LogUtils.stdout(msg); + if (MetricRepo.isInit) { + // Metric repo may not init after this replay thread start + MetricRepo.COUNTER_EDIT_LOG_READ.increase(1L); + } + continue; + } else { + break; + } } hasLog = true; EditLog.loadJournal(this, logId, entity); diff --git a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJournalCursor.java b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJournalCursor.java index 8c48cab42c..f6068d5948 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJournalCursor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJournalCursor.java @@ -17,6 +17,7 @@ package org.apache.doris.journal.bdbje; +import org.apache.doris.catalog.Env; import org.apache.doris.common.Pair; import org.apache.doris.journal.JournalCursor; import org.apache.doris.journal.JournalEntity; @@ -93,6 +94,9 @@ public class BDBJournalCursor implements JournalCursor { return null; } + if (Env.getCurrentEnv().getForceSkipJournalIds().contains(String.valueOf(currentKey))) { + return Pair.of(currentKey++, null); + } Long key = currentKey; DatabaseEntry theKey = new DatabaseEntry(); TupleBinding myBinding = TupleBinding.getPrimitiveBinding(Long.class);