Pick [enhancement](fe-meta) Support skipping specified journals so that FE can still start when a journal is damaged (#35783) (#51877)

If the FE metadata contains damaged journals, the FE cannot start successfully.
This change adds a configuration item, `force_skip_journal_ids`, that lists the journal IDs to skip during replay.

### What problem does this PR solve?

Issue Number: close #xxx

Related PR: #xxx

Problem Summary:

### Release note

None

### Check List (For Author)

- Test <!-- At least one of them must be included. -->
    - [ ] Regression test
    - [ ] Unit Test
    - [ ] Manual test (add detailed scripts or steps below)
    - [ ] No need to test or manual test. Explain why:
- [ ] This is a refactor/code format and no logic has been changed.
        - [x] Previous test can cover this change.
        - [ ] No code files have been changed.
        - [ ] Other reason <!-- Add your reason?  -->

- Behavior changed:
    - [x] No.
    - [ ] Yes. <!-- Explain the behavior change -->

- Does this need documentation?
    - [x] No.
- [ ] Yes. <!-- Add document PR link here. eg:
https://github.com/apache/doris-website/pull/1214 -->

### Check List (For Reviewer who merge this PR)

- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label <!-- Add branch pick label that this PR
should merge into -->

Co-authored-by: zxealous <zhouchangyue@baidu.com>
This commit is contained in:
Lei Zhang
2025-06-20 14:01:43 +08:00
committed by GitHub
parent a75760d18f
commit 7ef4824575
3 changed files with 30 additions and 1 deletions

View File

@ -1277,6 +1277,12 @@ public class Config extends ConfigBase {
@ConfField(mutable = true, masterOnly = true)
public static boolean force_do_metadata_checkpoint = false;
/**
* If some journal is damaged and FE can't start, list the ids of the damaged journals here
* so that they are skipped instead of being replayed.
* Each entry is matched against the decimal string form of a journal id.
*/
@ConfField(mutable = false, masterOnly = false)
public static String[] force_skip_journal_ids = {};
/**
* Decide how often to check dynamic partition
*/

View File

@ -314,6 +314,7 @@ import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
@ -564,6 +565,8 @@ public class Env {
// Maps a config item name to a supplier of the MasterDaemon associated with that item.
// NOTE(review): presumably used to find the daemon affected when the config changes - confirm.
private final Map<String, Supplier<MasterDaemon>> configtoThreads = ImmutableMap
.of("dynamic_partition_check_interval_seconds", this::getDynamicPartitionScheduler);
// Journal ids to skip, taken from Config.force_skip_journal_ids when this Env is constructed.
// Arrays.asList returns a fixed-size list view backed by the config array, not a copy.
private final List<String> forceSkipJournalIds = Arrays.asList(Config.force_skip_journal_ids);
public List<TFrontendInfo> getFrontendInfos() {
List<TFrontendInfo> res = new ArrayList<>();
@ -976,6 +979,10 @@ public class Env {
return dnsCache;
}
/**
* Returns the list of journal ids (as strings) held in {@code forceSkipJournalIds}.
*
* @return the same list instance stored in this Env; no copy is made
*/
public List<String> getForceSkipJournalIds() {
    return this.forceSkipJournalIds;
}
// Use tryLock to avoid potential dead lock
private boolean tryLock(boolean mustLock) {
while (true) {
@ -2849,7 +2856,19 @@ public class Env {
Long logId = kv.first;
JournalEntity entity = kv.second;
if (entity == null) {
break;
if (logId != null && forceSkipJournalIds.contains(String.valueOf(logId))) {
replayedJournalId.incrementAndGet();
String msg = "journal " + replayedJournalId + " has skipped by config force_skip_journal_id";
LOG.info(msg);
LogUtils.stdout(msg);
if (MetricRepo.isInit) {
// Metric repo may not init after this replay thread start
MetricRepo.COUNTER_EDIT_LOG_READ.increase(1L);
}
continue;
} else {
break;
}
}
hasLog = true;
EditLog.loadJournal(this, logId, entity);

View File

@ -17,6 +17,7 @@
package org.apache.doris.journal.bdbje;
import org.apache.doris.catalog.Env;
import org.apache.doris.common.Pair;
import org.apache.doris.journal.JournalCursor;
import org.apache.doris.journal.JournalEntity;
@ -93,6 +94,9 @@ public class BDBJournalCursor implements JournalCursor {
return null;
}
if (Env.getCurrentEnv().getForceSkipJournalIds().contains(String.valueOf(currentKey))) {
return Pair.of(currentKey++, null);
}
Long key = currentKey;
DatabaseEntry theKey = new DatabaseEntry();
TupleBinding<Long> myBinding = TupleBinding.getPrimitiveBinding(Long.class);