From 93da0ebaf428e0979bfeeb7794b2ed83890482a0 Mon Sep 17 00:00:00 2001 From: walter Date: Thu, 29 Aug 2024 12:03:14 +0800 Subject: [PATCH] [chore](backup) limit the involved tablets in a backup job #39987 (#40080) cherry pick from #39987 --- .../src/main/java/org/apache/doris/common/Config.java | 9 +++++++++ .../main/java/org/apache/doris/backup/BackupJob.java | 11 +++++++++++ 2 files changed, 20 insertions(+) diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index 5e0cb99b63..d5939715f3 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -1526,6 +1526,15 @@ public class Config extends ConfigBase { @ConfField(mutable = true, masterOnly = true) public static int max_backup_restore_job_num_per_db = 10; + /** + * Control the max num of tablets per backup job involved. + */ + @ConfField(mutable = true, masterOnly = true, description = { + "用于控制每次 backup job 允许备份的 tablet 上限,以避免 OOM", + "Control the max num of tablets per backup job involved, to avoid OOM" + }) + public static int max_backup_tablets_per_job = 300000; + /** * whether to ignore table that not support type when backup, and not report exception. */ diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupJob.java b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupJob.java index 9b3edbad52..4dc8824d05 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupJob.java @@ -513,6 +513,17 @@ public class BackupJob extends AbstractJob { } } + // Limit the max num of tablets involved in a backup job, to avoid OOM. + if (unfinishedTaskIds.size() > Config.max_backup_tablets_per_job) { + String msg = String.format("the num involved tablets %d exceeds the limit %d, " + + "which might cause the FE OOM, change config `max_backup_tablets_per_job` " + + "to change this limitation", + unfinishedTaskIds.size(), Config.max_backup_tablets_per_job); + LOG.warn(msg); + status = new Status(ErrCode.COMMON_ERROR, msg); + return; + } + backupMeta = new BackupMeta(copiedTables, copiedResources); // send tasks