[enhancement](backup-restore) add config for upload/download task num per be (#27772)

Set the number of upload/download tasks per BE via configuration, improving overall upload/download speed and enhancing the performance of backup and restore.

---------

Signed-off-by: nextdreamblue <zxw520blue1@163.com>
This commit is contained in:
xueweizhang
2023-12-04 11:19:45 +08:00
committed by GitHub
parent e3d2425d47
commit 80f528bf26
5 changed files with 57 additions and 6 deletions

View File

@ -2557,6 +2557,26 @@ MasterOnly:true
default timeout of backup job
#### `backup_upload_task_num_per_be`
Default: 3
IsMutable:true
MasterOnly:true
The maximum number of upload tasks assigned to each BE during the backup process. The default value is 3.
#### `restore_download_task_num_per_be`
Default: 3
IsMutable:true
MasterOnly:true
The maximum number of download tasks assigned to each BE during the restore process. The default value is 3.
#### `max_backup_restore_job_num_per_db`
Default: 10

View File

@ -2558,6 +2558,26 @@ SmallFileMgr 中存储的最大文件数
备份作业的默认超时时间
#### `backup_upload_task_num_per_be`
默认值:3
是否可以动态配置:true
是否为 Master FE 节点独有的配置项:true
备份过程中,分配给每个be的upload任务最大个数,默认值为3个。
#### `restore_download_task_num_per_be`
默认值:3
是否可以动态配置:true
是否为 Master FE 节点独有的配置项:true
恢复过程中,分配给每个be的download任务最大个数,默认值为3个。
#### `max_backup_restore_job_num_per_db`
默认值:10

View File

@ -2298,6 +2298,18 @@ public class Config extends ConfigBase {
"The path of the nereids trace file."})
public static String nereids_trace_log_dir = System.getenv("DORIS_HOME") + "/log/nereids_trace";
@ConfField(mutable = true, masterOnly = true, description = {
"备份过程中,分配给每个be的upload任务最大个数,默认值为3个。",
"The max number of upload tasks assigned to each be during the backup process, the default value is 3."
})
public static int backup_upload_task_num_per_be = 3;
@ConfField(mutable = true, masterOnly = true, description = {
"恢复过程中,分配给每个be的download任务最大个数,默认值为3个。",
"The max number of download tasks assigned to each be during the restore process, the default value is 3."
})
public static int restore_download_task_num_per_be = 3;
@ConfField(description = {"是否开启通过http接口获取log文件的功能",
"Whether to enable the function of getting log files through http interface"})
public static boolean enable_get_log_file_api = false;

View File

@ -34,6 +34,7 @@ import org.apache.doris.catalog.Resource;
import org.apache.doris.catalog.Table;
import org.apache.doris.catalog.Tablet;
import org.apache.doris.catalog.View;
import org.apache.doris.common.Config;
import org.apache.doris.common.io.Text;
import org.apache.doris.common.util.TimeUtils;
import org.apache.doris.datasource.property.S3ClientBEProperties;
@ -614,8 +615,7 @@ public class BackupJob extends AbstractJob {
for (Long beId : beToSnapshots.keySet()) {
List<SnapshotInfo> infos = beToSnapshots.get(beId);
int totalNum = infos.size();
// each backend allot at most 3 tasks
int batchNum = Math.min(totalNum, 3);
int batchNum = Math.min(totalNum, Config.backup_upload_task_num_per_be);
// each task contains several upload sub tasks
int taskNumPerBatch = Math.max(totalNum / batchNum, 1);
LOG.info("backend {} has {} batch, total {} tasks, {}", beId, batchNum, totalNum, this);

View File

@ -52,6 +52,7 @@ import org.apache.doris.catalog.Tablet;
import org.apache.doris.catalog.TabletMeta;
import org.apache.doris.catalog.View;
import org.apache.doris.clone.DynamicPartitionScheduler;
import org.apache.doris.common.Config;
import org.apache.doris.common.DdlException;
import org.apache.doris.common.FeMetaVersion;
import org.apache.doris.common.MarkedCountDownLatch;
@ -1337,8 +1338,7 @@ public class RestoreJob extends AbstractJob {
for (Long beId : beToSnapshots.keySet()) {
List<SnapshotInfo> beSnapshotInfos = beToSnapshots.get(beId);
int totalNum = beSnapshotInfos.size();
// each backend allot at most 3 tasks
int batchNum = Math.min(totalNum, 3);
int batchNum = Math.min(totalNum, Config.restore_download_task_num_per_be);
// each task contains several download sub tasks
int taskNumPerBatch = Math.max(totalNum / batchNum, 1);
LOG.debug("backend {} has {} batch, total {} tasks, {}",
@ -1490,8 +1490,7 @@ public class RestoreJob extends AbstractJob {
for (Long beId : beToSnapshots.keySet()) {
List<SnapshotInfo> beSnapshotInfos = beToSnapshots.get(beId);
int totalNum = beSnapshotInfos.size();
// each backend allot at most 3 tasks
int batchNum = Math.min(totalNum, 3);
int batchNum = Math.min(totalNum, Config.restore_download_task_num_per_be);
// each task contains several download sub tasks
int taskNumPerBatch = Math.max(totalNum / batchNum, 1);