From 04415d0b357310927a4c3f0d006fc52ed4acc705 Mon Sep 17 00:00:00 2001 From: qiye Date: Thu, 25 May 2023 13:39:42 +0800 Subject: [PATCH] [opt](balance) add config balance_slot_num_per_path (#19869) Make balance_slot_num_per_path configurable. --- docs/en/docs/admin-manual/config/fe-config.md | 10 ++++++++++ docs/zh-CN/docs/admin-manual/config/fe-config.md | 12 +++++++++++- .../main/java/org/apache/doris/common/Config.java | 4 ++++ .../org/apache/doris/clone/BeLoadRebalancer.java | 2 +- .../java/org/apache/doris/clone/DiskRebalancer.java | 3 ++- .../java/org/apache/doris/clone/TabletScheduler.java | 9 +++------ 6 files changed, 31 insertions(+), 9 deletions(-) diff --git a/docs/en/docs/admin-manual/config/fe-config.md b/docs/en/docs/admin-manual/config/fe-config.md index 2d35568f49..f25d233de6 100644 --- a/docs/en/docs/admin-manual/config/fe-config.md +++ b/docs/en/docs/admin-manual/config/fe-config.md @@ -1965,6 +1965,16 @@ This configs can set to true to disable the automatic colocate tables's relocate 2. Because once the balance is turned off, the unstable colocate table may not be restored 3. Eventually the colocate plan cannot be used when querying. +#### `balance_slot_num_per_path` + +Default: 1 + +IsMutable:true + +MasterOnly:true + +Default number of slots per path during balance. + #### `disable_tablet_scheduler` Default:false diff --git a/docs/zh-CN/docs/admin-manual/config/fe-config.md b/docs/zh-CN/docs/admin-manual/config/fe-config.md index fb440bfa1d..a0b6ff809d 100644 --- a/docs/zh-CN/docs/admin-manual/config/fe-config.md +++ b/docs/zh-CN/docs/admin-manual/config/fe-config.md @@ -1969,6 +1969,16 @@ BE副本数的平衡阈值。 2. 因为一旦关闭平衡,不稳定的 colocate 表可能无法恢复 3. 
最终查询时无法使用 colocate 计划。 +#### `balance_slot_num_per_path` + +默认值:1 + +是否可以动态配置:true + +是否为 Master FE 节点独有的配置项:true + +balance 时每个路径的默认 slot 数量 + #### `disable_tablet_scheduler` 默认值:false @@ -2722,4 +2732,4 @@ show data (其他用法:HELP SHOW DATA) 是否为 Master FE 节点独有的配置项:false -控制是否启用查询命中率统计。默认为 false。 \ No newline at end of file +控制是否启用查询命中率统计。默认为 false。 diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index 41e5eae2af..d9a91c7f49 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -944,6 +944,10 @@ public class Config extends ConfigBase { @ConfField(mutable = true, masterOnly = true) public static int partition_rebalance_max_moves_num_per_selection = 10; + // Balance slots per path: 1 slot to reduce unnecessary balance tasks, provided a more accurate estimate of capacity + @ConfField(masterOnly = true, mutable = true) + public static int balance_slot_num_per_path = 1; + // This threshold is to avoid piling up too many report task in FE, which may cause OOM exception. // In some large Doris cluster, eg: 100 Backends with ten million replicas, a tablet report may cost // several seconds after some modification of metadata(drop partition, etc..). 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/BeLoadRebalancer.java b/fe/fe-core/src/main/java/org/apache/doris/clone/BeLoadRebalancer.java index 693f8d83bc..ebbebe6806 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/BeLoadRebalancer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/BeLoadRebalancer.java @@ -128,7 +128,7 @@ public class BeLoadRebalancer extends Rebalancer { // for each path, we try to select at most BALANCE_SLOT_NUM_FOR_PATH tablets Map remainingPaths = Maps.newHashMap(); for (Long pathHash : pathHigh) { - remainingPaths.put(pathHash, TabletScheduler.BALANCE_SLOT_NUM_FOR_PATH); + remainingPaths.put(pathHash, Config.balance_slot_num_per_path); } if (remainingPaths.isEmpty()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/DiskRebalancer.java b/fe/fe-core/src/main/java/org/apache/doris/clone/DiskRebalancer.java index 3e3d5a0828..9d676d950c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/DiskRebalancer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/DiskRebalancer.java @@ -24,6 +24,7 @@ import org.apache.doris.clone.SchedException.Status; import org.apache.doris.clone.TabletSchedCtx.BalanceType; import org.apache.doris.clone.TabletSchedCtx.Priority; import org.apache.doris.clone.TabletScheduler.PathSlot; +import org.apache.doris.common.Config; import org.apache.doris.system.SystemInfoService; import org.apache.doris.thrift.TStorageMedium; @@ -170,7 +171,7 @@ public class DiskRebalancer extends Rebalancer { // for each path, we try to select at most BALANCE_SLOT_NUM_FOR_PATH tablets Map remainingPaths = Maps.newHashMap(); for (Long pathHash : pathHigh) { - remainingPaths.put(pathHash, TabletScheduler.BALANCE_SLOT_NUM_FOR_PATH); + remainingPaths.put(pathHash, Config.balance_slot_num_per_path); } if (remainingPaths.isEmpty()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java 
index 1393d30a75..e4c30747aa 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java @@ -102,9 +102,6 @@ public class TabletScheduler extends MasterDaemon { private static final long SCHEDULE_INTERVAL_MS = 1000; // 1s - // 1 slot for reduce unnecessary balance task, provided a more accurate estimate of capacity - public static final int BALANCE_SLOT_NUM_FOR_PATH = 1; - /* * Tablet is added to pendingTablets as well it's id in allTabletIds. * TabletScheduler will take tablet from pendingTablets but will not remove it's id from allTabletIds when @@ -1851,7 +1848,7 @@ public class TabletScheduler extends MasterDaemon { public Slot(int total) { this.total = total; this.available = total; - this.balanceSlot = BALANCE_SLOT_NUM_FOR_PATH; + this.balanceSlot = Config.balance_slot_num_per_path; } public void rectify() { @@ -1862,8 +1859,8 @@ public class TabletScheduler extends MasterDaemon { available = total; } - if (balanceSlot > BALANCE_SLOT_NUM_FOR_PATH) { - balanceSlot = BALANCE_SLOT_NUM_FOR_PATH; + if (balanceSlot > Config.balance_slot_num_per_path) { + balanceSlot = Config.balance_slot_num_per_path; } }