[opt](balance) add config balance_slot_num_per_path (#19869)

Make balance_slot_num_per_path configurable.
This commit is contained in:
qiye
2023-05-25 13:39:42 +08:00
committed by GitHub
parent 99e0f7b184
commit 04415d0b35
6 changed files with 31 additions and 9 deletions

View File

@ -1965,6 +1965,16 @@ This configs can set to true to disable the automatic colocate tables's relocate
2. Because once the balance is turned off, the unstable colocate table may not be restored
3. Eventually the colocate plan cannot be used when querying.
#### `balance_slot_num_per_path`
Default: 1
IsMutable:true
MasterOnly:true
The number of balance slots available per path during tablet balancing.
#### `disable_tablet_scheduler`
Default:false

View File

@ -1969,6 +1969,16 @@ BE副本数的平衡阈值。
2. 因为一旦关闭平衡,不稳定的 colocate 表可能无法恢复
3. 最终查询时无法使用 colocate 计划。
#### `balance_slot_num_per_path`
默认值:1
是否可以动态配置:true
是否为 Master FE 节点独有的配置项:true
balance 时每个路径的默认 slot 数量
#### `disable_tablet_scheduler`
默认值:false
@ -2722,4 +2732,4 @@ show data (其他用法:HELP SHOW DATA)
是否为 Master FE 节点独有的配置项:false
控制是否启用查询命中率统计。默认为 false。
控制是否启用查询命中率统计。默认为 false。

View File

@ -944,6 +944,10 @@ public class Config extends ConfigBase {
@ConfField(mutable = true, masterOnly = true)
public static int partition_rebalance_max_moves_num_per_selection = 10;
// Number of balance slots per path. The default of 1 reduces unnecessary balance tasks
// and provides a more accurate estimate of capacity.
@ConfField(masterOnly = true, mutable = true)
public static int balance_slot_num_per_path = 1;
// This threshold is to avoid piling up too many report task in FE, which may cause OOM exception.
// In some large Doris cluster, eg: 100 Backends with ten million replicas, a tablet report may cost
// several seconds after some modification of metadata(drop partition, etc..).

View File

@ -128,7 +128,7 @@ public class BeLoadRebalancer extends Rebalancer {
// for each path, we try to select at most Config.balance_slot_num_per_path tablets
Map<Long, Integer> remainingPaths = Maps.newHashMap();
for (Long pathHash : pathHigh) {
remainingPaths.put(pathHash, TabletScheduler.BALANCE_SLOT_NUM_FOR_PATH);
remainingPaths.put(pathHash, Config.balance_slot_num_per_path);
}
if (remainingPaths.isEmpty()) {

View File

@ -24,6 +24,7 @@ import org.apache.doris.clone.SchedException.Status;
import org.apache.doris.clone.TabletSchedCtx.BalanceType;
import org.apache.doris.clone.TabletSchedCtx.Priority;
import org.apache.doris.clone.TabletScheduler.PathSlot;
import org.apache.doris.common.Config;
import org.apache.doris.system.SystemInfoService;
import org.apache.doris.thrift.TStorageMedium;
@ -170,7 +171,7 @@ public class DiskRebalancer extends Rebalancer {
// for each path, we try to select at most Config.balance_slot_num_per_path tablets
Map<Long, Integer> remainingPaths = Maps.newHashMap();
for (Long pathHash : pathHigh) {
remainingPaths.put(pathHash, TabletScheduler.BALANCE_SLOT_NUM_FOR_PATH);
remainingPaths.put(pathHash, Config.balance_slot_num_per_path);
}
if (remainingPaths.isEmpty()) {

View File

@ -102,9 +102,6 @@ public class TabletScheduler extends MasterDaemon {
private static final long SCHEDULE_INTERVAL_MS = 1000; // 1s
// 1 slot for reduce unnecessary balance task, provided a more accurate estimate of capacity
public static final int BALANCE_SLOT_NUM_FOR_PATH = 1;
/*
* Tablet is added to pendingTablets, as well as its id to allTabletIds.
* TabletScheduler will take tablet from pendingTablets but will not remove its id from allTabletIds when
@ -1851,7 +1848,7 @@ public class TabletScheduler extends MasterDaemon {
public Slot(int total) {
this.total = total;
this.available = total;
this.balanceSlot = BALANCE_SLOT_NUM_FOR_PATH;
this.balanceSlot = Config.balance_slot_num_per_path;
}
public void rectify() {
@ -1862,8 +1859,8 @@ public class TabletScheduler extends MasterDaemon {
available = total;
}
if (balanceSlot > BALANCE_SLOT_NUM_FOR_PATH) {
balanceSlot = BALANCE_SLOT_NUM_FOR_PATH;
if (balanceSlot > Config.balance_slot_num_per_path) {
balanceSlot = Config.balance_slot_num_per_path;
}
}