[Colocate] Fix bug that colocate group cannot be redistributed after dropping a backend (#7020)
Main changes:
1. Fix [Bug] Colocate group can not be redistributed after dropping a backend #7019
2. Add a detailed message explaining why a colocate group is unstable.
3. Add more suggestions for upgrading a Doris cluster.
@@ -1,6 +1,6 @@
 ---
 {
-    "title": "Multi-tenancy(Experimental)",
+    "title": "Multi-tenancy(Deprecated)",
     "language": "en"
 }
 ---

@@ -24,9 +24,9 @@ specific language governing permissions and limitations
 under the License.
 -->
 
-# Multi-tenancy(Experimental)
+# Multi-tenancy(Deprecated)
 
-This function is experimental and is not recommended for use in production environment.
+This function is deprecated. Please see [Multi-Tenant](../multi-tenant.md).
 
 ## Background
 Doris, as a PB-level online report and multi-dimensional analysis database, provides cloud-based database services through an open cloud, deploying a separate physical cluster for each client in the cloud. Internally, a single physical cluster hosts multiple services, and separate clusters are built for services with high isolation requirements. This situation raises several problems:

@@ -32,6 +32,25 @@ Doris can upgrade smoothly by rolling upgrades. The following steps are recommended
 > Note:
 > 1. The following approaches are based on highly available deployments. That is, 3 data replicas and FE high availability.
 
+## Preparation
+
+1. Turn off the replica repair and balance operations.
+
+Nodes will restart during the upgrade process, which may trigger unnecessary cluster balancing and replica repair logic. You can turn these off first with the following commands:
+
+```
+# Turn off the replica balance logic. After it is turned off, the balancing of ordinary table replicas will no longer be triggered.
+$ mysql-client> admin set frontend config("disable_balance" = "true");
+
+# Turn off the replica balance logic of colocation tables. After it is turned off, the replica redistribution of colocation tables will no longer be triggered.
+$ mysql-client> admin set frontend config("disable_colocate_balance" = "true");
+
+# Turn off the replica scheduling logic. After it is turned off, all generated replica repair and balancing tasks will no longer be scheduled.
+$ mysql-client> admin set frontend config("disable_tablet_scheduler" = "true");
+```
+
+After the cluster is upgraded, use the above commands to set the corresponding configurations back to their original values.
+
 ## Test the correctness of BE upgrade
 
 1. Arbitrarily select a BE node and deploy the latest palo_be binary file.
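
The doc hunk above tells readers to restore the original values after the upgrade but does not spell them out. A minimal sketch of the restore step, assuming the three configs were at their default value of `false` before the upgrade:

```
$ mysql-client> admin set frontend config("disable_balance" = "false");
$ mysql-client> admin set frontend config("disable_colocate_balance" = "false");
$ mysql-client> admin set frontend config("disable_tablet_scheduler" = "false");
```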

@@ -1,6 +1,6 @@
 ---
 {
-    "title": "Multi-tenancy(Experimental)",
+    "title": "Multi-tenancy(Deprecated)",
     "language": "zh-CN"
 }
 ---

@@ -24,9 +24,9 @@ specific language governing permissions and limitations
 under the License.
 -->
 
-# Multi-tenancy(Experimental)
+# Multi-tenancy(Deprecated)
 
-This function is experimental and is not recommended for use in production environments.
+This function is deprecated. For the new solution, see [Multi-Tenant and Resource Division](../multi-tenant.md).
 
 ## Background
 As a PB-level online report and multi-dimensional analysis database, Doris provides cloud database services externally through an open cloud, deploying a separate physical cluster for each cloud customer. Internally, a single physical cluster hosts multiple businesses, and separate clusters are built for businesses with high isolation requirements. This situation raises several problems:

@@ -31,6 +31,25 @@ Doris can upgrade smoothly by rolling upgrades. It is recommended to proceed in the following order
 > Note:
 > 1. The following approaches all assume a highly available deployment. That is, 3 data replicas and FE high availability.
 
+## Preparation
+
+1. Turn off the cluster's replica repair and balance functions.
+
+Nodes will restart during the upgrade process, which may trigger unnecessary cluster balancing and replica repair logic. You can turn these off first with the following commands:
+
+```
+# Turn off the replica balance logic. After it is turned off, the balancing of ordinary table replicas will no longer be triggered.
+$ mysql-client> admin set frontend config("disable_balance" = "true");
+
+# Turn off the replica balance logic of colocation tables. After it is turned off, the replica redistribution of colocation tables will no longer be triggered.
+$ mysql-client> admin set frontend config("disable_colocate_balance" = "true");
+
+# Turn off the replica scheduling logic. After it is turned off, all generated replica repair and balancing tasks will no longer be scheduled.
+$ mysql-client> admin set frontend config("disable_tablet_scheduler" = "true");
+```
+
+After the cluster upgrade is complete, use the above commands again to set the corresponding configurations back to their original values.
+
 ## Test the correctness of BE upgrade
 
 1. Pick any BE node and deploy the latest palo_be binary file.

@@ -5379,7 +5379,7 @@ public class Catalog {
             }
 
             // set this group as unstable
-            colocateTableIndex.markGroupUnstable(groupId, false /* edit log is along with modify table log */);
+            colocateTableIndex.markGroupUnstable(groupId, "Colocation group modified by user", false /* edit log is along with modify table log */);
             table.setColocateGroup(colocateGroup);
         } else {
             // unset colocation group

@@ -177,6 +177,7 @@ public class ColocateTableIndex implements Writable {
                     tbl.getDefaultReplicaAllocation());
             groupName2Id.put(fullGroupName, groupId);
             group2Schema.put(groupId, groupSchema);
+            group2ErrMsgs.put(groupId, "");
         }
         group2Tables.put(groupId, tbl.getId());
         table2Group.put(tbl.getId(), groupId);

@@ -206,13 +207,14 @@ public class ColocateTableIndex implements Writable {
         }
     }
 
-    public void markGroupUnstable(GroupId groupId, boolean needEditLog) {
+    public void markGroupUnstable(GroupId groupId, String reason, boolean needEditLog) {
         writeLock();
         try {
             if (!group2Tables.containsKey(groupId)) {
                 return;
             }
             if (unstableGroups.add(groupId)) {
+                group2ErrMsgs.put(groupId, Strings.nullToEmpty(reason));
                 if (needEditLog) {
                     ColocatePersistInfo info = ColocatePersistInfo.createForMarkUnstable(groupId);
                     Catalog.getCurrentCatalog().getEditLog().logColocateMarkUnstable(info);

@@ -231,6 +233,7 @@ public class ColocateTableIndex implements Writable {
                 return;
             }
             if (unstableGroups.remove(groupId)) {
+                group2ErrMsgs.put(groupId, "");
                 if (needEditLog) {
                     ColocatePersistInfo info = ColocatePersistInfo.createForMarkStable(groupId);
                     Catalog.getCurrentCatalog().getEditLog().logColocateMarkStable(info);

@@ -255,6 +258,7 @@ public class ColocateTableIndex implements Writable {
             // all tables of this group are removed, remove the group
             group2BackendsPerBucketSeq.rowMap().remove(groupId);
             group2Schema.remove(groupId);
+            group2ErrMsgs.remove(groupId);
             unstableGroups.remove(groupId);
             String fullGroupName = null;
             for (Map.Entry<String, GroupId> entry : groupName2Id.entrySet()) {

@@ -537,7 +541,7 @@ public class ColocateTableIndex implements Writable {
     }
 
     public void replayMarkGroupUnstable(ColocatePersistInfo info) {
-        markGroupUnstable(info.getGroupId(), false);
+        markGroupUnstable(info.getGroupId(), "replay mark group unstable", false);
     }
 
     public void replayMarkGroupStable(ColocatePersistInfo info) {
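
The ColocateTableIndex hunks above thread a reason string through every stability transition. A minimal, self-contained sketch of that bookkeeping invariant (illustrative class and field names, not the actual Doris types): the error-message map must be updated on add, mark-unstable, mark-stable, and remove, or stale reasons leak.

```java
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

// Sketch: keep the per-group error message in sync with the unstable set
// across every state transition, so "why is this group unstable?" is always
// answerable and dropped groups do not leak map entries.
public class GroupStabilityTracker {
    private final Set<Long> unstableGroups = new HashSet<>();
    private final Map<Long, String> groupErrMsgs = new HashMap<>();

    public void addGroup(long groupId) {
        groupErrMsgs.put(groupId, "");      // new groups start stable, empty reason
    }

    public void markUnstable(long groupId, String reason) {
        if (unstableGroups.add(groupId)) {  // only record a reason on the transition
            groupErrMsgs.put(groupId, reason == null ? "" : reason);
        }
    }

    public void markStable(long groupId) {
        if (unstableGroups.remove(groupId)) {
            groupErrMsgs.put(groupId, "");  // clear the stale reason
        }
    }

    public void removeGroup(long groupId) {
        unstableGroups.remove(groupId);     // drop both pieces of state together
        groupErrMsgs.remove(groupId);
    }

    public String explain(long groupId) {
        return unstableGroups.contains(groupId)
                ? "unstable: " + groupErrMsgs.getOrDefault(groupId, "")
                : "stable";
    }
}
```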

@@ -48,6 +48,7 @@ import com.google.common.collect.Table;
 
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
+import org.apache.parquet.Strings;
 
 import java.util.List;
 import java.util.Map;

@@ -161,6 +162,7 @@ public class ColocateTableCheckerAndBalancer extends MasterDaemon {
                 Catalog.getCurrentSystemInfo().checkReplicaAllocation(db.getClusterName(), replicaAlloc);
             } catch (DdlException e) {
+                colocateIndex.setErrMsgForGroup(groupId, e.getMessage());
                 continue;
             }
             Map<Tag, Short> allocMap = replicaAlloc.getAllocMap();

@@ -220,7 +222,7 @@ public class ColocateTableCheckerAndBalancer extends MasterDaemon {
                 continue;
             }
 
-            boolean isGroupStable = true;
+            String unstableReason = null;
             OUT: for (Long tableId : tableIds) {
                 OlapTable olapTable = (OlapTable) db.getTableNullable(tableId);
                 if (olapTable == null || !colocateIndex.isColocateTable(olapTable.getId())) {

@@ -244,8 +246,8 @@ public class ColocateTableCheckerAndBalancer extends MasterDaemon {
                         Tablet tablet = index.getTablet(tabletId);
                         TabletStatus st = tablet.getColocateHealthStatus(visibleVersion, replicaAlloc, bucketsSeq);
                         if (st != TabletStatus.HEALTHY) {
-                            isGroupStable = false;
-                            LOG.debug("get unhealthy tablet {} in colocate table. status: {}", tablet.getId(), st);
+                            unstableReason = String.format("get unhealthy tablet %d in colocate table. status: %s", tablet.getId(), st);
+                            LOG.debug(unstableReason);
 
                             if (!tablet.readyToBeRepaired(Priority.HIGH)) {
                                 continue;

@@ -279,10 +281,10 @@ public class ColocateTableCheckerAndBalancer extends MasterDaemon {
             } // end for tables
 
             // mark group as stable or unstable
-            if (isGroupStable) {
+            if (Strings.isNullOrEmpty(unstableReason)) {
                 colocateIndex.markGroupStable(groupId, true);
             } else {
-                colocateIndex.markGroupUnstable(groupId, true);
+                colocateIndex.markGroupUnstable(groupId, unstableReason, true);
             }
         } // end for groups
     }
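
The checker hunks above replace the boolean isGroupStable flag with a nullable unstableReason string, so the stable/unstable decision and the human-readable explanation come from a single variable. A compact sketch of that pattern (illustrative names, not the Doris code):

```java
import java.util.List;

// Sketch of the "nullable reason instead of boolean flag" pattern: scan items,
// record a human-readable reason for any failure, and treat "no reason
// recorded" as healthy.
public class HealthScan {
    // tabletHealth: 0 means healthy, any other value is an unhealthy status code
    static String findUnstableReason(List<Integer> tabletHealth) {
        String unstableReason = null;
        for (int tabletId = 0; tabletId < tabletHealth.size(); tabletId++) {
            int status = tabletHealth.get(tabletId);
            if (status != 0) {
                // Overwrites earlier reasons; the last unhealthy tablet is the
                // one reported, mirroring how the diff assigns unstableReason
                // inside the loop.
                unstableReason = String.format("get unhealthy tablet %d. status: %d", tabletId, status);
            }
        }
        return unstableReason; // null means every tablet was healthy
    }

    public static void main(String[] args) {
        System.out.println(findUnstableReason(List.of(0, 0, 7))); // unhealthy tablet 2
        System.out.println(findUnstableReason(List.of(0, 0)));    // null -> group stable
    }
}
```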

@@ -470,10 +472,12 @@ public class ColocateTableCheckerAndBalancer extends MasterDaemon {
             for (Long beId : backendIds) {
                 Backend be = infoService.getBackend(beId);
                 if (be == null) {
-                    LOG.info("backend {} does not exist", beId);
-                    return null;
+                    // For a non-existent BE (maybe dropped), add the IP 0.0.0.0,
+                    // and the following logic will handle the non-existent host.
+                    hosts.add(Backend.DUMMY_IP);
+                } else {
+                    hosts.add(be.getHost());
                 }
-                hosts.add(be.getHost());
             }
             hostsPerBucketSeq.add(hosts);
         }
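
The hunk above is the heart of the fix for #7019: previously, one dropped backend in a bucket sequence made the whole host resolution return null, so the downstream per-host logic never ran and the group could never be redistributed. A self-contained sketch of the new behavior (hypothetical names; only Backend.DUMMY_IP and its value come from the diff):

```java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;

// Sketch: when a backend in a bucket sequence has been dropped, substitute a
// sentinel host instead of aborting, so redistribution can still be planned.
public class BucketHostResolver {
    public static final String DUMMY_IP = "0.0.0.0"; // sentinel for a dropped backend

    // liveBackends: beId -> host for the backends that still exist
    public static List<String> resolveHosts(List<Long> backendIds, Map<Long, String> liveBackends) {
        List<String> hosts = new ArrayList<>();
        for (Long beId : backendIds) {
            String host = liveBackends.get(beId);
            // Before the fix, a missing backend here aborted the whole
            // resolution (return null), which blocked redistribution of the
            // colocate group forever.
            hosts.add(host == null ? DUMMY_IP : host);
        }
        return hosts;
    }

    public static void main(String[] args) {
        Map<Long, String> live = Map.of(10001L, "192.168.0.1", 10002L, "192.168.0.2");
        // backend 10003 was dropped
        System.out.println(resolveHosts(Arrays.asList(10001L, 10002L, 10003L), live));
        // -> [192.168.0.1, 192.168.0.2, 0.0.0.0]
    }
}
```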

@@ -57,7 +57,7 @@ public class ReplicasProcNode implements ProcNodeInterface {
         result.setNames(TITLE_NAMES);
         for (Replica replica : replicas) {
             Backend be = backendMap.get(replica.getBackendId());
-            String host = (be == null ? "0.0.0.0" : be.getHost());
+            String host = (be == null ? Backend.DUMMY_IP : be.getHost());
             int port = (be == null ? 0 : be.getHttpPort());
             String metaUrl = String.format("http://%s:%d/api/meta/header/%d/%d",
                     host, port,

@@ -147,7 +147,7 @@ public class ColocateMetaService {
 
         HttpMethod method = request.getRequest().method();
         if (method.equals(HttpMethod.POST)) {
-            colocateIndex.markGroupUnstable(groupId, true);
+            colocateIndex.markGroupUnstable(groupId, "mark unstable via http api", true);
         } else if (method.equals(HttpMethod.DELETE)) {
             colocateIndex.markGroupStable(groupId, true);
         } else {

@@ -110,7 +110,7 @@ public class ColocateMetaService extends RestBaseController {
 
         String method = request.getMethod();
         if ("POST".equalsIgnoreCase(method)) {
-            colocateIndex.markGroupUnstable(groupId, true);
+            colocateIndex.markGroupUnstable(groupId, "mark unstable via http api", true);
         } else if ("DELETE".equalsIgnoreCase(method)) {
            colocateIndex.markGroupStable(groupId, true);
         }
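
For orientation, these two handlers (the legacy one and the RestBaseController variant) back the HTTP interface for manually toggling a group's stability; the diff confirms POST marks a group unstable and DELETE marks it stable. A usage sketch follows, where the endpoint path, parameters, and credentials are assumptions for illustration, not taken from this diff:

```
# Mark a colocation group unstable (hypothetical path and params)
curl -X POST "http://fe_host:fe_http_port/api/colocate/group_stable?db_id=10005&group_id=10008" -u user:passwd

# Mark it stable again
curl -X DELETE "http://fe_host:fe_http_port/api/colocate/group_stable?db_id=10005&group_id=10008" -u user:passwd
```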

@@ -53,6 +53,9 @@ import java.util.concurrent.atomic.AtomicBoolean;
  */
 public class Backend implements Writable {
 
+    // Represents a meaningless placeholder IP
+    public static final String DUMMY_IP = "0.0.0.0";
+
     public enum BackendState {
         using, /* backend belongs to a cluster */
         offline,