[Colocate] Fix bug that a colocate group cannot be redistributed after dropping a backend (#7020)

Main changes:

1. Fix [Bug] Colocate group cannot be redistributed after dropping a backend #7019
2. Add a detailed message about why a colocate group is unstable.
3. Add more suggestions for upgrading a Doris cluster.
Mingyu Chen
2021-11-11 15:41:49 +08:00
committed by GitHub
parent cf085b8b1a
commit 58804d3570
11 changed files with 69 additions and 20 deletions

View File

@ -1,6 +1,6 @@
---
{
"title": "Multi-tenancy(Experimental)",
"title": "Multi-tenancy(Deprecated)",
"language": "en"
}
---
@ -24,9 +24,9 @@ specific language governing permissions and limitations
under the License.
-->
# Multi-tenancy(Experimental)
# Multi-tenancy(Deprecated)
This function is experimental and is not recommended for use in a production environment.
This function is deprecated. Please see [Multi-Tenant](../multi-tenant.md).
## Background
As a PB-level online reporting and multi-dimensional analysis database, Doris provides cloud-based database services through an open cloud and deploys a dedicated physical cluster for each customer in the cloud. Internally, a single physical cluster hosts multiple services, and separate clusters are built for services with high isolation requirements. This gives rise to the following problems:

View File

@ -32,6 +32,25 @@ Doris can upgrade smoothly by rolling upgrades. The following steps are recommen
> Note:
> 1. The following approaches are based on highly available deployments, that is, data stored with 3 replicas and FE deployed in high-availability mode.
## Preparation
1. Turn off the replica repair and balance operations.
Nodes will be restarted during the upgrade process, which may trigger unnecessary cluster balancing and replica repair logic. You can disable these first with the following commands:
```
# Turn off the replica balance logic. After it is disabled, the balancing of ordinary table replicas will no longer be triggered.
$ mysql-client> admin set frontend config("disable_balance" = "true");
# Turn off the replica balance logic for colocation tables. After it is disabled, replica redistribution of colocation tables will no longer be triggered.
$ mysql-client> admin set frontend config("disable_colocate_balance" = "true");
# Turn off the replica scheduling logic. After it is disabled, any generated replica repair and balancing tasks will no longer be scheduled.
$ mysql-client> admin set frontend config("disable_tablet_scheduler" = "true");
```
After the cluster upgrade is complete, use the above commands to set the corresponding configurations back to their original values.
## Test the correctness of BE upgrade
1. Select any BE node and deploy the latest palo_be binary file.

View File

@ -1,6 +1,6 @@
---
{
"title": "Multi-tenancy (Experimental)",
"title": "Multi-tenancy (Deprecated)",
"language": "zh-CN"
}
---
@ -24,9 +24,9 @@ specific language governing permissions and limitations
under the License.
-->
# Multi-tenancy (Experimental)
# Multi-tenancy (Deprecated)
This function is experimental and is not recommended for use in a production environment for now.
This function is deprecated. For the new solution, please see [Multi-Tenancy and Resource Division](../multi-tenant.md).
## Background
As a PB-level online reporting and multi-dimensional analysis database, Doris provides cloud database services externally through an open cloud, and a dedicated physical cluster is deployed for each cloud customer. Internally, a single physical cluster hosts multiple workloads, and separate clusters are built for workloads with high isolation requirements. This raises several problems:

View File

@ -31,6 +31,25 @@ Doris can be upgraded smoothly by rolling upgrade. It is recommended to follow the
> Note:
> 1. The following approaches are all based on highly available deployments, that is, data stored with 3 replicas and FE deployed in high-availability mode.
## Preparation
1. Turn off the cluster's replica repair and balance functions.
Nodes will be restarted during the upgrade process, which may trigger unnecessary cluster balancing and replica repair logic. You can disable these first with the following commands:
```
# Turn off the replica balance logic. After it is disabled, the balancing of ordinary table replicas will no longer be triggered.
$ mysql-client > admin set frontend config("disable_balance" = "true");
# Turn off the replica balance logic for colocation tables. After it is disabled, replica redistribution of colocation tables will no longer be triggered.
$ mysql-client > admin set frontend config("disable_colocate_balance" = "true");
# Turn off the replica scheduling logic. After it is disabled, any generated replica repair and balancing tasks will no longer be scheduled.
$ mysql-client > admin set frontend config("disable_tablet_scheduler" = "true");
```
After the cluster upgrade is complete, use the above commands to set the corresponding configurations back to their original values.
## Test the correctness of BE upgrade
1. Select any BE node and deploy the latest palo_be binary file.

View File

@ -5379,7 +5379,7 @@ public class Catalog {
}
// set this group as unstable
colocateTableIndex.markGroupUnstable(groupId, false /* edit log is along with modify table log */);
colocateTableIndex.markGroupUnstable(groupId, "Colocation group modified by user", false /* edit log is along with modify table log */);
table.setColocateGroup(colocateGroup);
} else {
// unset colocation group

View File

@ -177,6 +177,7 @@ public class ColocateTableIndex implements Writable {
tbl.getDefaultReplicaAllocation());
groupName2Id.put(fullGroupName, groupId);
group2Schema.put(groupId, groupSchema);
group2ErrMsgs.put(groupId, "");
}
group2Tables.put(groupId, tbl.getId());
table2Group.put(tbl.getId(), groupId);
@ -206,13 +207,14 @@ public class ColocateTableIndex implements Writable {
}
}
public void markGroupUnstable(GroupId groupId, boolean needEditLog) {
public void markGroupUnstable(GroupId groupId, String reason, boolean needEditLog) {
writeLock();
try {
if (!group2Tables.containsKey(groupId)) {
return;
}
if (unstableGroups.add(groupId)) {
group2ErrMsgs.put(groupId, Strings.nullToEmpty(reason));
if (needEditLog) {
ColocatePersistInfo info = ColocatePersistInfo.createForMarkUnstable(groupId);
Catalog.getCurrentCatalog().getEditLog().logColocateMarkUnstable(info);
@ -231,6 +233,7 @@ public class ColocateTableIndex implements Writable {
return;
}
if (unstableGroups.remove(groupId)) {
group2ErrMsgs.put(groupId, "");
if (needEditLog) {
ColocatePersistInfo info = ColocatePersistInfo.createForMarkStable(groupId);
Catalog.getCurrentCatalog().getEditLog().logColocateMarkStable(info);
@ -255,6 +258,7 @@ public class ColocateTableIndex implements Writable {
// all tables of this group are removed, remove the group
group2BackendsPerBucketSeq.rowMap().remove(groupId);
group2Schema.remove(groupId);
group2ErrMsgs.remove(groupId);
unstableGroups.remove(groupId);
String fullGroupName = null;
for (Map.Entry<String, GroupId> entry : groupName2Id.entrySet()) {
@ -537,7 +541,7 @@ public class ColocateTableIndex implements Writable {
}
public void replayMarkGroupUnstable(ColocatePersistInfo info) {
markGroupUnstable(info.getGroupId(), false);
markGroupUnstable(info.getGroupId(), "replay mark group unstable", false);
}
public void replayMarkGroupStable(ColocatePersistInfo info) {
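The hunk above extends markGroupUnstable with a reason argument and records the text in the new group2ErrMsgs map, clearing it again when the group is marked stable or removed. Below is a minimal, self-contained sketch of that bookkeeping pattern, not the real ColocateTableIndex: group ids are simplified to plain longs, locking and edit-log persistence are omitted, and the getUnstableReason accessor is a hypothetical name used only here (the diff itself only shows the map being written).

```
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

// Simplified model of the unstable-reason bookkeeping added by this commit.
// Not the real ColocateTableIndex: no locking, no edit log, plain long group ids.
class ColocateGroupStateSketch {
    private final Set<Long> unstableGroups = new HashSet<>();
    private final Map<Long, String> group2ErrMsgs = new HashMap<>();

    void markGroupUnstable(long groupId, String reason) {
        if (unstableGroups.add(groupId)) {
            // Remember why the group became unstable so it can be shown to users.
            group2ErrMsgs.put(groupId, reason == null ? "" : reason);
        }
    }

    void markGroupStable(long groupId) {
        if (unstableGroups.remove(groupId)) {
            // Clear the message once the group is healthy again.
            group2ErrMsgs.put(groupId, "");
        }
    }

    // Hypothetical accessor, only for this sketch.
    String getUnstableReason(long groupId) {
        return group2ErrMsgs.getOrDefault(groupId, "");
    }

    public static void main(String[] args) {
        ColocateGroupStateSketch state = new ColocateGroupStateSketch();
        state.markGroupUnstable(10001L,
                "get unhealthy tablet 42 in colocate table. status: REPLICA_MISSING");
        System.out.println(state.getUnstableReason(10001L));
        state.markGroupStable(10001L);
        System.out.println("after stable: '" + state.getUnstableReason(10001L) + "'");
    }
}
```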

View File

@ -48,6 +48,7 @@ import com.google.common.collect.Table;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.parquet.Strings;
import java.util.List;
import java.util.Map;
@ -161,6 +162,7 @@ public class ColocateTableCheckerAndBalancer extends MasterDaemon {
Catalog.getCurrentSystemInfo().checkReplicaAllocation(db.getClusterName(), replicaAlloc);
} catch (DdlException e) {
colocateIndex.setErrMsgForGroup(groupId, e.getMessage());
continue;
}
Map<Tag, Short> allocMap = replicaAlloc.getAllocMap();
@ -220,7 +222,7 @@ public class ColocateTableCheckerAndBalancer extends MasterDaemon {
continue;
}
boolean isGroupStable = true;
String unstableReason = null;
OUT: for (Long tableId : tableIds) {
OlapTable olapTable = (OlapTable) db.getTableNullable(tableId);
if (olapTable == null || !colocateIndex.isColocateTable(olapTable.getId())) {
@ -244,8 +246,8 @@ public class ColocateTableCheckerAndBalancer extends MasterDaemon {
Tablet tablet = index.getTablet(tabletId);
TabletStatus st = tablet.getColocateHealthStatus(visibleVersion, replicaAlloc, bucketsSeq);
if (st != TabletStatus.HEALTHY) {
isGroupStable = false;
LOG.debug("get unhealthy tablet {} in colocate table. status: {}", tablet.getId(), st);
unstableReason = String.format("get unhealthy tablet %d in colocate table. status: %s", tablet.getId(), st);
LOG.debug(unstableReason);
if (!tablet.readyToBeRepaired(Priority.HIGH)) {
continue;
@ -279,10 +281,10 @@ public class ColocateTableCheckerAndBalancer extends MasterDaemon {
} // end for tables
// mark group as stable or unstable
if (isGroupStable) {
if (Strings.isNullOrEmpty(unstableReason)) {
colocateIndex.markGroupStable(groupId, true);
} else {
colocateIndex.markGroupUnstable(groupId, true);
colocateIndex.markGroupUnstable(groupId, unstableReason, true);
}
} // end for groups
}
@ -470,10 +472,12 @@ public class ColocateTableCheckerAndBalancer extends MasterDaemon {
for (Long beId : backendIds) {
Backend be = infoService.getBackend(beId);
if (be == null) {
LOG.info("backend {} does not exist", beId);
return null;
// For a non-existent BE (maybe dropped), add the IP 0.0.0.0.
// The following logic will handle the non-existent host.
hosts.add(Backend.DUMMY_IP);
} else {
hosts.add(be.getHost());
}
hosts.add(be.getHost());
}
hostsPerBucketSeq.add(hosts);
}
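This hunk is the core of the redistribution fix. Previously, when a backend id in a colocate bucket sequence could no longer be resolved (for example, because the backend had been dropped), the method logged the miss and returned null, so host collection was abandoned and the group was never redistributed. With the change, a dummy IP keeps the per-bucket host list aligned, and the existing relocation logic then treats that backend as an unavailable host. The sketch below illustrates the pattern under simplified assumptions: backend ids map to hosts through a plain map, and the class and method names are illustrative rather than real Doris APIs.

```
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Illustrative sketch of the host-resolution change, not the real balancer code.
class BucketHostsSketch {
    static final String DUMMY_IP = "0.0.0.0";

    static List<String> resolveHosts(List<Long> backendIds, Map<Long, String> aliveBackends) {
        List<String> hosts = new ArrayList<>();
        for (Long beId : backendIds) {
            String host = aliveBackends.get(beId);
            if (host == null) {
                // The backend was dropped: substitute a meaningless IP instead of
                // giving up, so the bucket can still be scheduled for relocation.
                hosts.add(DUMMY_IP);
            } else {
                hosts.add(host);
            }
        }
        return hosts;
    }

    public static void main(String[] args) {
        Map<Long, String> alive = new HashMap<>();
        alive.put(1L, "192.168.0.1");
        alive.put(2L, "192.168.0.2");
        // Backend 3 was dropped but is still referenced by the bucket sequence.
        System.out.println(resolveHosts(Arrays.asList(1L, 2L, 3L), alive));
        // Prints: [192.168.0.1, 192.168.0.2, 0.0.0.0]
    }
}
```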

View File

@ -57,7 +57,7 @@ public class ReplicasProcNode implements ProcNodeInterface {
result.setNames(TITLE_NAMES);
for (Replica replica : replicas) {
Backend be = backendMap.get(replica.getBackendId());
String host = (be == null ? "0.0.0.0" : be.getHost());
String host = (be == null ? Backend.DUMMY_IP : be.getHost());
int port = (be == null ? 0 : be.getHttpPort());
String metaUrl = String.format("http://%s:%d/api/meta/header/%d/%d",
host, port,

View File

@ -147,7 +147,7 @@ public class ColocateMetaService {
HttpMethod method = request.getRequest().method();
if (method.equals(HttpMethod.POST)) {
colocateIndex.markGroupUnstable(groupId, true);
colocateIndex.markGroupUnstable(groupId, "mark unstable via http api", true);
} else if (method.equals(HttpMethod.DELETE)) {
colocateIndex.markGroupStable(groupId, true);
} else {

View File

@ -110,7 +110,7 @@ public class ColocateMetaService extends RestBaseController {
String method = request.getMethod();
if ("POST".equalsIgnoreCase(method)) {
colocateIndex.markGroupUnstable(groupId, true);
colocateIndex.markGroupUnstable(groupId, "mark unstable via http api", true);
} else if ("DELETE".equalsIgnoreCase(method)) {
colocateIndex.markGroupStable(groupId, true);
}

View File

@ -53,6 +53,9 @@ import java.util.concurrent.atomic.AtomicBoolean;
*/
public class Backend implements Writable {
// Represents a meaningless IP
public static final String DUMMY_IP = "0.0.0.0";
public enum BackendState {
using, /* backend belongs to a cluster */
offline,