diff --git a/docs/en/administrator-guide/operation/multi-tenant.md b/docs/en/administrator-guide/operation/multi-tenant.md index bb2a06a683..b47a523e5e 100644 --- a/docs/en/administrator-guide/operation/multi-tenant.md +++ b/docs/en/administrator-guide/operation/multi-tenant.md @@ -1,6 +1,6 @@ --- { - "title": "Multi-tenancy(Experimental)", + "title": "Multi-tenancy(Deprecated)", "language": "en" } --- @@ -24,9 +24,9 @@ specific language governing permissions and limitations under the License. --> -# Multi-tenancy(Experimental) +# Multi-tenancy(Deprecated) -This function is experimental and is not recommended for use in production environment. +This function is deprecated. Please see [Multi-Tenant](../multi-tenant.md). ## Background Doris, as a PB-level online report and multi-dimensional analysis database, provides cloud-based database services through open cloud, and deploys a physical cluster for each client in the cloud. Internally, a physical cluster deploys multiple services, and separately builds clusters for services with high isolation requirements. In view of the above problems: diff --git a/docs/en/installing/upgrade.md b/docs/en/installing/upgrade.md index be2cde0e53..47b31187b1 100644 --- a/docs/en/installing/upgrade.md +++ b/docs/en/installing/upgrade.md @@ -32,6 +32,25 @@ Doris can upgrade smoothly by rolling upgrades. The following steps are recommen > Note: > 1. The following approaches are based on highly available deployments. That is, data 3 replicas, FE high availability. +## Preparation + +1. Turn off the replica repair and balance operation. + + There will be node restarts during the upgrade process, so unnecessary cluster balancing and replica repair logic may be triggered. You can disable them first with the following commands: + + ``` + # Turn off the replica balance logic. After it is closed, the balancing operation of the ordinary table replica will no longer be triggered. 
+ $ mysql-client> admin set frontend config("disable_balance" = "true"); + + # Turn off the replica balance logic of the colocation table. After it is closed, the replica redistribution operation of the colocation table will no longer be triggered. + $ mysql-client> admin set frontend config("disable_colocate_balance" = "true"); + + # Turn off the replica scheduling logic. After shutting down, all generated replica repair and balancing tasks will no longer be scheduled. + $ mysql-client> admin set frontend config("disable_tablet_scheduler" = "true"); + ``` + + After the cluster is upgraded, use the above commands again to set the corresponding configurations back to their original values. + ## Test the correctness of BE upgrade 1. Arbitrarily select a BE node and deploy the latest palo_be binary file. diff --git a/docs/zh-CN/administrator-guide/operation/multi-tenant.md b/docs/zh-CN/administrator-guide/operation/multi-tenant.md index 176d600d70..dc818cd3ff 100644 --- a/docs/zh-CN/administrator-guide/operation/multi-tenant.md +++ b/docs/zh-CN/administrator-guide/operation/multi-tenant.md @@ -1,6 +1,6 @@ --- { - "title": "多租户(Experimental)", + "title": "多租户(已弃用)", "language": "zh-CN" } --- @@ -24,9 +24,9 @@ specific language governing permissions and limitations under the License. --> -# 多租户(Experimental) +# 多租户(已弃用) -该功能为实验性质,暂不建议在生产环境使用。 +该功能已弃用。新方案请参阅:[多租户和资源划分](../multi-tenant.md)。 ## 背景 Doris 作为一款 PB 级别的在线报表与多维分析数据库,对外通过开放云提供云端的数据库服务,并且对于每个云上的客户都单独部署了一套物理集群。对内,一套物理集群部署了多个业务,对于隔离性要求比较高的业务单独搭建了集群。针对以上存在几点问题: diff --git a/docs/zh-CN/installing/upgrade.md b/docs/zh-CN/installing/upgrade.md index 3315a06476..e6d934d054 100644 --- a/docs/zh-CN/installing/upgrade.md +++ b/docs/zh-CN/installing/upgrade.md @@ -31,6 +31,25 @@ Doris 可以通过滚动升级的方式,平滑进行升级。建议按照以 > 注: > 1. 以下方式均建立在高可用部署的情况下。即数据 3 副本,FE 高可用情况下。 +## 前置工作 + +1. 
关闭集群副本修复和均衡功能 + + 升级过程中会有节点重启,所以可能会触发不必要的集群均衡和副本修复逻辑。可以先通过以下命令关闭: + + ``` + # 关闭副本均衡逻辑。关闭后,不会再触发普通表副本的均衡操作。 + $ mysql-client > admin set frontend config("disable_balance" = "true"); + + # 关闭 colocation 表的副本均衡逻辑。关闭后,不会再触发 colocation 表的副本重分布操作。 + $ mysql-client > admin set frontend config("disable_colocate_balance" = "true"); + + # 关闭副本调度逻辑。关闭后,所有已产生的副本修复和均衡任务不会再被调度。 + $ mysql-client > admin set frontend config("disable_tablet_scheduler" = "true"); + ``` + + 当集群升级完毕后,再通过以上命令将对应配置设为原值即可。 + ## 测试 BE 升级正确性 1. 任意选择一个 BE 节点,部署最新的 palo_be 二进制文件。 diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java index eb04db8e09..5c42ea340a 100755 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java @@ -5379,7 +5379,7 @@ public class Catalog { } // set this group as unstable - colocateTableIndex.markGroupUnstable(groupId, false /* edit log is along with modify table log */); + colocateTableIndex.markGroupUnstable(groupId, "Colocation group modified by user", false /* edit log is along with modify table log */); table.setColocateGroup(colocateGroup); } else { // unset colocation group diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/ColocateTableIndex.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/ColocateTableIndex.java index 0aca4aca57..88d221b6b9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/ColocateTableIndex.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/ColocateTableIndex.java @@ -177,6 +177,7 @@ public class ColocateTableIndex implements Writable { tbl.getDefaultReplicaAllocation()); groupName2Id.put(fullGroupName, groupId); group2Schema.put(groupId, groupSchema); + group2ErrMsgs.put(groupId, ""); } group2Tables.put(groupId, tbl.getId()); table2Group.put(tbl.getId(), groupId); @@ -206,13 +207,14 @@ public class ColocateTableIndex implements Writable { } } - public 
void markGroupUnstable(GroupId groupId, boolean needEditLog) { + public void markGroupUnstable(GroupId groupId, String reason, boolean needEditLog) { writeLock(); try { if (!group2Tables.containsKey(groupId)) { return; } if (unstableGroups.add(groupId)) { + group2ErrMsgs.put(groupId, Strings.nullToEmpty(reason)); if (needEditLog) { ColocatePersistInfo info = ColocatePersistInfo.createForMarkUnstable(groupId); Catalog.getCurrentCatalog().getEditLog().logColocateMarkUnstable(info); @@ -231,6 +233,7 @@ public class ColocateTableIndex implements Writable { return; } if (unstableGroups.remove(groupId)) { + group2ErrMsgs.put(groupId, ""); if (needEditLog) { ColocatePersistInfo info = ColocatePersistInfo.createForMarkStable(groupId); Catalog.getCurrentCatalog().getEditLog().logColocateMarkStable(info); @@ -255,6 +258,7 @@ public class ColocateTableIndex implements Writable { // all tables of this group are removed, remove the group group2BackendsPerBucketSeq.rowMap().remove(groupId); group2Schema.remove(groupId); + group2ErrMsgs.remove(groupId); unstableGroups.remove(groupId); String fullGroupName = null; for (Map.Entry entry : groupName2Id.entrySet()) { @@ -537,7 +541,7 @@ public class ColocateTableIndex implements Writable { } public void replayMarkGroupUnstable(ColocatePersistInfo info) { - markGroupUnstable(info.getGroupId(), false); + markGroupUnstable(info.getGroupId(), "replay mark group unstable", false); } public void replayMarkGroupStable(ColocatePersistInfo info) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/ColocateTableCheckerAndBalancer.java b/fe/fe-core/src/main/java/org/apache/doris/clone/ColocateTableCheckerAndBalancer.java index b0d06d18bf..14676d2ac3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/ColocateTableCheckerAndBalancer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/ColocateTableCheckerAndBalancer.java @@ -48,6 +48,7 @@ import com.google.common.collect.Table; import org.apache.logging.log4j.LogManager; 
import org.apache.logging.log4j.Logger; +import org.apache.parquet.Strings; import java.util.List; import java.util.Map; @@ -161,6 +162,7 @@ public class ColocateTableCheckerAndBalancer extends MasterDaemon { Catalog.getCurrentSystemInfo().checkReplicaAllocation(db.getClusterName(), replicaAlloc); } catch (DdlException e) { colocateIndex.setErrMsgForGroup(groupId, e.getMessage()); + continue; } Map allocMap = replicaAlloc.getAllocMap(); @@ -220,7 +222,7 @@ public class ColocateTableCheckerAndBalancer extends MasterDaemon { continue; } - boolean isGroupStable = true; + String unstableReason = null; OUT: for (Long tableId : tableIds) { OlapTable olapTable = (OlapTable) db.getTableNullable(tableId); if (olapTable == null || !colocateIndex.isColocateTable(olapTable.getId())) { @@ -244,8 +246,8 @@ public class ColocateTableCheckerAndBalancer extends MasterDaemon { Tablet tablet = index.getTablet(tabletId); TabletStatus st = tablet.getColocateHealthStatus(visibleVersion, replicaAlloc, bucketsSeq); if (st != TabletStatus.HEALTHY) { - isGroupStable = false; - LOG.debug("get unhealthy tablet {} in colocate table. status: {}", tablet.getId(), st); + unstableReason = String.format("get unhealthy tablet %d in colocate table. 
status: %s", tablet.getId(), st); + LOG.debug(unstableReason); if (!tablet.readyToBeRepaired(Priority.HIGH)) { continue; @@ -279,10 +281,10 @@ public class ColocateTableCheckerAndBalancer extends MasterDaemon { } // end for tables // mark group as stable or unstable - if (isGroupStable) { + if (Strings.isNullOrEmpty(unstableReason)) { colocateIndex.markGroupStable(groupId, true); } else { - colocateIndex.markGroupUnstable(groupId, true); + colocateIndex.markGroupUnstable(groupId, unstableReason, true); } } // end for groups } @@ -470,10 +472,12 @@ public class ColocateTableCheckerAndBalancer extends MasterDaemon { for (Long beId : backendIds) { Backend be = infoService.getBackend(beId); if (be == null) { - LOG.info("backend {} does not exist", beId); - return null; + // For non-exist BE(maybe dropped), add a ip 0.0.0.0 + // And the following logic will handle the non-exist host. + hosts.add(Backend.DUMMY_IP); + } else { + hosts.add(be.getHost()); } - hosts.add(be.getHost()); } hostsPerBucketSeq.add(hosts); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/proc/ReplicasProcNode.java b/fe/fe-core/src/main/java/org/apache/doris/common/proc/ReplicasProcNode.java index a8d117dc86..eabce8d882 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/proc/ReplicasProcNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/proc/ReplicasProcNode.java @@ -57,7 +57,7 @@ public class ReplicasProcNode implements ProcNodeInterface { result.setNames(TITLE_NAMES); for (Replica replica : replicas) { Backend be = backendMap.get(replica.getBackendId()); - String host = (be == null ? "0.0.0.0" : be.getHost()); + String host = (be == null ? Backend.DUMMY_IP : be.getHost()); int port = (be == null ? 
0 : be.getHttpPort()); String metaUrl = String.format("http://%s:%d/api/meta/header/%d/%d", host, port, diff --git a/fe/fe-core/src/main/java/org/apache/doris/http/meta/ColocateMetaService.java b/fe/fe-core/src/main/java/org/apache/doris/http/meta/ColocateMetaService.java index 777586e3ec..42b713d13c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/http/meta/ColocateMetaService.java +++ b/fe/fe-core/src/main/java/org/apache/doris/http/meta/ColocateMetaService.java @@ -147,7 +147,7 @@ public class ColocateMetaService { HttpMethod method = request.getRequest().method(); if (method.equals(HttpMethod.POST)) { - colocateIndex.markGroupUnstable(groupId, true); + colocateIndex.markGroupUnstable(groupId, "mark unstable via http api", true); } else if (method.equals(HttpMethod.DELETE)) { colocateIndex.markGroupStable(groupId, true); } else { diff --git a/fe/fe-core/src/main/java/org/apache/doris/httpv2/meta/ColocateMetaService.java b/fe/fe-core/src/main/java/org/apache/doris/httpv2/meta/ColocateMetaService.java index b9c75ffcbe..8fc22ae7b2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/httpv2/meta/ColocateMetaService.java +++ b/fe/fe-core/src/main/java/org/apache/doris/httpv2/meta/ColocateMetaService.java @@ -110,7 +110,7 @@ public class ColocateMetaService extends RestBaseController { String method = request.getMethod(); if ("POST".equalsIgnoreCase(method)) { - colocateIndex.markGroupUnstable(groupId, true); + colocateIndex.markGroupUnstable(groupId, "mark unstable via http api", true); } else if ("DELETE".equalsIgnoreCase(method)) { colocateIndex.markGroupStable(groupId, true); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/system/Backend.java b/fe/fe-core/src/main/java/org/apache/doris/system/Backend.java index 03fd69a6eb..0c740b4f37 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/system/Backend.java +++ b/fe/fe-core/src/main/java/org/apache/doris/system/Backend.java @@ -53,6 +53,9 @@ import java.util.concurrent.atomic.AtomicBoolean; */ 
public class Backend implements Writable { + // Represent a meaningless IP + public static final String DUMMY_IP = "0.0.0.0"; + public enum BackendState { using, /* backend is belong to a cluster*/ offline,