From 032c00e3c94e02175a504efd25dded3c4ce5d6d8 Mon Sep 17 00:00:00 2001
From: yujun
Date: Thu, 10 Oct 2024 11:15:02 +0800
Subject: [PATCH] [branch-2.1](create table) show failed detail msg #41463 (#41544)

cherry-pick: #41463
---
 .../common/util/DynamicPartitionUtil.java     |  3 +-
 .../java/org/apache/doris/system/Backend.java | 74 +++++++++++++++++++
 .../doris/system/SystemInfoService.java       | 24 ++++++-
 .../apache/doris/catalog/CreateTableTest.java |  8 ++-
 .../doris/catalog/ModifyBackendTest.java      | 15 +++--
 .../trees/plans/CreateTableCommandTest.java   |  5 +-
 6 files changed, 120 insertions(+), 9 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java
index bcf2c0303e..59a38348f8 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java
@@ -257,7 +257,8 @@ public class DynamicPartitionUtil {
         } catch (DdlException e) {
             throw new DdlException("Failed to find enough backend for ssd storage medium. When setting "
                     + DynamicPartitionProperty.HOT_PARTITION_NUM + " > 0, the hot partitions will store "
-                    + "in ssd. Please check the replication num,replication tag and storage medium.");
+                    + "in ssd. Please check the replication num,replication tag and storage medium."
+                    + Env.getCurrentSystemInfo().getDetailsForCreateReplica(replicaAlloc));
         }
     }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/system/Backend.java b/fe/fe-core/src/main/java/org/apache/doris/system/Backend.java
index 91f1624d56..80a3d91795 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/system/Backend.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/system/Backend.java
@@ -248,6 +248,80 @@ public class Backend implements Writable {
         this.backendStatus.isLoadDisabled = isLoadDisabled;
     }
 
+    /**
+     * Builds a bracketed summary of this backend for replica-creation failure messages.
+     *
+     * The summary holds the backend id and host, followed by either the reason the backend
+     * is excluded (not alive, decommissioned, or compute node) or, per storage medium, the
+     * number of disks that are ok, bad (not alive), or flagged by exceedLimit(true).
+     *
+     * @return the summary, e.g. "[backendId=1, host=h, isAlive=false, exclude it]"
+     */
+    public String getDetailsForCreateReplica() {
+        int hddBad = 0;
+        int hddExceedLimit = 0;
+        int hddOk = 0;
+        int ssdBad = 0;
+        int ssdExceedLimit = 0;
+        int ssdOk = 0;
+        for (DiskInfo disk : disksRef.values()) {
+            TStorageMedium storageMedium = disk.getStorageMedium();
+            if (storageMedium == TStorageMedium.HDD) {
+                if (!disk.isAlive()) {
+                    hddBad++;
+                } else if (disk.exceedLimit(true)) {
+                    hddExceedLimit++;
+                } else {
+                    hddOk++;
+                }
+            } else if (storageMedium == TStorageMedium.SSD) {
+                if (!disk.isAlive()) {
+                    ssdBad++;
+                } else if (disk.exceedLimit(true)) {
+                    ssdExceedLimit++;
+                } else {
+                    ssdOk++;
+                }
+            }
+        }
+
+        StringBuilder sb = new StringBuilder("[");
+        sb.append("backendId=").append(id);
+        sb.append(", host=").append(host);
+        if (!isAlive()) {
+            sb.append(", isAlive=false, exclude it");
+        } else if (isDecommissioned()) {
+            sb.append(", isDecommissioned=true, exclude it");
+        } else if (isComputeNode()) {
+            sb.append(", isComputeNode=true, exclude it");
+        } else {
+            sb.append(", hdd disks count={");
+            if (hddOk > 0) {
+                sb.append("ok=").append(hddOk).append(",");
+            }
+            if (hddBad > 0) {
+                sb.append("bad=").append(hddBad).append(",");
+            }
+            if (hddExceedLimit > 0) {
+                sb.append("capExceedLimit=").append(hddExceedLimit).append(",");
+            }
+            sb.append("}, ssd disk count={");
+            if (ssdOk > 0) {
+                sb.append("ok=").append(ssdOk).append(",");
+            }
+            if (ssdBad > 0) {
+                sb.append("bad=").append(ssdBad).append(",");
+            }
+            if (ssdExceedLimit > 0) {
+                sb.append("capExceedLimit=").append(ssdExceedLimit).append(",");
+            }
+            sb.append("}");
+        }
+        sb.append("]");
+
+        return sb.toString();
+    }
+
     // for test only
     public void updateOnce(int bePort, int httpPort, int beRpcPort) {
         if (this.bePort != bePort) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/system/SystemInfoService.java b/fe/fe-core/src/main/java/org/apache/doris/system/SystemInfoService.java
index 56ef540a24..0ca452992f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/system/SystemInfoService.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/system/SystemInfoService.java
@@ -549,7 +549,8 @@ public class SystemInfoService {
             if (!failedEntries.isEmpty()) {
                 String failedMsg = Joiner.on("\n").join(failedEntries);
                 throw new DdlException("Failed to find enough backend, please check the replication num,"
-                        + "replication tag and storage medium and avail capacity of backends.\n"
+                        + "replication tag and storage medium and avail capacity of backends "
+                        + "or maybe all be on same host." + getDetailsForCreateReplica(replicaAlloc) + "\n"
                         + "Create failed replications:\n" + failedMsg);
             }
         }
@@ -558,6 +559,27 @@ public class SystemInfoService {
         return Pair.of(chosenBackendIds, storageMedium);
     }
 
+    /**
+     * Describes, for every tag in the given replica allocation, the backends carrying that tag.
+     *
+     * Tags are matched with equals() rather than reference identity, because a backend's tag
+     * object is not guaranteed to be the same instance as the key in the allocation map.
+     *
+     * @param replicaAlloc replica allocation whose tags select the backends to describe
+     * @return text starting with " Backends details: ", meant to be appended to error messages
+     */
+    public String getDetailsForCreateReplica(ReplicaAllocation replicaAlloc) {
+        StringBuilder sb = new StringBuilder(" Backends details: ");
+        for (Tag tag : replicaAlloc.getAllocMap().keySet()) {
+            sb.append("backends with tag ").append(tag).append(" is ");
+            sb.append(idToBackendRef.values().stream().filter(be -> tag.equals(be.getLocationTag()))
+                    .map(Backend::getDetailsForCreateReplica)
+                    .collect(Collectors.toList()));
+            sb.append(", ");
+        }
+        return sb.toString();
+    }
+
     /**
      * Select a set of backends by the given policy.
      *
diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateTableTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateTableTest.java
index a2867c5d96..4a63953635 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateTableTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateTableTest.java
@@ -279,7 +279,9 @@ public class CreateTableTest extends TestWithFeService {
         ConfigBase.setMutableConfig("disable_storage_medium_check", "false");
         ExceptionChecker
                 .expectThrowsWithMsg(DdlException.class,
-                        "Failed to find enough backend, please check the replication num,replication tag and storage medium and avail capacity of backends.\n"
+                        "Failed to find enough backend, please check the replication num,replication tag and storage medium and avail capacity of backends "
+                                + "or maybe all be on same host."
+                                + Env.getCurrentSystemInfo().getDetailsForCreateReplica(new ReplicaAllocation((short) 1)) + "\n"
                                 + "Create failed replications:\n"
                                 + "replication tag: {\"location\" : \"default\"}, replication num: 1, storage medium: SSD",
                         () -> createTable(
@@ -288,7 +290,9 @@ public class CreateTableTest extends TestWithFeService {
 
         ExceptionChecker
                 .expectThrowsWithMsg(DdlException.class,
-                        "Failed to find enough backend, please check the replication num,replication tag and storage medium and avail capacity of backends.\n"
+                        "Failed to find enough backend, please check the replication num,replication tag and storage medium and avail capacity of backends "
+                                + "or maybe all be on same host."
+                                + Env.getCurrentSystemInfo().getDetailsForCreateReplica(new ReplicaAllocation((short) 1)) + "\n"
                                 + "Create failed replications:\n"
                                 + "replication tag: {\"location\" : \"default\"}, replication num: 1, storage medium: SSD",
                         () -> createTable("create table test.tb7_1(key1 int, key2 varchar(10))\n"
diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/ModifyBackendTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/ModifyBackendTest.java
index ca4a658c4d..2da4c59082 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/catalog/ModifyBackendTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/ModifyBackendTest.java
@@ -31,6 +31,7 @@ import org.apache.doris.system.Backend;
 import org.apache.doris.system.SystemInfoService;
 import org.apache.doris.utframe.UtFrameUtils;
 
+import com.google.common.collect.Maps;
 import org.junit.AfterClass;
 import org.junit.Assert;
 import org.junit.BeforeClass;
@@ -82,7 +83,9 @@ public class ModifyBackendTest {
                 + "buckets 3 properties(\n" + "\"replication_num\" = \"1\"\n" + ");";
         CreateTableStmt createStmt = (CreateTableStmt) UtFrameUtils.parseAndAnalyzeStmt(createStr, connectContext);
         ExceptionChecker.expectThrowsWithMsg(DdlException.class,
-                "Failed to find enough backend, please check the replication num,replication tag and storage medium and avail capacity of backends.\n"
+                "Failed to find enough backend, please check the replication num,replication tag and storage medium and avail capacity of backends "
+                        + "or maybe all be on same host."
+                        + Env.getCurrentSystemInfo().getDetailsForCreateReplica(new ReplicaAllocation((short) 1)) + "\n"
                         + "Create failed replications:\n"
                         + "replication tag: {\"location\" : \"default\"}, replication num: 1, storage medium: HDD",
                 () -> DdlExecutor.execute(Env.getCurrentEnv(), createStmt));
@@ -151,9 +154,13 @@ public class ModifyBackendTest {
         String partName = tbl.getPartitionNames().stream().findFirst().get();
         String wrongAlterStr = "alter table test.tbl4 modify partition " + partName
                 + " set ('replication_allocation' = 'tag.location.zonex:1')";
-        ExceptionChecker.expectThrowsWithMsg(AnalysisException.class, "errCode = 2, detailMessage = "
-                        + "errCode = 2, detailMessage = Failed to find enough backend, "
-                        + "please check the replication num,replication tag and storage medium and avail capacity of backends.\n"
+        Map<Tag, Short> allocMap = Maps.newHashMap();
+        allocMap.put(Tag.create(Tag.TYPE_LOCATION, "zonex"), (short) 1);
+        ExceptionChecker.expectThrowsWithMsg(AnalysisException.class, "errCode = 2,"
+                        + " detailMessage = Failed to find enough backend, "
+                        + "please check the replication num,replication tag and storage medium and avail capacity of backends "
+                        + "or maybe all be on same host."
+                        + Env.getCurrentSystemInfo().getDetailsForCreateReplica(new ReplicaAllocation(allocMap)) + "\n"
                         + "Create failed replications:\n"
                         + "replication tag: {\"location\" : \"zonex\"}, replication num: 1, storage medium: null",
                 () -> UtFrameUtils.parseAndAnalyzeStmt(wrongAlterStr, connectContext));
diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/CreateTableCommandTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/CreateTableCommandTest.java
index 741faea4a1..c64354fa96 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/CreateTableCommandTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/CreateTableCommandTest.java
@@ -27,6 +27,7 @@ import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.Database;
 import org.apache.doris.catalog.Env;
 import org.apache.doris.catalog.OlapTable;
+import org.apache.doris.catalog.ReplicaAllocation;
 import org.apache.doris.catalog.ScalarType;
 import org.apache.doris.catalog.TabletMeta;
 import org.apache.doris.catalog.Type;
@@ -285,7 +286,9 @@ public class CreateTableCommandTest extends TestWithFeService {
 
         ConfigBase.setMutableConfig("disable_storage_medium_check", "false");
         checkThrow(org.apache.doris.common.DdlException.class,
-                "Failed to find enough backend, please check the replication num,replication tag and storage medium.\n"
+                "Failed to find enough backend, please check the replication num,replication tag and storage medium and avail capacity of backends "
+                        + "or maybe all be on same host."
+                        + Env.getCurrentSystemInfo().getDetailsForCreateReplica(new ReplicaAllocation((short) 1)) + "\n"
                         + "Create failed replications:\n"
                         + "replication tag: {\"location\" : \"default\"}, replication num: 1, storage medium: SSD",
                 () -> createTable("create table test.tb7(key1 int, key2 varchar(10)) distributed by hash(key1) \n"