From d278f400d459caff75936588c82b07d52586d24f Mon Sep 17 00:00:00 2001 From: caiconghui <55968745+caiconghui@users.noreply.github.com> Date: Wed, 13 Jul 2022 08:27:20 +0800 Subject: [PATCH] [enhancement](show data skew) Support show avg_row_count for data skew of one table (#10790) --- docs/.vuepress/sidebar/en/docs.js | 1 + docs/.vuepress/sidebar/zh-CN/docs.js | 1 + .../Show-Statements/SHOW-DATA-SKEW.md | 56 +++++++++++++++++++ .../Show-Statements/SHOW-DATA-SKEW.md | 56 +++++++++++++++++++ .../doris/analysis/ShowDataSkewStmt.java | 2 +- .../apache/doris/catalog/MetadataViewer.java | 21 ++++--- .../java/org/apache/doris/catalog/Tablet.java | 6 ++ .../common/proc/TabletHealthProcDir.java | 10 ++-- .../doris/analysis/AdminShowReplicaTest.java | 4 +- 9 files changed, 141 insertions(+), 16 deletions(-) create mode 100644 docs/en/docs/sql-manual/sql-reference/Show-Statements/SHOW-DATA-SKEW.md create mode 100644 docs/zh-CN/docs/sql-manual/sql-reference/Show-Statements/SHOW-DATA-SKEW.md diff --git a/docs/.vuepress/sidebar/en/docs.js b/docs/.vuepress/sidebar/en/docs.js index 8901be9be2..111470a313 100644 --- a/docs/.vuepress/sidebar/en/docs.js +++ b/docs/.vuepress/sidebar/en/docs.js @@ -750,6 +750,7 @@ module.exports = [ "SHOW-CREATE-TABLE", "SHOW-CREATE-MATERIALIZED-VIEW", "SHOW-DATA", + "SHOW-DATA-SKEW", "SHOW-DATABASE-ID", "SHOW-DATABASES", "SHOW-DELETE", diff --git a/docs/.vuepress/sidebar/zh-CN/docs.js b/docs/.vuepress/sidebar/zh-CN/docs.js index 428e9e65bd..a4ef26fc7d 100644 --- a/docs/.vuepress/sidebar/zh-CN/docs.js +++ b/docs/.vuepress/sidebar/zh-CN/docs.js @@ -750,6 +750,7 @@ module.exports = [ "SHOW-CREATE-TABLE", "SHOW-CREATE-MATERIALIZED-VIEW", "SHOW-DATA", + "SHOW-DATA-SKEW", "SHOW-DATABASE-ID", "SHOW-DATABASES", "SHOW-DELETE", diff --git a/docs/en/docs/sql-manual/sql-reference/Show-Statements/SHOW-DATA-SKEW.md b/docs/en/docs/sql-manual/sql-reference/Show-Statements/SHOW-DATA-SKEW.md new file mode 100644 index 0000000000..c54587eb72 --- /dev/null +++ 
b/docs/en/docs/sql-manual/sql-reference/Show-Statements/SHOW-DATA-SKEW.md @@ -0,0 +1,56 @@ +--- +{ +"title": "SHOW DATA SKEW", +"language": "en" +} +--- + + + +## SHOW-DATA-SKEW + +### Name + +SHOW DATA SKEW + +### Description + + This statement is used to view the data skew of a table or a partition. + + Syntax: + + SHOW DATA SKEW FROM [db_name.]tbl_name PARTITION (partition_name); + + Description: + + 1. Exactly one partition must be specified. For non-partitioned tables, the partition name is the same as the table name. + 2. The result shows the row count and data volume of each bucket under the specified partition, and the proportion of each bucket's data volume in the total data volume. + +### Example + + 1. View the data skew of the table + + SHOW DATA SKEW FROM db1.test PARTITION(p1); + +### Keywords + + SHOW, DATA, SKEW + +### Best Practice diff --git a/docs/zh-CN/docs/sql-manual/sql-reference/Show-Statements/SHOW-DATA-SKEW.md b/docs/zh-CN/docs/sql-manual/sql-reference/Show-Statements/SHOW-DATA-SKEW.md new file mode 100644 index 0000000000..fb1de1c6be --- /dev/null +++ b/docs/zh-CN/docs/sql-manual/sql-reference/Show-Statements/SHOW-DATA-SKEW.md @@ -0,0 +1,56 @@ +--- +{ +"title": "SHOW-DATA-SKEW", +"language": "zh-CN" +} +--- + + + +## SHOW-DATA-SKEW + +### Name + +SHOW DATA SKEW + +### Description + + 该语句用于查看表或某个分区的数据倾斜情况。 + + 语法: + + SHOW DATA SKEW FROM [db_name.]tbl_name PARTITION (partition_name); + + 说明: + + 1. 必须指定且仅指定一个分区。对于非分区表,分区名称同表名。 + 2. 结果将展示指定分区下,各个分桶的数据行数,数据量,以及每个分桶数据量在总数据量中的占比。 + +### Example + + 1. 
查看表的数据倾斜情况 + + SHOW DATA SKEW FROM db1.test PARTITION(p1); + +### Keywords + + SHOW,DATA,SKEW + +### Best Practice diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowDataSkewStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowDataSkewStmt.java index 21d7a5c2a3..49cb093f05 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowDataSkewStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowDataSkewStmt.java @@ -34,7 +34,7 @@ import com.google.common.collect.ImmutableList; // show data skew from tbl [partition(p1, p2, ...)] public class ShowDataSkewStmt extends ShowStmt { public static final ImmutableList TITLE_NAMES = new ImmutableList.Builder() - .add("BucketIdx").add("AvgDataSize") + .add("BucketIdx").add("AvgRowCount").add("AvgDataSize") .add("Graph").add("Percent") .build(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/MetadataViewer.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/MetadataViewer.java index 56d6411f4a..c1aa7000c7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/MetadataViewer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/MetadataViewer.java @@ -282,9 +282,11 @@ public class MetadataViewer { break; } DistributionInfo distributionInfo = partition.getDistributionInfo(); - List tabletInfos = Lists.newArrayListWithCapacity(distributionInfo.getBucketNum()); + List rowCountTabletInfos = Lists.newArrayListWithCapacity(distributionInfo.getBucketNum()); + List dataSizeTabletInfos = Lists.newArrayListWithCapacity(distributionInfo.getBucketNum()); for (long i = 0; i < distributionInfo.getBucketNum(); i++) { - tabletInfos.add(0L); + rowCountTabletInfos.add(0L); + dataSizeTabletInfos.add(0L); } long totalSize = 0; @@ -292,20 +294,23 @@ public class MetadataViewer { List tabletIds = mIndex.getTabletIdsInOrder(); for (int i = 0; i < tabletIds.size(); i++) { Tablet tablet = mIndex.getTablet(tabletIds.get(i)); + long rowCount = 
tablet.getRowCount(true); long dataSize = tablet.getDataSize(true); - tabletInfos.set(i, tabletInfos.get(i) + dataSize); + rowCountTabletInfos.set(i, rowCountTabletInfos.get(i) + rowCount); + dataSizeTabletInfos.set(i, dataSizeTabletInfos.get(i) + dataSize); totalSize += dataSize; } } // graph - for (int i = 0; i < tabletInfos.size(); i++) { + for (int i = 0; i < distributionInfo.getBucketNum(); i++) { List row = Lists.newArrayList(); row.add(String.valueOf(i)); - row.add(tabletInfos.get(i).toString()); - row.add(graph(tabletInfos.get(i), totalSize)); - row.add(totalSize == tabletInfos.get(i) - ? "100.00%" : df.format((double) tabletInfos.get(i) / totalSize)); + row.add(rowCountTabletInfos.get(i).toString()); + row.add(dataSizeTabletInfos.get(i).toString()); + row.add(graph(dataSizeTabletInfos.get(i), totalSize)); + row.add(totalSize == dataSizeTabletInfos.get(i) + ? "100.00%" : df.format((double) dataSizeTabletInfos.get(i) / totalSize)); result.add(row); } } finally { diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java index 5642455c4d..007c5410f9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java @@ -394,6 +394,12 @@ public class Tablet extends MetaObject implements Writable { return singleReplica ? Double.valueOf(s.average().orElse(0)).longValue() : s.sum(); } + public long getRowCount(boolean singleReplica) { + LongStream s = replicas.stream().filter(r -> r.getState() == ReplicaState.NORMAL) + .mapToLong(Replica::getRowCount); + return singleReplica ? Double.valueOf(s.average().orElse(0)).longValue() : s.sum(); + } + /** * A replica is healthy only if * 1. 
the backend is available diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/proc/TabletHealthProcDir.java b/fe/fe-core/src/main/java/org/apache/doris/common/proc/TabletHealthProcDir.java index c88377924b..fd64d76be9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/proc/TabletHealthProcDir.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/proc/TabletHealthProcDir.java @@ -290,11 +290,11 @@ public class TabletHealthProcDir implements ProcDirInterface { this.colocateMismatchNum += other.colocateMismatchNum; this.colocateRedundantNum += other.colocateRedundantNum; this.needFurtherRepairNum += other.needFurtherRepairNum; - this.unrecoverableNum += unrecoverableNum; - this.replicaCompactionTooSlowNum += replicaCompactionTooSlowNum; - this.inconsistentNum += inconsistentNum; - this.oversizeNum += oversizeNum; - this.cloningNum += cloningNum; + this.unrecoverableNum += other.unrecoverableNum; + this.replicaCompactionTooSlowNum += other.replicaCompactionTooSlowNum; + this.inconsistentNum += other.inconsistentNum; + this.oversizeNum += other.oversizeNum; + this.cloningNum += other.cloningNum; return this; } else if (other.summary) { return other.reduce(this); diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/AdminShowReplicaTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/AdminShowReplicaTest.java index 7eab559cd4..81dfc8e462 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/analysis/AdminShowReplicaTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/AdminShowReplicaTest.java @@ -64,7 +64,7 @@ public class AdminShowReplicaTest extends TestWithFeService { executor = new ShowExecutor(connectContext, skewStmt); resultSet = executor.execute(); Assert.assertEquals(10, resultSet.getResultRows().size()); - Assert.assertEquals(4, resultSet.getResultRows().get(0).size()); + Assert.assertEquals(5, resultSet.getResultRows().get(0).size()); // update tablets' data size and row count Database db = 
Catalog.getCurrentInternalCatalog().getDbOrAnalysisException("default_cluster:test"); @@ -88,7 +88,7 @@ public class AdminShowReplicaTest extends TestWithFeService { resultSet = executor.execute(); Assert.assertEquals(10, resultSet.getResultRows().size()); Assert.assertEquals("4", resultSet.getResultRows().get(4).get(0)); - Assert.assertEquals(4, resultSet.getResultRows().get(0).size()); + Assert.assertEquals(5, resultSet.getResultRows().get(0).size()); } @Test