[Enhancement](data skew) extends show data skew (#32732)

This commit is contained in:
Jensen
2024-03-28 17:13:36 +08:00
committed by yiguolei
parent 3a6c37c6d5
commit 407f8642da
4 changed files with 94 additions and 46 deletions

View File

@ -20,7 +20,6 @@ package org.apache.doris.analysis;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.ScalarType;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.ErrorCode;
import org.apache.doris.common.ErrorReport;
import org.apache.doris.common.UserException;
@ -34,7 +33,7 @@ import com.google.common.collect.ImmutableList;
// show data skew from tbl [partition(p1, p2, ...)]
public class ShowDataSkewStmt extends ShowStmt {
public static final ImmutableList<String> TITLE_NAMES = new ImmutableList.Builder<String>()
.add("BucketIdx").add("AvgRowCount").add("AvgDataSize")
.add("PartitionName").add("BucketIdx").add("AvgRowCount").add("AvgDataSize")
.add("Graph").add("Percent")
.build();
@ -58,10 +57,6 @@ public class ShowDataSkewStmt extends ShowStmt {
ConnectContext.get().getRemoteIP(),
tblRef.getName().getDb() + "." + tblRef.getName().getTbl());
}
PartitionNames partitionNames = tblRef.getPartitionNames();
if (partitionNames == null || partitionNames.getPartitionNames().size() != 1) {
throw new AnalysisException("Should specify one and only one partition");
}
}
public String getDbName() {

View File

@ -34,6 +34,7 @@ import com.google.common.collect.Maps;
import java.text.DecimalFormat;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@ -267,55 +268,70 @@ public class MetadataViewer {
List<List<String>> result = Lists.newArrayList();
Env env = Env.getCurrentEnv();
if (partitionNames == null || partitionNames.getPartitionNames().size() != 1) {
throw new DdlException("Should specify one and only one partitions");
}
Database db = env.getInternalCatalog().getDbOrDdlException(dbName);
OlapTable olapTable = db.getOlapTableOrDdlException(tblName);
if (olapTable.getPartitionNames().isEmpty()) {
throw new DdlException("Can not find any partition from " + dbName + "." + tblName);
}
// patition -> isTmep
Map<String, Boolean> allPartionNames = new HashMap<>();
if (partitionNames == null) {
for (Partition p : olapTable.getPartitions()) {
allPartionNames.put(p.getName(), false);
}
for (Partition p : olapTable.getTempPartitions()) {
allPartionNames.put(p.getName(), true);
}
} else {
for (String name : partitionNames.getPartitionNames()) {
allPartionNames.put(name, partitionNames.isTemp());
}
}
olapTable.readLock();
try {
Partition partition = null;
// check partition
for (String partName : partitionNames.getPartitionNames()) {
partition = olapTable.getPartition(partName, partitionNames.isTemp());
for (Map.Entry<String, Boolean> partName : allPartionNames.entrySet()) {
partition = olapTable.getPartition(partName.getKey(), partName.getValue());
if (partition == null) {
throw new DdlException("Partition does not exist: " + partName);
}
break;
}
DistributionInfo distributionInfo = partition.getDistributionInfo();
List<Long> rowCountTabletInfos = Lists.newArrayListWithCapacity(distributionInfo.getBucketNum());
List<Long> dataSizeTabletInfos = Lists.newArrayListWithCapacity(distributionInfo.getBucketNum());
for (long i = 0; i < distributionInfo.getBucketNum(); i++) {
rowCountTabletInfos.add(0L);
dataSizeTabletInfos.add(0L);
}
long totalSize = 0;
for (MaterializedIndex mIndex : partition.getMaterializedIndices(IndexExtState.VISIBLE)) {
List<Long> tabletIds = mIndex.getTabletIdsInOrder();
for (int i = 0; i < tabletIds.size(); i++) {
Tablet tablet = mIndex.getTablet(tabletIds.get(i));
long rowCount = tablet.getRowCount(true);
long dataSize = tablet.getDataSize(true);
rowCountTabletInfos.set(i, rowCountTabletInfos.get(i) + rowCount);
dataSizeTabletInfos.set(i, dataSizeTabletInfos.get(i) + dataSize);
totalSize += dataSize;
DistributionInfo distributionInfo = partition.getDistributionInfo();
List<Long> rowCountTabletInfos = Lists.newArrayListWithCapacity(distributionInfo.getBucketNum());
List<Long> dataSizeTabletInfos = Lists.newArrayListWithCapacity(distributionInfo.getBucketNum());
for (long i = 0; i < distributionInfo.getBucketNum(); i++) {
rowCountTabletInfos.add(0L);
dataSizeTabletInfos.add(0L);
}
}
// graph
for (int i = 0; i < distributionInfo.getBucketNum(); i++) {
List<String> row = Lists.newArrayList();
row.add(String.valueOf(i));
row.add(rowCountTabletInfos.get(i).toString());
row.add(dataSizeTabletInfos.get(i).toString());
row.add(graph(dataSizeTabletInfos.get(i), totalSize));
row.add(totalSize == dataSizeTabletInfos.get(i) ? (totalSize == 0L ? "0.00%" : "100.00%") :
df.format((double) dataSizeTabletInfos.get(i) / totalSize));
result.add(row);
long totalSize = 0;
for (MaterializedIndex mIndex : partition.getMaterializedIndices(IndexExtState.VISIBLE)) {
List<Long> tabletIds = mIndex.getTabletIdsInOrder();
for (int i = 0; i < tabletIds.size(); i++) {
Tablet tablet = mIndex.getTablet(tabletIds.get(i));
long rowCount = tablet.getRowCount(true);
long dataSize = tablet.getDataSize(true);
rowCountTabletInfos.set(i, rowCountTabletInfos.get(i) + rowCount);
dataSizeTabletInfos.set(i, dataSizeTabletInfos.get(i) + dataSize);
totalSize += dataSize;
}
}
// graph
for (int i = 0; i < distributionInfo.getBucketNum(); i++) {
List<String> row = Lists.newArrayList();
row.add(partName.getKey());
row.add(String.valueOf(i));
row.add(rowCountTabletInfos.get(i).toString());
row.add(dataSizeTabletInfos.get(i).toString());
row.add(graph(dataSizeTabletInfos.get(i), totalSize));
row.add(totalSize == dataSizeTabletInfos.get(i) ? (totalSize == 0L ? "0.00%" : "100.00%") :
df.format((double) dataSizeTabletInfos.get(i) / totalSize));
result.add(row);
}
}
} finally {
olapTable.readUnlock();

View File

@ -64,7 +64,7 @@ public class ShowReplicaTest extends TestWithFeService {
executor = new ShowExecutor(connectContext, skewStmt);
resultSet = executor.execute();
Assert.assertEquals(10, resultSet.getResultRows().size());
Assert.assertEquals(5, resultSet.getResultRows().get(0).size());
Assert.assertEquals(6, resultSet.getResultRows().get(0).size());
// update tablets' data size and row count
Database db = Env.getCurrentInternalCatalog().getDbOrAnalysisException("test");
@ -87,8 +87,8 @@ public class ShowReplicaTest extends TestWithFeService {
executor = new ShowExecutor(connectContext, skewStmt);
resultSet = executor.execute();
Assert.assertEquals(10, resultSet.getResultRows().size());
Assert.assertEquals("4", resultSet.getResultRows().get(4).get(0));
Assert.assertEquals(5, resultSet.getResultRows().get(0).size());
Assert.assertEquals("4", resultSet.getResultRows().get(4).get(1));
Assert.assertEquals(6, resultSet.getResultRows().get(0).size());
}
@Test

View File

@ -0,0 +1,37 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
suite("test_show_data_skew") {
sql """
CREATE TABLE test_show_data_skew (
id int,
name string,
pdate DATETIME)
PARTITION BY RANGE(pdate) (
FROM ("2023-04-16") TO ("2023-04-20") INTERVAL 1 DAY
) DISTRIBUTED BY HASH(id) BUCKETS 5
properties("replication_num" = "1");
"""
def result = sql """show data skew from test_show_data_skew;"""
assertTrue(result.size() == 20)
def result2 = sql """show data skew from test_show_data_skew partition(p_20230416);"""
assertTrue(result2.size() == 5)
def result3 = sql """show data skew from test_show_data_skew partition(p_20230416, p_20230418);"""
assertTrue(result3.size() == 10)
}