[Enhancement](data skew) extends show data skew (#32732)
This commit is contained in:
@ -20,7 +20,6 @@ package org.apache.doris.analysis;
|
||||
import org.apache.doris.catalog.Column;
|
||||
import org.apache.doris.catalog.Env;
|
||||
import org.apache.doris.catalog.ScalarType;
|
||||
import org.apache.doris.common.AnalysisException;
|
||||
import org.apache.doris.common.ErrorCode;
|
||||
import org.apache.doris.common.ErrorReport;
|
||||
import org.apache.doris.common.UserException;
|
||||
@ -34,7 +33,7 @@ import com.google.common.collect.ImmutableList;
|
||||
// show data skew from tbl [partition(p1, p2, ...)]
|
||||
public class ShowDataSkewStmt extends ShowStmt {
|
||||
public static final ImmutableList<String> TITLE_NAMES = new ImmutableList.Builder<String>()
|
||||
.add("BucketIdx").add("AvgRowCount").add("AvgDataSize")
|
||||
.add("PartitionName").add("BucketIdx").add("AvgRowCount").add("AvgDataSize")
|
||||
.add("Graph").add("Percent")
|
||||
.build();
|
||||
|
||||
@ -58,10 +57,6 @@ public class ShowDataSkewStmt extends ShowStmt {
|
||||
ConnectContext.get().getRemoteIP(),
|
||||
tblRef.getName().getDb() + "." + tblRef.getName().getTbl());
|
||||
}
|
||||
PartitionNames partitionNames = tblRef.getPartitionNames();
|
||||
if (partitionNames == null || partitionNames.getPartitionNames().size() != 1) {
|
||||
throw new AnalysisException("Should specify one and only one partition");
|
||||
}
|
||||
}
|
||||
|
||||
public String getDbName() {
|
||||
|
||||
@ -34,6 +34,7 @@ import com.google.common.collect.Maps;
|
||||
|
||||
import java.text.DecimalFormat;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
@ -267,55 +268,70 @@ public class MetadataViewer {
|
||||
List<List<String>> result = Lists.newArrayList();
|
||||
Env env = Env.getCurrentEnv();
|
||||
|
||||
if (partitionNames == null || partitionNames.getPartitionNames().size() != 1) {
|
||||
throw new DdlException("Should specify one and only one partitions");
|
||||
}
|
||||
|
||||
Database db = env.getInternalCatalog().getDbOrDdlException(dbName);
|
||||
OlapTable olapTable = db.getOlapTableOrDdlException(tblName);
|
||||
|
||||
if (olapTable.getPartitionNames().isEmpty()) {
|
||||
throw new DdlException("Can not find any partition from " + dbName + "." + tblName);
|
||||
}
|
||||
|
||||
// patition -> isTmep
|
||||
Map<String, Boolean> allPartionNames = new HashMap<>();
|
||||
if (partitionNames == null) {
|
||||
for (Partition p : olapTable.getPartitions()) {
|
||||
allPartionNames.put(p.getName(), false);
|
||||
}
|
||||
for (Partition p : olapTable.getTempPartitions()) {
|
||||
allPartionNames.put(p.getName(), true);
|
||||
}
|
||||
} else {
|
||||
for (String name : partitionNames.getPartitionNames()) {
|
||||
allPartionNames.put(name, partitionNames.isTemp());
|
||||
}
|
||||
}
|
||||
|
||||
olapTable.readLock();
|
||||
try {
|
||||
Partition partition = null;
|
||||
// check partition
|
||||
for (String partName : partitionNames.getPartitionNames()) {
|
||||
partition = olapTable.getPartition(partName, partitionNames.isTemp());
|
||||
for (Map.Entry<String, Boolean> partName : allPartionNames.entrySet()) {
|
||||
partition = olapTable.getPartition(partName.getKey(), partName.getValue());
|
||||
if (partition == null) {
|
||||
throw new DdlException("Partition does not exist: " + partName);
|
||||
}
|
||||
break;
|
||||
}
|
||||
DistributionInfo distributionInfo = partition.getDistributionInfo();
|
||||
List<Long> rowCountTabletInfos = Lists.newArrayListWithCapacity(distributionInfo.getBucketNum());
|
||||
List<Long> dataSizeTabletInfos = Lists.newArrayListWithCapacity(distributionInfo.getBucketNum());
|
||||
for (long i = 0; i < distributionInfo.getBucketNum(); i++) {
|
||||
rowCountTabletInfos.add(0L);
|
||||
dataSizeTabletInfos.add(0L);
|
||||
}
|
||||
|
||||
long totalSize = 0;
|
||||
for (MaterializedIndex mIndex : partition.getMaterializedIndices(IndexExtState.VISIBLE)) {
|
||||
List<Long> tabletIds = mIndex.getTabletIdsInOrder();
|
||||
for (int i = 0; i < tabletIds.size(); i++) {
|
||||
Tablet tablet = mIndex.getTablet(tabletIds.get(i));
|
||||
long rowCount = tablet.getRowCount(true);
|
||||
long dataSize = tablet.getDataSize(true);
|
||||
rowCountTabletInfos.set(i, rowCountTabletInfos.get(i) + rowCount);
|
||||
dataSizeTabletInfos.set(i, dataSizeTabletInfos.get(i) + dataSize);
|
||||
totalSize += dataSize;
|
||||
DistributionInfo distributionInfo = partition.getDistributionInfo();
|
||||
List<Long> rowCountTabletInfos = Lists.newArrayListWithCapacity(distributionInfo.getBucketNum());
|
||||
List<Long> dataSizeTabletInfos = Lists.newArrayListWithCapacity(distributionInfo.getBucketNum());
|
||||
for (long i = 0; i < distributionInfo.getBucketNum(); i++) {
|
||||
rowCountTabletInfos.add(0L);
|
||||
dataSizeTabletInfos.add(0L);
|
||||
}
|
||||
}
|
||||
|
||||
// graph
|
||||
for (int i = 0; i < distributionInfo.getBucketNum(); i++) {
|
||||
List<String> row = Lists.newArrayList();
|
||||
row.add(String.valueOf(i));
|
||||
row.add(rowCountTabletInfos.get(i).toString());
|
||||
row.add(dataSizeTabletInfos.get(i).toString());
|
||||
row.add(graph(dataSizeTabletInfos.get(i), totalSize));
|
||||
row.add(totalSize == dataSizeTabletInfos.get(i) ? (totalSize == 0L ? "0.00%" : "100.00%") :
|
||||
df.format((double) dataSizeTabletInfos.get(i) / totalSize));
|
||||
result.add(row);
|
||||
long totalSize = 0;
|
||||
for (MaterializedIndex mIndex : partition.getMaterializedIndices(IndexExtState.VISIBLE)) {
|
||||
List<Long> tabletIds = mIndex.getTabletIdsInOrder();
|
||||
for (int i = 0; i < tabletIds.size(); i++) {
|
||||
Tablet tablet = mIndex.getTablet(tabletIds.get(i));
|
||||
long rowCount = tablet.getRowCount(true);
|
||||
long dataSize = tablet.getDataSize(true);
|
||||
rowCountTabletInfos.set(i, rowCountTabletInfos.get(i) + rowCount);
|
||||
dataSizeTabletInfos.set(i, dataSizeTabletInfos.get(i) + dataSize);
|
||||
totalSize += dataSize;
|
||||
}
|
||||
}
|
||||
|
||||
// graph
|
||||
for (int i = 0; i < distributionInfo.getBucketNum(); i++) {
|
||||
List<String> row = Lists.newArrayList();
|
||||
row.add(partName.getKey());
|
||||
row.add(String.valueOf(i));
|
||||
row.add(rowCountTabletInfos.get(i).toString());
|
||||
row.add(dataSizeTabletInfos.get(i).toString());
|
||||
row.add(graph(dataSizeTabletInfos.get(i), totalSize));
|
||||
row.add(totalSize == dataSizeTabletInfos.get(i) ? (totalSize == 0L ? "0.00%" : "100.00%") :
|
||||
df.format((double) dataSizeTabletInfos.get(i) / totalSize));
|
||||
result.add(row);
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
olapTable.readUnlock();
|
||||
|
||||
@ -64,7 +64,7 @@ public class ShowReplicaTest extends TestWithFeService {
|
||||
executor = new ShowExecutor(connectContext, skewStmt);
|
||||
resultSet = executor.execute();
|
||||
Assert.assertEquals(10, resultSet.getResultRows().size());
|
||||
Assert.assertEquals(5, resultSet.getResultRows().get(0).size());
|
||||
Assert.assertEquals(6, resultSet.getResultRows().get(0).size());
|
||||
|
||||
// update tablets' data size and row count
|
||||
Database db = Env.getCurrentInternalCatalog().getDbOrAnalysisException("test");
|
||||
@ -87,8 +87,8 @@ public class ShowReplicaTest extends TestWithFeService {
|
||||
executor = new ShowExecutor(connectContext, skewStmt);
|
||||
resultSet = executor.execute();
|
||||
Assert.assertEquals(10, resultSet.getResultRows().size());
|
||||
Assert.assertEquals("4", resultSet.getResultRows().get(4).get(0));
|
||||
Assert.assertEquals(5, resultSet.getResultRows().get(0).size());
|
||||
Assert.assertEquals("4", resultSet.getResultRows().get(4).get(1));
|
||||
Assert.assertEquals(6, resultSet.getResultRows().get(0).size());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
||||
37
regression-test/suites/show_p0/test_show_data_skew.groovy
Normal file
37
regression-test/suites/show_p0/test_show_data_skew.groovy
Normal file
@ -0,0 +1,37 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
suite("test_show_data_skew") {
|
||||
sql """
|
||||
CREATE TABLE test_show_data_skew (
|
||||
id int,
|
||||
name string,
|
||||
pdate DATETIME)
|
||||
PARTITION BY RANGE(pdate) (
|
||||
FROM ("2023-04-16") TO ("2023-04-20") INTERVAL 1 DAY
|
||||
) DISTRIBUTED BY HASH(id) BUCKETS 5
|
||||
properties("replication_num" = "1");
|
||||
"""
|
||||
def result = sql """show data skew from test_show_data_skew;"""
|
||||
assertTrue(result.size() == 20)
|
||||
|
||||
def result2 = sql """show data skew from test_show_data_skew partition(p_20230416);"""
|
||||
assertTrue(result2.size() == 5)
|
||||
|
||||
def result3 = sql """show data skew from test_show_data_skew partition(p_20230416, p_20230418);"""
|
||||
assertTrue(result3.size() == 10)
|
||||
}
|
||||
Reference in New Issue
Block a user