From b87ea687208a286119b3270e64ee7aa2710a67ac Mon Sep 17 00:00:00 2001 From: Jibing-Li <64681310+Jibing-Li@users.noreply.github.com> Date: Thu, 21 Sep 2023 10:28:37 +0800 Subject: [PATCH] [Fix](statistics) Fix analyze olap table couldn't get partition names bug (#24696) Call table.getPartitionNames() to get all partitions when analyzing an OLAP table. The method must not return null; otherwise, analyzing an OLAP table does nothing. --- .../apache/doris/analysis/AnalyzeTblStmt.java | 15 ++-- .../statistics/test_basic_statistics.groovy | 75 +++++++++++++++++++ 2 files changed, 83 insertions(+), 7 deletions(-) create mode 100644 regression-test/suites/statistics/test_basic_statistics.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java index 5ca1ecd76c..cbc66f367f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java @@ -24,6 +24,7 @@ import org.apache.doris.catalog.Env; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.TableIf; import org.apache.doris.catalog.View; +import org.apache.doris.catalog.external.ExternalTable; import org.apache.doris.catalog.external.HMSExternalTable; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.Config; @@ -41,6 +42,7 @@ import org.apache.doris.statistics.util.StatisticsUtil; import com.google.common.collect.Sets; import org.apache.commons.lang3.StringUtils; +import java.util.Collections; import java.util.List; import java.util.Optional; import java.util.Set; @@ -239,16 +241,15 @@ public class AnalyzeTblStmt extends AnalyzeStmt { public Set getPartitionNames() { if (partitionNames == null || partitionNames.getPartitionNames() == null) { - return null; + if (table instanceof ExternalTable) { + // For an external table, don't return all partitions when partitionNames is not set.
+ // That is because the ANALYZE TABLE command for an external table can specify partition names. + return Collections.emptySet(); + } + return table.getPartitionNames(); } Set partitions = Sets.newHashSet(); partitions.addAll(partitionNames.getPartitionNames()); - /* - if (isSamplingPartition()) { - int partNum = ConnectContext.get().getSessionVariable().getExternalTableAnalyzePartNum(); - partitions = partitions.stream().limit(partNum).collect(Collectors.toSet()); - } - */ return partitions; } diff --git a/regression-test/suites/statistics/test_basic_statistics.groovy b/regression-test/suites/statistics/test_basic_statistics.groovy new file mode 100644 index 0000000000..f819be491e --- /dev/null +++ b/regression-test/suites/statistics/test_basic_statistics.groovy @@ -0,0 +1,75 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License.
+
+suite("test_basic_statistics") {
+    String dbName = "test_basic_statistics"
+    String tableName = "test_table_1"
+
+    sql """
+        DROP DATABASE IF EXISTS `${dbName}`
+    """
+
+    sql """
+        CREATE DATABASE `${dbName}`
+    """
+
+    sql """ use `${dbName}`"""
+
+    sql """
+        DROP TABLE IF EXISTS `${tableName}`
+    """
+
+    sql """
+        CREATE TABLE IF NOT EXISTS `${tableName}` (
+            `id` int(11) not null comment "",
+            `name` varchar(100) null comment ""
+        ) engine=olap
+        DUPLICATE KEY(`id`)
+        DISTRIBUTED BY HASH(`id`) BUCKETS 1 properties("replication_num" = "1")
+    """
+
+    sql """
+        INSERT INTO `${tableName}` VALUES (1, 'name1'), (2, 'name2'), (3, 'name3'), (4, 'name4'), (5, 'name5'), (6, 'name6'), (7, 'name7'), (8, 'name8'), (9, 'name9')
+    """
+
+    sql """ analyze table ${tableName} with sync""" // no partition names are given here
+    def idStats = sql """show column stats ${tableName} (id)"""
+    assertTrue(idStats.size() == 1) // exactly one stats row is expected for `id`
+    assertTrue(idStats[0][0] == "id") // first field echoes the requested column
+    assertTrue(idStats[0][1] == "9.0")
+    assertTrue(idStats[0][2] == "9.0")
+    assertTrue(idStats[0][3] == "0.0")
+    assertTrue(idStats[0][4] == "40.0")
+    assertTrue(idStats[0][5] == "4.0")
+    assertTrue(idStats[0][6] == "1")
+    assertTrue(idStats[0][7] == "9")
+
+    def nameStats = sql """show column stats ${tableName} (name)"""
+    assertTrue(nameStats.size() == 1) // exactly one stats row is expected for `name`
+    assertTrue(nameStats[0][0] == "name") // first field echoes the requested column
+    assertTrue(nameStats[0][1] == "9.0")
+    assertTrue(nameStats[0][2] == "9.0")
+    assertTrue(nameStats[0][3] == "0.0")
+    assertTrue(nameStats[0][4] == "50.0")
+    assertTrue(nameStats[0][5] == "5.0")
+    assertTrue(nameStats[0][6] == "\'name1\'")
+    assertTrue(nameStats[0][7] == "\'name9\'")
+
+    sql """drop table ${tableName}"""
+    sql """drop database ${dbName}"""
+
+}