From 78204f7c92c835bcfcddd3c9508f34a4d752a613 Mon Sep 17 00:00:00 2001 From: Jibing-Li <64681310+Jibing-Li@users.noreply.github.com> Date: Tue, 31 Oct 2023 11:32:47 +0800 Subject: [PATCH] [Fix](statistics)Fix external couldn't analyze database bug (#26025) --- .../scripts/create_preinstalled_table.hql | 9 ++ .../apache/doris/analysis/AnalyzeTblStmt.java | 6 +- .../org/apache/doris/qe/StmtExecutor.java | 2 +- .../doris/statistics/AnalysisManager.java | 7 +- .../hive/test_hive_statistics_p0.groovy | 83 +++++++++++++++++++ .../hive/test_hive_analyze_db.groovy | 4 +- 6 files changed, 103 insertions(+), 8 deletions(-) create mode 100644 regression-test/suites/external_table_p0/hive/test_hive_statistics_p0.groovy diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql index 2fbdbbe6db..6b3e04e25b 100644 --- a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql +++ b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql @@ -1764,3 +1764,12 @@ msck repair table orc_decimal_table; show tables; + + +create database stats_test; +use stats_test; +create table stats_test1 (id INT, value STRING) STORED AS ORC; +create table stats_test2 (id INT, value STRING) STORED AS PARQUET; + +insert into stats_test1 values (1, 'name1'), (2, 'name2'), (3, 'name3'); +insert into stats_test2 values (1, 'name1'), (2, 'name2'), (3, 'name3'); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java index 2c9abe94c7..f69787f7e3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java @@ -105,13 +105,17 @@ public class AnalyzeTblStmt extends AnalyzeStmt { } public AnalyzeTblStmt(AnalyzeProperties analyzeProperties, TableName tableName, List columnNames, long dbId, - TableIf table) { + TableIf table) throws AnalysisException { super(analyzeProperties); this.tableName = tableName; this.columnNames = columnNames; this.dbId = dbId; this.table = table; this.isAllColumns = columnNames == null; + String catalogName = tableName.getCtl(); + CatalogIf catalog = Env.getCurrentEnv().getCatalogMgr() + .getCatalogOrAnalysisException(catalogName); + this.catalogId = catalog.getId(); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java index 92fd39f89f..56f3cd84a2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java @@ -2059,7 +2059,7 @@ public class StmtExecutor { context.getState().setOk(); } - private void handleAnalyzeStmt() throws DdlException { + private void handleAnalyzeStmt() throws DdlException, AnalysisException { context.env.getAnalysisManager().createAnalyze((AnalyzeStmt) parsedStmt, isProxy); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index 972e90c583..ef6cafa1f3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -287,7 +287,7 @@ public class AnalysisManager extends Daemon implements Writable { return statisticsCache; } - public void createAnalyze(AnalyzeStmt analyzeStmt, boolean proxy) throws DdlException { + public void createAnalyze(AnalyzeStmt analyzeStmt, boolean proxy) throws DdlException, AnalysisException { if (!StatisticsUtil.statsTblAvailable() && !FeConstants.runningUnitTest) { throw new DdlException("Stats table not available, please make sure your cluster status is normal"); } @@ -298,7 +298,7 @@ public class AnalysisManager extends Daemon implements Writable { } } - public void createAnalysisJobs(AnalyzeDBStmt analyzeDBStmt, boolean proxy) throws DdlException { + public void createAnalysisJobs(AnalyzeDBStmt analyzeDBStmt, boolean proxy) throws DdlException, AnalysisException { DatabaseIf db = analyzeDBStmt.getDb(); // Using auto analyzer if user specifies. if (analyzeDBStmt.getAnalyzeProperties().getProperties().containsKey("use.auto.analyzer")) { @@ -311,7 +311,8 @@ public class AnalysisManager extends Daemon implements Writable { } } - public List buildAnalysisInfosForDB(DatabaseIf db, AnalyzeProperties analyzeProperties) { + public List buildAnalysisInfosForDB(DatabaseIf db, AnalyzeProperties analyzeProperties) + throws AnalysisException { db.readLock(); List tbls = db.getTables(); List analysisInfos = new ArrayList<>(); diff --git a/regression-test/suites/external_table_p0/hive/test_hive_statistics_p0.groovy b/regression-test/suites/external_table_p0/hive/test_hive_statistics_p0.groovy new file mode 100644 index 0000000000..501daaf857 --- /dev/null +++ b/regression-test/suites/external_table_p0/hive/test_hive_statistics_p0.groovy @@ -0,0 +1,83 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_hive_statistics_p0", "all_types,p0,external,hive,external_docker,external_docker_hive") { + String enabled = context.config.otherConfigs.get("enableHiveTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + try { + String hms_port = context.config.otherConfigs.get("hms_port") + String catalog_name = "test_hive_statistics_p0" + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + + sql """drop catalog if exists ${catalog_name}""" + sql """create catalog if not exists ${catalog_name} properties ( + "type"="hms", + 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}' + );""" + sql """use `${catalog_name}`.`stats_test`""" + sql """analyze database stats_test with sync""" + def result = sql """show column stats stats_test1(id);""" + assertEquals(1, result.size()) + assertEquals("id", result[0][0]) + assertEquals("3.0", result[0][1]) + assertEquals("3.0", result[0][2]) + assertEquals("0.0", result[0][3]) + assertEquals("12.0", result[0][4]) + assertEquals("4.0", result[0][5]) + assertEquals("1", result[0][6]) + assertEquals("3", result[0][7]) + + result = sql """show column stats stats_test1(value);""" + assertEquals(1, result.size()) + assertEquals("value", result[0][0]) + assertEquals("3.0", result[0][1]) + assertEquals("3.0", result[0][2]) + assertEquals("0.0", result[0][3]) + assertEquals("15.0", result[0][4]) + assertEquals("5.0", result[0][5]) + assertEquals("\'name1\'" , result[0][6]) + assertEquals("\'name3\'" , result[0][7]) + + result = sql """show column stats stats_test2(id);""" + assertEquals(1, result.size()) + assertEquals("id", result[0][0]) + assertEquals("3.0", result[0][1]) + assertEquals("3.0", result[0][2]) + assertEquals("0.0", result[0][3]) + assertEquals("12.0", result[0][4]) + assertEquals("4.0", result[0][5]) + assertEquals("1", result[0][6]) + assertEquals("3", result[0][7]) + + result = sql """show column stats stats_test2(value);""" + assertEquals(1, result.size()) + assertEquals("value", result[0][0]) + assertEquals("3.0", result[0][1]) + assertEquals("3.0", result[0][2]) + assertEquals("0.0", result[0][3]) + assertEquals("15.0", result[0][4]) + assertEquals("5.0", result[0][5]) + assertEquals("\'name1\'", result[0][6]) + assertEquals("\'name3\'", result[0][7]) + + + sql """drop catalog if exists ${catalog_name}""" + } finally { + } + } +} + diff --git a/regression-test/suites/external_table_p2/hive/test_hive_analyze_db.groovy b/regression-test/suites/external_table_p2/hive/test_hive_analyze_db.groovy index bcbb6040bf..b0fffdef24 100644 --- a/regression-test/suites/external_table_p2/hive/test_hive_analyze_db.groovy +++ b/regression-test/suites/external_table_p2/hive/test_hive_analyze_db.groovy @@ -34,9 +34,7 @@ suite("test_hive_analyze_db", "p2,external,hive,external_remote,external_remote_ logger.info("switched to catalog " + catalog_name) sql """use statistics;""" sql """set query_timeout=300""" - // sql """analyze database statistics with sync""" - sql """analyze table statistics with sync""" - sql """analyze table stats with sync""" + sql """analyze database statistics with sync""" def result = sql """show column stats statistics""" assertEquals(result.size(), 17)