From dfb02a710402d4f6ef24cfd44d9e8172302f9bc4 Mon Sep 17 00:00:00 2001 From: Kikyou1997 <33112463+Kikyou1997@users.noreply.github.com> Date: Wed, 7 Dec 2022 20:41:00 +0800 Subject: [PATCH] [refactor](statistics) Remove deprecated statistics related codes (#14797) --- fe/fe-core/src/main/cup/sql_parser.cup | 9 - .../doris/analysis/AlterColumnStatsStmt.java | 14 +- .../doris/analysis/AlterTableStatsStmt.java | 165 ------ .../doris/analysis/ShowAnalyzeStmt.java | 3 +- .../doris/analysis/ShowTableStatsStmt.java | 110 ---- .../java/org/apache/doris/catalog/Env.java | 33 -- .../apache/doris/planner/OlapScanNode.java | 3 - .../java/org/apache/doris/qe/DdlExecutor.java | 5 +- .../org/apache/doris/qe/ShowExecutor.java | 15 +- .../statistics/AnalysisTaskExecutor.java | 4 +- .../apache/doris/statistics/ColumnStat.java | 332 ----------- .../doris/statistics/ColumnStatistic.java | 13 +- .../doris/statistics/MetaStatisticsTask.java | 147 ----- .../doris/statistics/PartitionStats.java | 163 ------ .../doris/statistics/SQLStatisticsTask.java | 142 ----- .../statistics/SampleSQLStatisticsTask.java | 49 -- .../apache/doris/statistics/Statistics.java | 228 -------- .../doris/statistics/StatisticsDesc.java | 61 -- .../doris/statistics/StatisticsJob.java | 342 ----------- .../statistics/StatisticsJobManager.java | 192 ------- .../statistics/StatisticsJobScheduler.java | 530 ------------------ .../doris/statistics/StatisticsManager.java | 508 ----------------- .../doris/statistics/StatisticsTask.java | 172 ------ .../statistics/StatisticsTaskResult.java | 132 ----- .../statistics/StatisticsTaskScheduler.java | 198 ------- .../apache/doris/statistics/TableStats.java | 309 ---------- .../jobs/cascades/DeriveStatsJobTest.java | 15 - .../nereids/stats/StatsCalculatorTest.java | 8 - .../doris/statistics/ColumnStatsTest.java | 154 ----- .../doris/statistics/PartitionStatsTest.java | 136 ----- .../statistics/SQLStatisticsTaskTest.java | 207 ------- .../SampleSQLStatisticsTaskTest.java | 205 ------- .../StatisticsJobSchedulerTest.java | 182 ------ .../doris/statistics/StatisticsJobTest.java | 120 ---- .../statistics/StatisticsManagerTest.java | 167 ------ .../doris/statistics/StatisticsTest.java | 267 --------- .../doris/statistics/TableStatsTest.java | 182 ------ 37 files changed, 24 insertions(+), 5498 deletions(-) delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/analysis/AlterTableStatsStmt.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStat.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/statistics/MetaStatisticsTask.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionStats.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/statistics/SQLStatisticsTask.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/statistics/SampleSQLStatisticsTask.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsDesc.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJob.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobManager.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobScheduler.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsManager.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsTask.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsTaskResult.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsTaskScheduler.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java delete mode 100644 fe/fe-core/src/test/java/org/apache/doris/statistics/ColumnStatsTest.java delete mode 100644 fe/fe-core/src/test/java/org/apache/doris/statistics/PartitionStatsTest.java delete mode 100644 fe/fe-core/src/test/java/org/apache/doris/statistics/SQLStatisticsTaskTest.java delete mode 100644 fe/fe-core/src/test/java/org/apache/doris/statistics/SampleSQLStatisticsTaskTest.java delete mode 100644 fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsJobSchedulerTest.java delete mode 100644 fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsJobTest.java delete mode 100644 fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsManagerTest.java delete mode 100644 fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsTest.java delete mode 100644 fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsTest.java diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup index 0735311f09..ee07447d4c 100644 --- a/fe/fe-core/src/main/cup/sql_parser.cup +++ b/fe/fe-core/src/main/cup/sql_parser.cup @@ -1268,10 +1268,6 @@ alter_stmt ::= {: RESULT = new AlterSqlBlockRuleStmt(ruleName, properties); :} - | KW_ALTER KW_TABLE table_name:tbl KW_SET KW_STATS LPAREN key_value_map:map RPAREN opt_partition_names:partitionNames - {: - RESULT = new AlterTableStatsStmt(tbl, map, partitionNames); - :} | KW_ALTER KW_TABLE table_name:tbl KW_MODIFY KW_COLUMN ident:columnName KW_SET KW_STATS LPAREN key_value_map:map RPAREN opt_partition_names:partitionNames {: @@ -3672,11 +3668,6 @@ show_param ::= {: RESULT = new ShowSyncJobStmt(dbName); :} - /* show table stats */ - | KW_TABLE KW_STATS opt_table_name:tbl opt_partition_names:partitionNames - {: - RESULT = new ShowTableStatsStmt(tbl, partitionNames); - :} /* show column stats */ | KW_COLUMN KW_STATS table_name:tbl opt_partition_names:partitionNames {: diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java index b1c6699da7..440a6acca9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java @@ -31,7 +31,7 @@ import org.apache.doris.common.util.PrintableMap; import org.apache.doris.common.util.Util; import org.apache.doris.mysql.privilege.PrivPredicate; import org.apache.doris.qe.ConnectContext; -import org.apache.doris.statistics.ColumnStat; +import org.apache.doris.statistics.ColumnStatistic; import org.apache.doris.statistics.StatsType; import com.google.common.collect.ImmutableSet; @@ -56,12 +56,12 @@ public class AlterColumnStatsStmt extends DdlStmt { private static final ImmutableSet CONFIGURABLE_PROPERTIES_SET = new ImmutableSet.Builder() .add(StatsType.ROW_COUNT) - .add(ColumnStat.NDV) - .add(ColumnStat.AVG_SIZE) - .add(ColumnStat.MAX_SIZE) - .add(ColumnStat.NUM_NULLS) - .add(ColumnStat.MIN_VALUE) - .add(ColumnStat.MAX_VALUE) + .add(ColumnStatistic.NDV) + .add(ColumnStatistic.AVG_SIZE) + .add(ColumnStatistic.MAX_SIZE) + .add(ColumnStatistic.NUM_NULLS) + .add(ColumnStatistic.MIN_VALUE) + .add(ColumnStatistic.MAX_VALUE) .add(StatsType.DATA_SIZE) .build(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterTableStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterTableStatsStmt.java deleted file mode 100644 index 2354fbd80e..0000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterTableStatsStmt.java +++ /dev/null @@ -1,165 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.analysis; - -import org.apache.doris.catalog.Database; -import org.apache.doris.catalog.Env; -import org.apache.doris.catalog.OlapTable; -import org.apache.doris.catalog.Table; -import org.apache.doris.common.AnalysisException; -import org.apache.doris.common.ErrorCode; -import org.apache.doris.common.ErrorReport; -import org.apache.doris.common.UserException; -import org.apache.doris.common.util.PrintableMap; -import org.apache.doris.common.util.Util; -import org.apache.doris.mysql.privilege.PrivPredicate; -import org.apache.doris.qe.ConnectContext; -import org.apache.doris.statistics.StatsType; -import org.apache.doris.statistics.TableStats; - -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; - -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.Set; - -/** - * Manually inject statistics for tables or partitions. - * Only OLAP table statistics are supported. - * - * syntax: - * ALTER TABLE table_name - * SET STATS ('k1' = 'v1', ...) [ PARTITIONS(p_name1, p_name2...) ] - */ -public class AlterTableStatsStmt extends DdlStmt { - - private static final ImmutableSet CONFIGURABLE_PROPERTIES_SET = - new ImmutableSet.Builder() - .add(TableStats.DATA_SIZE) - .add(TableStats.ROW_COUNT) - .build(); - - private final TableName tableName; - private final PartitionNames optPartitionNames; - private final Map properties; - - private final List partitionNames = Lists.newArrayList(); - private final Map statsTypeToValue = Maps.newHashMap(); - - public AlterTableStatsStmt(TableName tableName, Map properties, - PartitionNames optPartitionNames) { - this.tableName = tableName; - this.properties = properties == null ? Maps.newHashMap() : properties; - this.optPartitionNames = optPartitionNames; - } - - public TableName getTableName() { - return tableName; - } - - public List getPartitionNames() { - return partitionNames; - } - - public Map getStatsTypeToValue() { - return statsTypeToValue; - } - - @Override - public void analyze(Analyzer analyzer) throws UserException { - super.analyze(analyzer); - - // check table name - tableName.analyze(analyzer); - - // disallow external catalog - Util.prohibitExternalCatalog(tableName.getCtl(), this.getClass().getSimpleName()); - - // check partition - checkPartitionNames(); - - // check properties - Optional optional = properties.keySet().stream().map(StatsType::fromString) - .filter(statsType -> !CONFIGURABLE_PROPERTIES_SET.contains(statsType)) - .findFirst(); - if (optional.isPresent()) { - throw new AnalysisException(optional.get() + " is invalid statistics"); - } - - // check auth - if (!Env.getCurrentEnv().getAuth() - .checkTblPriv(ConnectContext.get(), tableName.getDb(), tableName.getTbl(), PrivPredicate.ALTER)) { - ErrorReport.reportAnalysisException(ErrorCode.ERR_TABLEACCESS_DENIED_ERROR, "ALTER TABLE STATS", - ConnectContext.get().getQualifiedUser(), ConnectContext.get().getRemoteIP(), - tableName.getDb() + ": " + tableName.getTbl()); - } - - // get statsTypeToValue - properties.forEach((key, value) -> { - StatsType statsType = StatsType.fromString(key); - statsTypeToValue.put(statsType, value); - }); - } - - private void checkPartitionNames() throws AnalysisException { - Database db = analyzer.getEnv().getInternalCatalog().getDbOrAnalysisException(tableName.getDb()); - Table table = db.getTableOrAnalysisException(tableName.getTbl()); - - if (table.getType() != Table.TableType.OLAP) { - throw new AnalysisException("Only OLAP table statistics are supported"); - } - - if (optPartitionNames != null) { - OlapTable olapTable = (OlapTable) table; - - if (!olapTable.isPartitioned()) { - throw new AnalysisException("Not a partitioned table: " + olapTable.getName()); - } - - optPartitionNames.analyze(analyzer); - List names = optPartitionNames.getPartitionNames(); - Set olapPartitionNames = olapTable.getPartitionNames(); - Optional optional = names.stream() - .filter(name -> !olapPartitionNames.contains(name)) - .findFirst(); - if (optional.isPresent()) { - throw new AnalysisException("Partition does not exist: " + optional.get()); - } - partitionNames.addAll(names); - } - } - - @Override - public String toSql() { - StringBuilder sb = new StringBuilder(); - sb.append("ALTER TABLE "); - sb.append(tableName.toSql()); - sb.append(" SET STATS "); - sb.append("("); - sb.append(new PrintableMap<>(properties, - " = ", true, false)); - sb.append(") "); - if (optPartitionNames != null) { - sb.append(optPartitionNames.toSql()); - } - return sb.toString(); - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAnalyzeStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAnalyzeStmt.java index e89e204982..a153b77ce6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAnalyzeStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAnalyzeStmt.java @@ -31,7 +31,6 @@ import org.apache.doris.mysql.privilege.PaloAuth; import org.apache.doris.mysql.privilege.PrivPredicate; import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.ShowResultSetMetaData; -import org.apache.doris.statistics.StatisticsJob; import com.google.common.base.Preconditions; import com.google.common.base.Strings; @@ -271,7 +270,7 @@ public class ShowAnalyzeStmt extends ShowStmt { stateValue = value.toUpperCase(); try { - StatisticsJob.JobState.valueOf(stateValue); + // support it later } catch (Exception e) { valid = false; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java deleted file mode 100644 index 6a3e630749..0000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java +++ /dev/null @@ -1,110 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.analysis; - -import org.apache.doris.catalog.Column; -import org.apache.doris.catalog.ScalarType; -import org.apache.doris.common.ErrorCode; -import org.apache.doris.common.ErrorReport; -import org.apache.doris.common.UserException; -import org.apache.doris.common.util.Util; -import org.apache.doris.qe.ShowResultSetMetaData; -import org.apache.doris.statistics.TableStats; - -import com.google.common.base.Preconditions; -import com.google.common.base.Strings; -import com.google.common.collect.ImmutableList; - -import java.util.Collections; -import java.util.List; - -public class ShowTableStatsStmt extends ShowStmt { - - private static final ImmutableList TITLE_NAMES = - new ImmutableList.Builder() - .add("table_name") - .add(TableStats.ROW_COUNT.getValue()) - .add(TableStats.DATA_SIZE.getValue()) - .build(); - - private final TableName tableName; - - // after analyzed - // There is only on attribute for both @tableName and @dbName at the same time. - private String dbName; - - private final PartitionNames partitionNames; - - public ShowTableStatsStmt(TableName tableName, PartitionNames partitionNames) { - this.tableName = tableName; - this.partitionNames = partitionNames; - } - - public String getTableName() { - Preconditions.checkArgument(isAnalyzed(), "The db name must be obtained after the parsing is complete"); - if (tableName == null) { - return null; - } - return tableName.getTbl(); - } - - public String getDbName() { - Preconditions.checkArgument(isAnalyzed(), "The db name must be obtained after the parsing is complete"); - if (tableName == null) { - return dbName; - } - return tableName.getDb(); - } - - public List getPartitionNames() { - if (partitionNames == null) { - return Collections.emptyList(); - } - return partitionNames.getPartitionNames(); - } - - @Override - public void analyze(Analyzer analyzer) throws UserException { - super.analyze(analyzer); - if (tableName == null) { - dbName = analyzer.getDefaultDb(); - if (Strings.isNullOrEmpty(dbName)) { - ErrorReport.reportAnalysisException(ErrorCode.ERR_NO_DB_ERROR); - } - return; - } - tableName.analyze(analyzer); - - if (partitionNames != null) { - partitionNames.analyze(analyzer); - } - - // disallow external catalog - Util.prohibitExternalCatalog(tableName.getCtl(), this.getClass().getSimpleName()); - } - - @Override - public ShowResultSetMetaData getMetaData() { - ShowResultSetMetaData.Builder builder = ShowResultSetMetaData.builder(); - - for (String title : TITLE_NAMES) { - builder.addColumn(new Column(title, ScalarType.createVarchar(30))); - } - return builder.build(); - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java index e29713731f..7321ddbb51 100755 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java @@ -211,10 +211,6 @@ import org.apache.doris.service.FrontendOptions; import org.apache.doris.statistics.AnalysisManager; import org.apache.doris.statistics.AnalysisTaskScheduler; import org.apache.doris.statistics.StatisticsCache; -import org.apache.doris.statistics.StatisticsJobManager; -import org.apache.doris.statistics.StatisticsJobScheduler; -import org.apache.doris.statistics.StatisticsManager; -import org.apache.doris.statistics.StatisticsTaskScheduler; import org.apache.doris.system.Backend; import org.apache.doris.system.FQDNManager; import org.apache.doris.system.Frontend; @@ -396,11 +392,6 @@ public class Env { private DeployManager deployManager; private TabletStatMgr tabletStatMgr; - // statistics - private StatisticsManager statisticsManager; - private StatisticsJobManager statisticsJobManager; - private StatisticsJobScheduler statisticsJobScheduler; - private StatisticsTaskScheduler statisticsTaskScheduler; private PaloAuth auth; @@ -594,11 +585,6 @@ public class Env { this.globalTransactionMgr = new GlobalTransactionMgr(this); this.tabletStatMgr = new TabletStatMgr(); - // statistics - this.statisticsManager = new StatisticsManager(); - this.statisticsJobManager = new StatisticsJobManager(); - this.statisticsJobScheduler = new StatisticsJobScheduler(); - this.statisticsTaskScheduler = new StatisticsTaskScheduler(); this.auth = new PaloAuth(); this.domainResolver = new DomainResolver(auth); @@ -756,23 +742,6 @@ public class Env { return checkpointer; } - // statistics - public StatisticsManager getStatisticsManager() { - return statisticsManager; - } - - public StatisticsJobManager getStatisticsJobManager() { - return statisticsJobManager; - } - - public StatisticsJobScheduler getStatisticsJobScheduler() { - return statisticsJobScheduler; - } - - public StatisticsTaskScheduler getStatisticsTaskScheduler() { - return statisticsTaskScheduler; - } - // Use tryLock to avoid potential dead lock private boolean tryLock(boolean mustLock) { while (true) { @@ -1429,8 +1398,6 @@ public class Env { partitionInMemoryInfoCollector.start(); streamLoadRecordMgr.start(); getInternalCatalog().getIcebergTableCreationRecordMgr().start(); - this.statisticsJobScheduler.start(); - this.statisticsTaskScheduler.start(); new InternalSchemaInitializer().start(); if (Config.enable_fqdn_mode) { fqdnManager.start(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java index 5346ec4adc..98b76cdca9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java @@ -484,15 +484,12 @@ public class OlapScanNode extends ScanNode { * Remove the method after statistics collection is working properly */ public void mockRowCountInStatistic() { - long tableId = desc.getTable().getId(); cardinality = 0; for (long selectedPartitionId : selectedPartitionIds) { final Partition partition = olapTable.getPartition(selectedPartitionId); final MaterializedIndex baseIndex = partition.getBaseIndex(); cardinality += baseIndex.getRowCount(); } - Env.getCurrentEnv().getStatisticsManager() - .getStatistics().mockTableStatsWithRowCount(tableId, cardinality); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/DdlExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/DdlExecutor.java index d48f8fdfdf..e6f5702f2e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/DdlExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/DdlExecutor.java @@ -39,7 +39,6 @@ import org.apache.doris.analysis.AlterResourceStmt; import org.apache.doris.analysis.AlterRoutineLoadStmt; import org.apache.doris.analysis.AlterSqlBlockRuleStmt; import org.apache.doris.analysis.AlterSystemStmt; -import org.apache.doris.analysis.AlterTableStatsStmt; import org.apache.doris.analysis.AlterTableStmt; import org.apache.doris.analysis.AlterUserStmt; import org.apache.doris.analysis.AlterViewStmt; @@ -165,8 +164,6 @@ public class DdlExecutor { env.createMaterializedView((CreateMaterializedViewStmt) ddlStmt); } else if (ddlStmt instanceof AlterTableStmt) { env.alterTable((AlterTableStmt) ddlStmt); - } else if (ddlStmt instanceof AlterTableStatsStmt) { - env.getStatisticsManager().alterTableStatistics((AlterTableStatsStmt) ddlStmt); } else if (ddlStmt instanceof AlterColumnStatsStmt) { StatisticsRepository.alterColumnStatistics((AlterColumnStatsStmt) ddlStmt); } else if (ddlStmt instanceof AlterViewStmt) { @@ -342,7 +339,7 @@ public class DdlExecutor { } else if (ddlStmt instanceof AlterUserStmt) { env.getAuth().alterUser((AlterUserStmt) ddlStmt); } else if (ddlStmt instanceof DropTableStatsStmt) { - env.getStatisticsManager().dropStats((DropTableStatsStmt) ddlStmt); + // TODO: support later } else { throw new DdlException("Unknown statement."); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java index 0e672f8cba..0eb3656f08 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java @@ -85,7 +85,6 @@ import org.apache.doris.analysis.ShowStreamLoadStmt; import org.apache.doris.analysis.ShowSyncJobStmt; import org.apache.doris.analysis.ShowTableCreationStmt; import org.apache.doris.analysis.ShowTableIdStmt; -import org.apache.doris.analysis.ShowTableStatsStmt; import org.apache.doris.analysis.ShowTableStatusStmt; import org.apache.doris.analysis.ShowTableStmt; import org.apache.doris.analysis.ShowTabletStmt; @@ -179,7 +178,6 @@ import org.apache.doris.mtmv.metadata.MTMVJob; import org.apache.doris.mtmv.metadata.MTMVTask; import org.apache.doris.mysql.privilege.PrivPredicate; import org.apache.doris.statistics.ColumnStatistic; -import org.apache.doris.statistics.StatisticsJobManager; import org.apache.doris.statistics.StatisticsRepository; import org.apache.doris.system.Backend; import org.apache.doris.system.Diagnoser; @@ -360,8 +358,6 @@ public class ShowExecutor { handleShowSyncJobs(); } else if (stmt instanceof ShowSqlBlockRuleStmt) { handleShowSqlBlockRule(); - } else if (stmt instanceof ShowTableStatsStmt) { - handleShowTableStats(); } else if (stmt instanceof ShowColumnStatsStmt) { handleShowColumnStats(); } else if (stmt instanceof ShowTableCreationStmt) { @@ -2141,12 +2137,6 @@ public class ShowExecutor { } - private void handleShowTableStats() throws AnalysisException { - ShowTableStatsStmt showTableStatsStmt = (ShowTableStatsStmt) stmt; - List> results = Env.getCurrentEnv().getStatisticsManager().showTableStatsList(showTableStatsStmt); - resultSet = new ShowResultSet(showTableStatsStmt.getMetaData(), results); - } - private void handleShowColumnStats() throws AnalysisException { ShowColumnStatsStmt showColumnStatsStmt = (ShowColumnStatsStmt) stmt; TableName tableName = showColumnStatsStmt.getTableName(); @@ -2313,10 +2303,7 @@ public class ShowExecutor { } private void handleShowAnalyze() throws AnalysisException { - ShowAnalyzeStmt showStmt = (ShowAnalyzeStmt) stmt; - StatisticsJobManager jobManager = Env.getCurrentEnv().getStatisticsJobManager(); - List> results = jobManager.getAnalyzeJobInfos(showStmt); - resultSet = new ShowResultSet(showStmt.getMetaData(), results); + // TODO: Support later } private void handleCopyTablet() throws AnalysisException { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskExecutor.java index ff98890cf7..783c73b6de 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskExecutor.java @@ -102,9 +102,9 @@ public class AnalysisTaskExecutor extends Thread { private void doFetchAndExecute() { BaseAnalysisTask task = taskScheduler.getPendingTasks(); - AnalysisTaskWrapper jobWrapper = new AnalysisTaskWrapper(this, task); + AnalysisTaskWrapper taskWrapper = new AnalysisTaskWrapper(this, task); incr(); - executors.submit(jobWrapper); + executors.submit(taskWrapper); Env.getCurrentEnv().getAnalysisManager() .updateTaskStatus(task.info, AnalysisState.RUNNING, "", System.currentTimeMillis()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStat.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStat.java deleted file mode 100644 index d6333a27a7..0000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStat.java +++ /dev/null @@ -1,332 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -import org.apache.doris.analysis.LiteralExpr; -import org.apache.doris.catalog.Type; -import org.apache.doris.common.AnalysisException; -import org.apache.doris.common.util.Util; -import org.apache.doris.statistics.util.StatisticsUtil; - -import com.google.common.collect.Lists; - -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.function.Predicate; - -/** - * There are the statistics of column. - * The column stats are mainly used to provide input for the Optimizer's cost model. - *

- * The description of column stats are following: - * 1. @ndv: The number distinct values of column. - * 2. @avgSize: The average size of column. The unit is bytes. - * 3. @maxSize: The max size of column. The unit is bytes. - * 4. @numNulls: The number of nulls. - * 5. @minValue: The min value of column. - * 6. @maxValue: The max value of column. - *

- * The granularity of the statistics is whole table. - * For example: - * "@ndv = 10" means that the number distinct values is 10 in the whole table. - */ -public class ColumnStat { - - public static final StatsType NDV = StatsType.NDV; - public static final StatsType AVG_SIZE = StatsType.AVG_SIZE; - public static final StatsType MAX_SIZE = StatsType.MAX_SIZE; - public static final StatsType NUM_NULLS = StatsType.NUM_NULLS; - public static final StatsType MIN_VALUE = StatsType.MIN_VALUE; - public static final StatsType MAX_VALUE = StatsType.MAX_VALUE; - - public static final ColumnStat UNKNOWN = new ColumnStat(); - - private static final Predicate DESIRED_NDV_PRED = (v) -> v >= -1L; - private static final Predicate DESIRED_AVG_SIZE_PRED = (v) -> (v == -1) || (v >= 0); - private static final Predicate DESIRED_MAX_SIZE_PRED = (v) -> v >= -1L; - private static final Predicate DESIRED_NUM_NULLS_PRED = (v) -> v >= -1L; - - public static final Set MAX_MIN_UNSUPPORTED_TYPE = new HashSet<>(); - - static { - MAX_MIN_UNSUPPORTED_TYPE.add(Type.HLL); - MAX_MIN_UNSUPPORTED_TYPE.add(Type.BITMAP); - MAX_MIN_UNSUPPORTED_TYPE.add(Type.ARRAY); - MAX_MIN_UNSUPPORTED_TYPE.add(Type.STRUCT); - MAX_MIN_UNSUPPORTED_TYPE.add(Type.MAP); - } - - private double ndv = -1; - private double avgSizeByte = -1; - private double maxSizeByte = -1; - private double numNulls = -1; - private double minValue = Double.NaN; - private double maxValue = Double.NaN; - // For display only. - private LiteralExpr minExpr; - private LiteralExpr maxExpr; - - private double selectivity = 1.0; - - public static ColumnStat createDefaultColumnStats() { - ColumnStat columnStat = new ColumnStat(); - columnStat.setAvgSizeByte(1); - columnStat.setMaxSizeByte(1); - columnStat.setNdv(1); - columnStat.setNumNulls(0); - return columnStat; - } - - public static boolean isUnKnown(ColumnStat stats) { - return stats == UNKNOWN; - } - - public ColumnStat() { - } - - public ColumnStat(ColumnStat other) { - this.ndv = other.ndv; - this.avgSizeByte = other.avgSizeByte; - this.maxSizeByte = other.maxSizeByte; - this.numNulls = other.numNulls; - this.minValue = other.minValue; - this.maxValue = other.maxValue; - this.selectivity = other.selectivity; - } - - public ColumnStat(double ndv, double avgSizeByte, - double maxSizeByte, double numNulls, double minValue, double maxValue) { - this.ndv = ndv; - this.avgSizeByte = avgSizeByte; - this.maxSizeByte = maxSizeByte; - this.numNulls = numNulls; - this.minValue = minValue; - this.maxValue = maxValue; - } - - public double getNdv() { - return ndv; - } - - public double getAvgSizeByte() { - return avgSizeByte; - } - - public double getMaxSizeByte() { - return maxSizeByte; - } - - public double getNumNulls() { - return numNulls; - } - - public double getMinValue() { - return minValue; - } - - public double getMaxValue() { - return maxValue; - } - - public void setNdv(double ndv) { - this.ndv = ndv; - } - - public void setAvgSizeByte(double avgSizeByte) { - this.avgSizeByte = avgSizeByte; - } - - public void setMaxSizeByte(double maxSizeByte) { - this.maxSizeByte = maxSizeByte; - } - - public void setNumNulls(double numNulls) { - this.numNulls = numNulls; - } - - public void setMinValue(double minValue) { - this.minValue = minValue; - } - - public void setMaxValue(double maxValue) { - this.maxValue = maxValue; - } - - public void updateStats(Type columnType, Map statsTypeToValue) throws AnalysisException { - for (Map.Entry entry : statsTypeToValue.entrySet()) { - StatsType statsType = entry.getKey(); - switch (statsType) { - case NDV: - ndv = Util.getDoublePropertyOrDefault(entry.getValue(), ndv, - DESIRED_NDV_PRED, NDV + " should >= -1"); - break; - case AVG_SIZE: - avgSizeByte = Util.getDoublePropertyOrDefault(entry.getValue(), avgSizeByte, - DESIRED_AVG_SIZE_PRED, AVG_SIZE + " should (>=0) or (=-1)"); - break; - case MAX_SIZE: - maxSizeByte = Util.getDoublePropertyOrDefault(entry.getValue(), maxSizeByte, - DESIRED_MAX_SIZE_PRED, MAX_SIZE + " should >=-1"); - break; - case NUM_NULLS: - numNulls = Util.getDoublePropertyOrDefault(entry.getValue(), numNulls, - DESIRED_NUM_NULLS_PRED, NUM_NULLS + " should >=-1"); - break; - case MIN_VALUE: - if (MAX_MIN_UNSUPPORTED_TYPE.contains(statsType)) { - minValue = Double.NEGATIVE_INFINITY; - } else { - minExpr = StatisticsUtil.readableValue(columnType, entry.getValue()); - minValue = StatisticsUtil.convertToDouble(columnType, entry.getValue()); - } - break; - case MAX_VALUE: - if (MAX_MIN_UNSUPPORTED_TYPE.contains(statsType)) { - maxValue = Double.NEGATIVE_INFINITY; - } else { - maxExpr = StatisticsUtil.readableValue(columnType, entry.getValue()); - maxValue = StatisticsUtil.convertToDouble(columnType, entry.getValue()); - } - break; - default: - throw new AnalysisException("Unknown stats type: " + statsType); - } - } - } - - public List getShowInfo() { - List result = Lists.newArrayList(); - result.add(Double.toString(ndv)); - result.add(Double.toString(avgSizeByte)); - result.add(Double.toString(maxSizeByte)); - result.add(Double.toString(numNulls)); - result.add(Double.toString(minValue)); - result.add(Double.toString(maxValue)); - return result; - } - - public ColumnStat copy() { - return new ColumnStat(this); - } - - - public boolean hasIntersect(ColumnStat another) { - double leftMin = this.getMinValue(); - double rightMin = another.getMinValue(); - double leftMax = this.getMaxValue(); - double rightMax = another.getMaxValue(); - return Math.max(leftMin, rightMin) <= Math.min(leftMax, rightMax); - } - - /** - * Return default column statistic. - */ - public static ColumnStat getDefaultColumnStats() { - return new ColumnStat(); - } - - /** - * Merge column statistics(the original statistics should not be modified) - * - * @param left statistics to be merged - * @param right statistics to be merged - */ - public static ColumnStat mergeColumnStats(ColumnStat left, ColumnStat right) { - // merge ndv - double leftNdv = left.getNdv(); - double rightNdv = right.getNdv(); - - if (leftNdv == -1) { - leftNdv = rightNdv; - } else { - leftNdv = rightNdv != -1 ? (leftNdv + rightNdv) : leftNdv; - } - - double leftAvgSize = left.getAvgSizeByte(); - double rightAvgSize = right.getAvgSizeByte(); - if (leftAvgSize == -1) { - leftAvgSize = rightAvgSize; - } else { - leftAvgSize = rightAvgSize != -1 ? ((leftAvgSize + rightAvgSize) / 2) : leftAvgSize; - } - - // merge max_size - double leftMaxSize = left.getMaxSizeByte(); - double rightMaxSize = right.getMaxSizeByte(); - if (leftMaxSize == -1) { - leftMaxSize = rightMaxSize; - } else { - leftMaxSize = Math.max(leftMaxSize, rightMaxSize); - } - - // merge num_nulls - double leftNumNulls = left.getNumNulls(); - double rightNumNulls = right.getNumNulls(); - if (leftNumNulls == -1) { - leftNumNulls = rightNumNulls; - } else { - leftNumNulls = rightNumNulls != -1 ? (leftNumNulls + rightNumNulls) : leftNumNulls; - } - - // merge min_value - double leftMinValue = left.getMinValue(); - double rightMinValue = right.getMinValue(); - leftMinValue = Math.min(leftMinValue, rightMinValue); - - // merge max_value - double leftMaxValue = left.getMaxValue(); - double rightMaxValue = right.getMaxValue(); - leftMaxValue = Math.max(rightMaxValue, leftMaxValue); - - // generate the new merged-statistics - return new ColumnStat(leftNdv, leftAvgSize, leftMaxSize, leftNumNulls, leftMinValue, leftMaxValue); - } - - public static boolean isAlmostUnique(double ndv, double rowCount) { - return rowCount * 0.9 < ndv && ndv < rowCount * 1.1; - } - - public double getSelectivity() { - return selectivity; - } - - public void setSelectivity(double selectivity) { - this.selectivity = selectivity; - } - - public double ndvIntersection(ColumnStat other) { - if (maxValue == minValue) { - if (minValue <= other.maxValue && minValue >= other.minValue) { - return 1; - } else { - return 0; - } - } - double min = Math.max(minValue, other.minValue); - double max = Math.min(maxValue, other.maxValue); - if (min < max) { - return Math.ceil(ndv * (max - min) / (maxValue - minValue)); - } else if (min > max) { - return 0; - } else { - return 1; - } - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java index 7d4d7ac2e5..a5aee62c8e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java @@ -36,6 +36,13 @@ import java.util.Set; public class ColumnStatistic { + public static final StatsType NDV = StatsType.NDV; + public static final StatsType AVG_SIZE = StatsType.AVG_SIZE; + public static final StatsType MAX_SIZE = StatsType.MAX_SIZE; + public static final StatsType NUM_NULLS = StatsType.NUM_NULLS; + public static final StatsType MIN_VALUE = StatsType.MIN_VALUE; + public static final StatsType MAX_VALUE = StatsType.MAX_VALUE; + private static final Logger LOG = LogManager.getLogger(StmtExecutor.class); public static ColumnStatistic DEFAULT = new ColumnStatisticBuilder().setAvgSizeByte(1).setNdv(1) @@ -137,6 +144,10 @@ public class ColumnStatistic { } } + public static boolean isAlmostUnique(double ndv, double rowCount) { + return rowCount * 0.9 < ndv && ndv < rowCount * 1.1; + } + public ColumnStatistic copy() { return new ColumnStatisticBuilder().setCount(count).setNdv(ndv).setAvgSizeByte(avgSizeByte) .setNumNulls(numNulls).setDataSize(dataSize).setMinValue(minValue) @@ -186,7 +197,7 @@ public class ColumnStatistic { } ColumnStatisticBuilder builder = new ColumnStatisticBuilder(this); Double rowsAfterFilter = rowCount * selectivity; - if (ColumnStat.isAlmostUnique(ndv, rowCount)) { + if (isAlmostUnique(ndv, rowCount)) { builder.setSelectivity(this.selectivity * selectivity); builder.setNdv(ndv * selectivity); } else { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/MetaStatisticsTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/MetaStatisticsTask.java deleted file mode 100644 index b900085ca7..0000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/MetaStatisticsTask.java +++ /dev/null @@ -1,147 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -import org.apache.doris.catalog.Column; -import org.apache.doris.catalog.Database; -import org.apache.doris.catalog.Env; -import org.apache.doris.catalog.OlapTable; -import org.apache.doris.catalog.Partition; -import org.apache.doris.catalog.Table; -import org.apache.doris.common.DdlException; -import org.apache.doris.statistics.StatisticsTaskResult.TaskResult; - -import com.google.common.collect.Lists; - -import java.util.List; - -/** - * A statistics task that directly collects statistics by reading FE meta. - * e.g. for fixed-length types such as Int type and Long type we get their size from metadata. - * 1.The granularity of row count can be table or partition, and the type should be table or partition - * 2.The granularity of data size can be table or partition, and the type should be table or partition - * 3.The granularity of max and min size can be table or partition, and the type should be column - */ -public class MetaStatisticsTask extends StatisticsTask { - public MetaStatisticsTask(long jobId, List statsDescs) { - super(jobId, statsDescs); - } - - @Override - public StatisticsTaskResult call() throws Exception { - checkStatisticsDesc(); - List taskResults = Lists.newArrayList(); - - for (StatisticsDesc statsDesc : statsDescs) { - StatsCategory category = statsDesc.getStatsCategory(); - StatsGranularity granularity = statsDesc.getStatsGranularity(); - TaskResult result = createNewTaskResult(category, granularity); - List statsTypes = statsDesc.getStatsTypes(); - - for (StatsType statsType : statsTypes) { - switch (statsType) { - case MAX_SIZE: - case AVG_SIZE: - getColSize(category, statsType, result); - break; - case ROW_COUNT: - getRowCount(category.getDbId(), category.getTableId(), granularity, result); - break; - case DATA_SIZE: - getDataSize(category.getDbId(), category.getTableId(), granularity, result); - break; - default: - throw new DdlException("Unsupported statistics type(" + statsType + ")."); - } - } - - taskResults.add(result); - } - - return new StatisticsTaskResult(taskResults); - } - - private void getColSize(StatsCategory category, StatsType statsType, - TaskResult result) throws DdlException { - OlapTable table = getNotNullOlapTable(category.getDbId(), category.getTableId()); - Column column = getNotNullColumn(table, category.getColumnName()); - int colSize = column.getDataType().getSlotSize(); - result.getStatsTypeToValue().put(statsType, String.valueOf(colSize)); - } - - private void getRowCount(long dbId, long tableId, StatsGranularity granularity, - TaskResult result) throws DdlException { - OlapTable table = getNotNullOlapTable(dbId, tableId); - - switch (granularity.getGranularity()) { - case TABLE: - long tblRowCount = table.getRowCount(); - result.getStatsTypeToValue().put(StatsType.ROW_COUNT, String.valueOf(tblRowCount)); - break; - case PARTITION: - Partition partition = getNotNullPartition(granularity, table); - long ptRowCount = partition.getBaseIndex().getRowCount(); - result.getStatsTypeToValue().put(StatsType.ROW_COUNT, String.valueOf(ptRowCount)); - break; - case TABLET: - default: - throw new DdlException("Unsupported granularity(" + granularity + ")."); - } - } - - private void getDataSize(long dbId, long tableId, StatsGranularity granularity, - TaskResult result) throws DdlException { - OlapTable table = getNotNullOlapTable(dbId, tableId); - - switch (granularity.getGranularity()) { - case TABLE: - long tblDataSize = table.getDataSize(); - result.getStatsTypeToValue().put(StatsType.DATA_SIZE, String.valueOf(tblDataSize)); - break; - case PARTITION: - Partition partition = getNotNullPartition(granularity, table); - long partitionSize = partition.getBaseIndex().getDataSize(); - result.getStatsTypeToValue().put(StatsType.DATA_SIZE, String.valueOf(partitionSize)); - break; - case TABLET: - default: - throw new DdlException("Unsupported granularity(" + granularity + ")."); - } - } - - private OlapTable getNotNullOlapTable(long dbId, long tableId) throws DdlException { - Database db = Env.getCurrentInternalCatalog().getDbOrDdlException(dbId); - return (OlapTable) db.getTableOrDdlException(tableId); - } - - private Partition getNotNullPartition(StatsGranularity granularity, OlapTable olapTable) throws DdlException { - Partition partition = olapTable.getPartition(granularity.getPartitionId()); - if (partition == null) { - throw new DdlException("Partition(" + granularity.getPartitionId() + ") not found."); - } - return partition; - } - - private Column getNotNullColumn(Table table, String colName) throws DdlException { - Column column = table.getColumn(colName); - if (column == null) { - throw new DdlException("Column(" + colName + ") not found."); - } - return column; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionStats.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionStats.java deleted file mode 100644 index 248d07b24b..0000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionStats.java +++ /dev/null @@ -1,163 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -import org.apache.doris.catalog.Type; -import org.apache.doris.common.AnalysisException; -import org.apache.doris.common.util.Util; - -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; - -import java.util.List; -import java.util.Map; -import java.util.function.Predicate; - -/** - * There are the statistics of partition. - * The partition stats are mainly used to provide input for the Optimizer's cost model. - * The description of partition stats are following: - * - @rowCount: The row count of partition. - * - @dataSize: The data size of partition. - * - @nameToColumnStats: <@String columnName, @ColumnStats columnStats> - *

- * Each column in the Table will have corresponding @ColumnStats. - * Those @ColumnStats are recorded in @nameToColumnStats form of MAP. - * This facilitates the optimizer to quickly find the corresponding: - * - @ColumnStats: based on the column name. - * - @rowCount: The row count of partition. - * - @dataSize: The data size of partition. - *

- * The granularity of the statistics is whole partition. - * For example: "@rowCount = 1000" means that the row count is 1000 in the whole partition. - *

- * After the statistics task is successfully completed, update the PartitionStats, - * PartitionStats should not be updated in any other way. - */ -public class PartitionStats { - public static final StatsType DATA_SIZE = StatsType.DATA_SIZE; - public static final StatsType ROW_COUNT = StatsType.ROW_COUNT; - - private static final Predicate DESIRED_ROW_COUNT_PRED = (v) -> v >= -1L; - private static final Predicate DESIRED_DATA_SIZE_PRED = (v) -> v >= -1L; - - private long rowCount = -1; - private long dataSize = -1; - private final Map nameToColumnStats = Maps.newConcurrentMap(); - - /** - * Return a default partition statistic. - */ - public static PartitionStats getDefaultPartitionStats() { - return new PartitionStats(); - } - - public PartitionStats() { - } - - public PartitionStats(long rowCount, long dataSize) { - this.rowCount = rowCount; - this.dataSize = dataSize; - } - - public long getRowCount() { - return rowCount; - } - - public void setRowCount(long rowCount) { - this.rowCount = rowCount; - } - - public long getDataSize() { - return dataSize; - } - - public void setDataSize(long dataSize) { - this.dataSize = dataSize; - } - - public Map getNameToColumnStats() { - return nameToColumnStats; - } - - public ColumnStat getColumnStats(String columnName) { - return nameToColumnStats.get(columnName); - } - - /** - * If the column statistics do not exist, the default statistics will be returned. - */ - public ColumnStat getColumnStatsOrDefault(String columnName) { - return nameToColumnStats.getOrDefault(columnName, - ColumnStat.getDefaultColumnStats()); - } - - /** - * Show the partition row count and data size. - */ - public List getShowInfo() { - List result = Lists.newArrayList(); - result.add(Long.toString(rowCount)); - result.add(Long.toString(dataSize)); - return result; - } - - /** - * After the statistics task is successfully completed, update the statistics of the partition, - * statistics should not be updated in any other way. - */ - public void updatePartitionStats(Map statsTypeToValue) throws AnalysisException { - for (Map.Entry entry : statsTypeToValue.entrySet()) { - StatsType statsType = entry.getKey(); - String value = entry.getValue(); - if (statsType == ROW_COUNT) { - rowCount = Util.getLongPropertyOrDefault(value, rowCount, - DESIRED_ROW_COUNT_PRED, ROW_COUNT + " should >= -1"); - } else if (statsType == DATA_SIZE) { - dataSize = Util.getLongPropertyOrDefault(value, dataSize, - DESIRED_DATA_SIZE_PRED, DATA_SIZE + " should >= -1"); - } - } - } - - /** - * After the statistics task is successfully completed, update the statistics of the column, - * statistics should not be updated in any other way. - */ - public void updateColumnStats(String columnName, - Type columnType, - Map statsTypeToValue) throws AnalysisException { - ColumnStat columnStat = getNotNullColumnStats(columnName); - columnStat.updateStats(columnType, statsTypeToValue); - } - - /** - * If column stats is not exist, create a new one. - * - * @param columnName column name - * @return @ColumnStats - */ - public ColumnStat getNotNullColumnStats(String columnName) { - ColumnStat columnStat = nameToColumnStats.get(columnName); - if (columnStat == null) { - columnStat = new ColumnStat(); - nameToColumnStats.put(columnName, columnStat); - } - return columnStat; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/SQLStatisticsTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/SQLStatisticsTask.java deleted file mode 100644 index e12a04b3f2..0000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/SQLStatisticsTask.java +++ /dev/null @@ -1,142 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -import org.apache.doris.catalog.Database; -import org.apache.doris.catalog.Env; -import org.apache.doris.catalog.Table; -import org.apache.doris.common.DdlException; -import org.apache.doris.common.InvalidFormatException; -import org.apache.doris.statistics.StatisticsTaskResult.TaskResult; -import org.apache.doris.statistics.StatsGranularity.Granularity; -import org.apache.doris.statistics.util.InternalQuery; -import org.apache.doris.statistics.util.InternalQueryResult; -import org.apache.doris.statistics.util.InternalQueryResult.ResultRow; -import org.apache.doris.statistics.util.InternalSqlTemplate; -import org.apache.doris.statistics.util.InternalSqlTemplate.QueryType; - -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; - -import java.util.List; -import java.util.Map; - -/** - * A statistics task that collects statistics by executing query. - * The results of the query will be returned as @StatisticsTaskResult. - */ -public class SQLStatisticsTask extends StatisticsTask { - protected QueryType queryType = QueryType.FULL; - - protected String statement; - - public SQLStatisticsTask(long jobId, List statsDescs) { - super(jobId, statsDescs); - } - - @Override - public StatisticsTaskResult call() throws Exception { - checkStatisticsDesc(); - List taskResults = Lists.newArrayList(); - - for (StatisticsDesc statsDesc : statsDescs) { - statement = constructQuery(statsDesc); - TaskResult taskResult = executeQuery(statsDesc); - taskResults.add(taskResult); - LOG.info("Collected statistics successfully by SQL: {}", statement); - } - - return new StatisticsTaskResult(taskResults); - } - - protected String constructQuery(StatisticsDesc statsDesc) throws DdlException, - InvalidFormatException { - Map params = getQueryParams(statsDesc); - - List statsTypes = statsDesc.getStatsTypes(); - StatsType type = statsTypes.get(0); - - StatsGranularity statsGranularity = statsDesc.getStatsGranularity(); - Granularity granularity = statsGranularity.getGranularity(); - boolean nonPartitioned = granularity != Granularity.PARTITION; - - switch (type) { - case ROW_COUNT: - return nonPartitioned ? InternalSqlTemplate.buildStatsRowCountSql(params, queryType) - : InternalSqlTemplate.buildStatsPartitionRowCountSql(params, queryType); - case NUM_NULLS: - return nonPartitioned ? InternalSqlTemplate.buildStatsNumNullsSql(params, queryType) - : InternalSqlTemplate.buildStatsPartitionNumNullsSql(params, queryType); - case MAX_SIZE: - case AVG_SIZE: - return nonPartitioned ? InternalSqlTemplate.buildStatsMaxAvgSizeSql(params, queryType) - : InternalSqlTemplate.buildStatsPartitionMaxAvgSizeSql(params, queryType); - case NDV: - case MAX_VALUE: - case MIN_VALUE: - return nonPartitioned ? InternalSqlTemplate.buildStatsMinMaxNdvValueSql(params, queryType) - : InternalSqlTemplate.buildStatsPartitionMinMaxNdvValueSql(params, queryType); - case DATA_SIZE: - default: - throw new DdlException("Unsupported statistics type: " + type); - } - } - - protected TaskResult executeQuery(StatisticsDesc statsDesc) throws Exception { - StatsGranularity granularity = statsDesc.getStatsGranularity(); - List statsTypes = statsDesc.getStatsTypes(); - StatsCategory category = statsDesc.getStatsCategory(); - - String dbName = Env.getCurrentInternalCatalog() - .getDbOrDdlException(category.getDbId()).getFullName(); - InternalQuery query = new InternalQuery(dbName, statement); - InternalQueryResult queryResult = query.query(); - List resultRows = queryResult.getResultRows(); - - if (resultRows != null && resultRows.size() == 1) { - ResultRow resultRow = resultRows.get(0); - List columns = resultRow.getColumns(); - TaskResult result = createNewTaskResult(category, granularity); - - if (columns.size() == statsTypes.size()) { - for (int i = 0; i < columns.size(); i++) { - StatsType statsType = StatsType.fromString(columns.get(i)); - result.getStatsTypeToValue().put(statsType, resultRow.getString(i)); - } - return result; - } - } - - // Statistics statements are executed singly and return only one row data - throw new DdlException("Statistics query result is incorrect, statement: " - + statement + " queryResult: " + queryResult); - } - - protected Map getQueryParams(StatisticsDesc statsDesc) throws DdlException { - StatsCategory category = statsDesc.getStatsCategory(); - Database db = Env.getCurrentInternalCatalog().getDbOrDdlException(category.getDbId()); - Table table = db.getTableOrDdlException(category.getTableId()); - - Map params = Maps.newHashMap(); - params.put(InternalSqlTemplate.TABLE, table.getName()); - params.put(InternalSqlTemplate.PARTITION, category.getPartitionName()); - params.put(InternalSqlTemplate.COLUMN, category.getColumnName()); - - return params; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/SampleSQLStatisticsTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/SampleSQLStatisticsTask.java deleted file mode 100644 index 9cf3dd365d..0000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/SampleSQLStatisticsTask.java +++ /dev/null @@ -1,49 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -import org.apache.doris.common.Config; -import org.apache.doris.common.DdlException; -import org.apache.doris.statistics.util.InternalSqlTemplate; -import org.apache.doris.statistics.util.InternalSqlTemplate.QueryType; - -import java.util.List; -import java.util.Map; - -/** - * The @SampleSQLStatisticsTask is also a statistical task that executes a query - * and uses the query result as a statistical value (same as @SQLStatisticsTask). - * The only difference from the SQLStatisticsTask is that the query is a sampling table query. - */ -public class SampleSQLStatisticsTask extends SQLStatisticsTask { - // TODO(wzt): If the job configuration has percentage value, obtain from the job, - // if not, use the default value. - private int samplePercentage = Config.cbo_default_sample_percentage; - - public SampleSQLStatisticsTask(long jobId, List statsDescs) { - super(jobId, statsDescs); - queryType = QueryType.SAMPLE; - } - - @Override - protected Map getQueryParams(StatisticsDesc statsDesc) throws DdlException { - Map params = super.getQueryParams(statsDesc); - params.put(InternalSqlTemplate.PERCENT, String.valueOf(samplePercentage)); - return params; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java deleted file mode 100644 index d859b4a4f8..0000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java +++ /dev/null @@ -1,228 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -import org.apache.doris.catalog.Type; -import org.apache.doris.common.AnalysisException; - -import com.google.common.base.Strings; -import com.google.common.collect.Maps; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - -import java.util.Map; - -/** - * There are the statistics of all tables. - * The @Statistics are mainly used to provide input for the Optimizer's cost model. - * - * @idToTableStats: <@Long tableId, @TableStats tableStats> - * - Each table will have corresponding @TableStats - * - Those @TableStats are recorded in @idToTableStats form of MAP. - * - This facilitates the optimizer to quickly find the corresponding - * @TableStats based on the table id. - */ -public class Statistics { - private static final Logger LOG = LogManager.getLogger(Statistics.class); - - private final Map idToTableStats = Maps.newConcurrentMap(); - - /** - * Get the table stats for the given table id. - * - * @param tableId table id - * @return @TableStats - * @throws AnalysisException if table stats not exists - */ - public TableStats getTableStats(long tableId) throws AnalysisException { - TableStats tableStats = idToTableStats.get(tableId); - if (tableStats == null) { - throw new AnalysisException("Table " + tableId + " has no statistics"); - } - return tableStats; - } - - /** - * If the table statistics do not exist, the default statistics will be returned. - */ - public TableStats getTableStatsOrDefault(long tableId) throws AnalysisException { - return idToTableStats.getOrDefault(tableId, TableStats.getDefaultTableStats()); - } - - /** - * Get the partitions stats for the given table id. - * - * @param tableId table id - * @return partition name and @PartitionStats - * @throws AnalysisException if partitions stats not exists - */ - public Map getPartitionStats(long tableId) throws AnalysisException { - TableStats tableStats = getTableStats(tableId); - Map nameToPartitionStats = tableStats.getNameToPartitionStats(); - if (nameToPartitionStats == null) { - throw new AnalysisException("Table " + tableId + " has no partition statistics"); - } - return nameToPartitionStats; - } - - /** - * Get the partition stats for the given table id and partition name. - * - * @param tableId table id - * @param partitionName partition name - * @return partition name and @PartitionStats - * @throws AnalysisException if partition stats not exists - */ - public Map getPartitionStats(long tableId, String partitionName) - throws AnalysisException { - Map partitionStats = getPartitionStats(tableId); - PartitionStats partitionStat = partitionStats.get(partitionName); - if (partitionStat == null) { - throw new AnalysisException("Partition " + partitionName + " of table " + tableId + " has no statistics"); - } - Map statsMap = Maps.newHashMap(); - statsMap.put(partitionName, partitionStat); - return statsMap; - } - - /** - * Get the columns stats for the given table id. - * - * @param tableId table id - * @return column name and @ColumnStats - * @throws AnalysisException if columns stats not exists - */ - public Map getColumnStats(long tableId) throws AnalysisException { - TableStats tableStats = getTableStats(tableId); - Map nameToColumnStats = tableStats.getNameToColumnStats(); - if (nameToColumnStats == null) { - throw new AnalysisException("Table " + tableId + " has no column statistics"); - } - return nameToColumnStats; - } - - /** - * Get the columns stats for the given table id and partition name. - * - * @param tableId table id - * @param partitionName partition name - * @return column name and @ColumnStats - * @throws AnalysisException if column stats not exists - */ - public Map getColumnStats(long tableId, String partitionName) throws AnalysisException { - Map partitionStats = getPartitionStats(tableId, partitionName); - PartitionStats partitionStat = partitionStats.get(partitionName); - if (partitionStat == null) { - throw new AnalysisException("Partition " + partitionName + " of table " + tableId + " has no statistics"); - } - return partitionStat.getNameToColumnStats(); - } - - public void updateTableStats(long tableId, Map statsTypeToValue) throws AnalysisException { - synchronized (this) { - TableStats tableStats = getNotNullTableStats(tableId); - tableStats.updateTableStats(statsTypeToValue); - } - } - - public void updatePartitionStats(long tableId, String partitionName, Map statsTypeToValue) - throws AnalysisException { - synchronized (this) { - TableStats tableStats = getNotNullTableStats(tableId); - tableStats.updatePartitionStats(partitionName, statsTypeToValue); - } - } - - public void updateColumnStats(long tableId, String columnName, Type columnType, - Map statsTypeToValue) throws AnalysisException { - synchronized (this) { - TableStats tableStats = getNotNullTableStats(tableId); - tableStats.updateColumnStats(columnName, columnType, statsTypeToValue); - } - } - - public void updateColumnStats(long tableId, String partitionName, String columnName, Type columnType, - Map statsTypeToValue) throws AnalysisException { - synchronized (this) { - PartitionStats partitionStats = getNotNullPartitionStats(tableId, partitionName); - partitionStats.updateColumnStats(columnName, columnType, statsTypeToValue); - } - } - - public void dropTableStats(long tableId) { - dropPartitionStats(tableId, null); - } - - public void dropPartitionStats(long tableId, String partitionName) { - synchronized (this) { - if (idToTableStats.containsKey(tableId)) { - if (Strings.isNullOrEmpty(partitionName)) { - idToTableStats.remove(tableId); - LOG.info("Deleted table(id={}) statistics.", tableId); - } else { - TableStats tableStats = idToTableStats.get(tableId); - tableStats.getNameToPartitionStats().remove(partitionName); - LOG.info("Deleted statistics for partition {} of table(id={}).", - partitionName, tableId); - } - } - } - } - - // TODO: mock statistics need to be removed in the future - public void mockTableStatsWithRowCount(long tableId, double rowCount) { - TableStats tableStats = idToTableStats.get(tableId); - if (tableStats == null) { - tableStats = new TableStats(rowCount, 1); - idToTableStats.put(tableId, tableStats); - } - } - - /** - * if the table stats is not exist, create a new one. - * - * @param tableId table id - * @return @TableStats - */ - private TableStats getNotNullTableStats(long tableId) { - TableStats tableStats = idToTableStats.get(tableId); - if (tableStats == null) { - tableStats = new TableStats(); - idToTableStats.put(tableId, tableStats); - } - return tableStats; - } - - /** - * if the partition stats is not exist, create a new one. - * - * @param tableId table id - * @param partitionName partition name - * @return @TableStats - */ - private PartitionStats getNotNullPartitionStats(long tableId, String partitionName) { - TableStats tableStats = getNotNullTableStats(tableId); - Map nameToPartitionStats = tableStats.getNameToPartitionStats(); - PartitionStats partitionStats = nameToPartitionStats.get(partitionName); - if (partitionStats == null) { - partitionStats = new PartitionStats(); - nameToPartitionStats.put(partitionName, partitionStats); - } - return partitionStats; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsDesc.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsDesc.java deleted file mode 100644 index a327fed395..0000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsDesc.java +++ /dev/null @@ -1,61 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -import java.util.List; - -public class StatisticsDesc { - private StatsCategory statsCategory; - - private StatsGranularity statsGranularity; - - private List statsTypes; - - public StatisticsDesc(StatsCategory statsCategory, - StatsGranularity statsGranularity, - List statsTypes) { - this.statsCategory = statsCategory; - this.statsGranularity = statsGranularity; - this.statsTypes = statsTypes; - } - - public StatsCategory getStatsCategory() { - return statsCategory; - } - - public void setStatsCategory(StatsCategory statsCategory) { - this.statsCategory = statsCategory; - } - - public StatsGranularity getStatsGranularity() { - return statsGranularity; - } - - public void setStatsGranularity(StatsGranularity statsGranularity) { - this.statsGranularity = statsGranularity; - } - - public List getStatsTypes() { - return statsTypes; - } - - public void setStatsTypes(List statsTypes) { - this.statsTypes = statsTypes; - } -} - diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJob.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJob.java deleted file mode 100644 index 62cc5638ce..0000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJob.java +++ /dev/null @@ -1,342 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -import org.apache.doris.catalog.Column; -import org.apache.doris.catalog.Database; -import org.apache.doris.catalog.Env; -import org.apache.doris.catalog.Table; -import org.apache.doris.common.AnalysisException; -import org.apache.doris.common.DdlException; -import org.apache.doris.common.util.TimeUtils; - -import com.google.common.base.Strings; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.collect.Sets; -import org.apache.commons.lang3.StringUtils; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - -import java.text.SimpleDateFormat; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.locks.ReentrantReadWriteLock; -import javax.annotation.Nullable; - -/*** - * Used to store statistics job info, - * including job status, progress, etc. - */ -public class StatisticsJob { - private static final Logger LOG = LogManager.getLogger(StatisticsJob.class); - - public enum JobState { - PENDING, - SCHEDULING, - RUNNING, - FINISHED, - FAILED, - CANCELLED - } - - protected final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(true); - - private final long id = Env.getCurrentEnv().getNextId(); - - /** - * to be collected database stats. - */ - private final long dbId; - - /** - * to be collected table stats. - */ - private final Set tblIds; - - /** - * to be collected partition stats. - */ - private final Map> tableIdToPartitionName; - - /** - * to be collected column stats. - */ - private final Map> tableIdToColumnName; - - private final Map properties; - - /** - * to be executed tasks. - */ - private final List tasks = Lists.newArrayList(); - - private JobState jobState = JobState.PENDING; - private final List errorMsgs = Lists.newArrayList(); - - private final long createTime = System.currentTimeMillis(); - private long startTime = -1L; - private long finishTime = -1L; - private int progress = 0; - - public StatisticsJob(Long dbId, - Set tblIds, - Map> tblIdToPartitionName, - Map> tableIdToColumnName, - Map properties) { - this.dbId = dbId; - this.tblIds = tblIds; - this.tableIdToPartitionName = tblIdToPartitionName; - this.tableIdToColumnName = tableIdToColumnName; - this.properties = properties == null ? Maps.newHashMap() : properties; - } - - public void readLock() { - lock.readLock().lock(); - } - - public void readUnlock() { - lock.readLock().unlock(); - } - - private void writeLock() { - lock.writeLock().lock(); - } - - private void writeUnlock() { - lock.writeLock().unlock(); - } - - public long getId() { - return id; - } - - public long getDbId() { - return dbId; - } - - public Set getTblIds() { - return tblIds; - } - - public Map> getTableIdToPartitionName() { - return tableIdToPartitionName; - } - - public Map> getTableIdToColumnName() { - return tableIdToColumnName; - } - - public Map getProperties() { - return properties; - } - - public List getTasks() { - return tasks; - } - - public List getErrorMsgs() { - return errorMsgs; - } - - public JobState getJobState() { - return jobState; - } - - public long getCreateTime() { - return createTime; - } - - public long getStartTime() { - return startTime; - } - - public long getFinishTime() { - return finishTime; - } - - public int getProgress() { - return progress; - } - - public void updateJobState(JobState newState) throws DdlException { - LOG.info("To change statistics job(id={}) state from {} to {}", id, jobState, newState); - writeLock(); - JobState fromState = jobState; - try { - unprotectedUpdateJobState(newState); - } catch (DdlException e) { - LOG.warn(e.getMessage(), e); - throw e; - } finally { - writeUnlock(); - } - LOG.info("Statistics job(id={}) state changed from {} to {}", id, fromState, jobState); - } - - private void unprotectedUpdateJobState(JobState newState) throws DdlException { - // PENDING -> PENDING/SCHEDULING/FAILED/CANCELLED - if (jobState == JobState.PENDING) { - switch (newState) { - case PENDING: - case SCHEDULING: - break; - case FAILED: - case CANCELLED: - finishTime = System.currentTimeMillis(); - break; - default: - throw new DdlException("Invalid job state transition from " + jobState + " to " + newState); - } - } else if (jobState == JobState.SCHEDULING) { // SCHEDULING -> RUNNING/FAILED/CANCELLED - switch (newState) { - case RUNNING: - startTime = System.currentTimeMillis(); - break; - case FAILED: - case CANCELLED: - finishTime = System.currentTimeMillis(); - break; - default: - throw new DdlException("Invalid job state transition from " + jobState + " to " + newState); - } - } else if (jobState == JobState.RUNNING) { // RUNNING -> FINISHED/FAILED/CANCELLED - switch (newState) { - case FINISHED: - case FAILED: - case CANCELLED: - // set finish time - finishTime = System.currentTimeMillis(); - break; - default: - throw new DdlException("Invalid job state transition from " + jobState + " to " + newState); - } - } else { - // TODO - throw new DdlException("Invalid job state transition from " + jobState + " to " + newState); - } - jobState = newState; - } - - public void updateJobInfoByTaskId(Long taskId, String errorMsg) throws DdlException { - writeLock(); - try { - for (StatisticsTask task : tasks) { - if (taskId == task.getId()) { - if (Strings.isNullOrEmpty(errorMsg)) { - progress += 1; - if (progress == tasks.size()) { - unprotectedUpdateJobState(StatisticsJob.JobState.FINISHED); - } - task.updateTaskState(StatisticsTask.TaskState.FINISHED); - } else { - errorMsgs.add(errorMsg); - task.updateTaskState(StatisticsTask.TaskState.FAILED); - unprotectedUpdateJobState(StatisticsJob.JobState.FAILED); - } - return; - } - } - } finally { - writeUnlock(); - } - } - - public List getShowInfo(@Nullable Long tableId) throws AnalysisException { - List result = Lists.newArrayList(); - - result.add(Long.toString(id)); - - SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); - result.add(TimeUtils.longToTimeString(createTime, dateFormat)); - result.add(startTime != -1L ? TimeUtils.longToTimeString(startTime, dateFormat) : "N/A"); - result.add(finishTime != -1L ? TimeUtils.longToTimeString(finishTime, dateFormat) : "N/A"); - - StringBuilder sb = new StringBuilder(); - for (String errorMsg : errorMsgs) { - sb.append(errorMsg).append("\n"); - } - result.add(sb.toString()); - - int totalTaskNum = 0; - int finishedTaskNum = 0; - Map> tblIdToCols = Maps.newHashMap(); - - for (StatisticsTask task : tasks) { - List statsDescs = task.getStatsDescs(); - - if (!statsDescs.isEmpty()) { - // The same task has the same stats properties - StatsCategory statsCategory = statsDescs.get(0).getStatsCategory(); - long tblId = statsCategory.getTableId(); - - if (tableId == null || tableId == tblId) { - totalTaskNum++; - if (task.getTaskState() == StatisticsTask.TaskState.FINISHED) { - finishedTaskNum++; - } - - String col = statsCategory.getColumnName(); - if (Strings.isNullOrEmpty(col)) { - continue; - } - tblIdToCols.computeIfAbsent(tblId, - (key) -> Sets.newHashSet()).add(col); - } - } - } - - List scope = Lists.newArrayList(); - Database db = Env.getCurrentEnv().getInternalCatalog().getDbOrAnalysisException(dbId); - for (Long tblId : tblIds) { - try { - Table table = db.getTableOrAnalysisException(tblId); - List baseSchema = table.getBaseSchema(); - Set cols = tblIdToCols.get(tblId); - if (cols != null) { - if (baseSchema.size() == cols.size()) { - scope.add(table.getName() + "(*)"); - } else { - scope.add(table.getName() + "(" + StringUtils.join(cols.toArray(), ", ") + ")"); - } - } - } catch (AnalysisException e) { - // catch this exception when table is dropped - LOG.info("get table failed, tableId: " + tblId, e); - } - } - - // exclude invalid info - if (scope.isEmpty()) { - return Collections.emptyList(); - } - - result.add(StringUtils.join(scope.toArray(), ",")); - result.add(finishedTaskNum + "/" + totalTaskNum); - - if (totalTaskNum > 0 && totalTaskNum == finishedTaskNum) { - result.add("FINISHED"); - } else { - result.add(jobState.toString()); - } - - return result; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobManager.java deleted file mode 100644 index dbdc202f0e..0000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobManager.java +++ /dev/null @@ -1,192 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -import org.apache.doris.analysis.ShowAnalyzeStmt; -import org.apache.doris.catalog.Database; -import org.apache.doris.catalog.Env; -import org.apache.doris.catalog.Table; -import org.apache.doris.common.AnalysisException; -import org.apache.doris.common.Config; -import org.apache.doris.common.ErrorCode; -import org.apache.doris.common.ErrorReport; -import org.apache.doris.common.util.ListComparator; -import org.apache.doris.common.util.OrderByPair; - -import com.google.common.base.Strings; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.collect.Sets; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; - -/** - * For unified management of statistics job, - * including job addition, cancellation, scheduling, etc. - */ -public class StatisticsJobManager { - private static final Logger LOG = LogManager.getLogger(StatisticsJobManager.class); - - /** - * save statistics job status information - */ - private final Map idToStatisticsJob = Maps.newConcurrentMap(); - - public Map getIdToStatisticsJob() { - return idToStatisticsJob; - } - - /** - * The statistical job has the following restrict: - * - Rule1: The same table cannot have two unfinished statistics jobs - * - Rule2: The unfinished statistics job could not more than Config.max_statistics_job_num - * - Rule3: The job for external table is not supported - */ - private void checkRestrict(long dbId, Set tableIds) throws AnalysisException { - Database db = Env.getCurrentInternalCatalog().getDbOrAnalysisException(dbId); - db.readLock(); - try { - // check table type - for (Long tableId : tableIds) { - Table table = db.getTableOrAnalysisException(tableId); - if (table.getType() != Table.TableType.OLAP) { - ErrorReport.reportAnalysisException(ErrorCode.ERR_NOT_OLAP_TABLE, db.getFullName(), - table.getName(), "ANALYZE"); - } - } - } finally { - db.readUnlock(); - } - - int unfinishedJobs = 0; - - // check table unfinished job - for (StatisticsJob statisticsJob : idToStatisticsJob.values()) { - StatisticsJob.JobState jobState = statisticsJob.getJobState(); - Set tblIds = statisticsJob.getTblIds(); - if (jobState == StatisticsJob.JobState.PENDING - || jobState == StatisticsJob.JobState.SCHEDULING - || jobState == StatisticsJob.JobState.RUNNING) { - for (Long tableId : tableIds) { - if (tblIds.contains(tableId)) { - throw new AnalysisException("The table(id=" + tableId + ") have unfinished statistics jobs"); - } - } - unfinishedJobs++; - } - } - - // check the number of unfinished tasks - if (unfinishedJobs > Config.cbo_max_statistics_job_num) { - throw new AnalysisException("The unfinished statistics job could not more than cbo_max_statistics_job_num: " - + Config.cbo_max_statistics_job_num); - } - } - - public List> getAnalyzeJobInfos(ShowAnalyzeStmt showStmt) throws AnalysisException { - List> results = Lists.newArrayList(); - - String stateValue = showStmt.getStateValue(); - StatisticsJob.JobState jobState = null; - if (!Strings.isNullOrEmpty(stateValue)) { - jobState = StatisticsJob.JobState.valueOf(stateValue); - } - - // step 1: get job infos - List jobIds = showStmt.getJobIds(); - if (jobIds != null && !jobIds.isEmpty()) { - for (Long jobId : jobIds) { - StatisticsJob statisticsJob = idToStatisticsJob.get(jobId); - if (statisticsJob == null) { - throw new AnalysisException("No such job id: " + jobId); - } - if (jobState == null || jobState == statisticsJob.getJobState()) { - List showInfo = statisticsJob.getShowInfo(null); - if (showInfo == null || showInfo.isEmpty()) { - continue; - } - results.add(showInfo); - } - } - } else { - long dbId = showStmt.getDbId(); - Set tblIds = showStmt.getTblIds(); - for (StatisticsJob statisticsJob : idToStatisticsJob.values()) { - long jobDbId = statisticsJob.getDbId(); - if (jobDbId == dbId) { - // check the state - if (jobState == null || jobState == statisticsJob.getJobState()) { - Set jobTblIds = statisticsJob.getTblIds(); - // get the intersection of two sets - Set set = Sets.newHashSet(); - set.addAll(jobTblIds); - set.retainAll(tblIds); - for (long tblId : set) { - List showInfo = statisticsJob.getShowInfo(tblId); - if (showInfo == null || showInfo.isEmpty()) { - continue; - } - results.add(showInfo); - } - } - } - } - } - - // step2: order the result - ListComparator> comparator; - List orderByPairs = showStmt.getOrderByPairs(); - if (orderByPairs == null) { - // sort by id asc - comparator = new ListComparator<>(0); - } else { - OrderByPair[] orderByPairArr = new OrderByPair[orderByPairs.size()]; - comparator = new ListComparator<>(orderByPairs.toArray(orderByPairArr)); - } - results.sort(comparator); - - // step3: filter by limit - long limit = showStmt.getLimit(); - long offset = showStmt.getOffset() == -1L ? 0 : showStmt.getOffset(); - if (offset >= results.size()) { - results = Collections.emptyList(); - } else if (limit != -1L) { - if ((limit + offset) >= results.size()) { - results = results.subList((int) offset, results.size()); - } else { - results = results.subList((int) offset, (int) (limit + offset)); - } - } - - // step4: convert to result and return it - List> rows = Lists.newArrayList(); - for (List result : results) { - List row = result.stream().map(Object::toString) - .collect(Collectors.toList()); - rows.add(row); - } - - return rows; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobScheduler.java deleted file mode 100644 index 4e492d6e30..0000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobScheduler.java +++ /dev/null @@ -1,530 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -import org.apache.doris.catalog.Column; -import org.apache.doris.catalog.Database; -import org.apache.doris.catalog.Env; -import org.apache.doris.catalog.KeysType; -import org.apache.doris.catalog.OlapTable; -import org.apache.doris.catalog.Partition; -import org.apache.doris.catalog.Table; -import org.apache.doris.catalog.Tablet; -import org.apache.doris.catalog.Type; -import org.apache.doris.common.Config; -import org.apache.doris.common.DdlException; -import org.apache.doris.common.util.MasterDaemon; -import org.apache.doris.statistics.StatsCategory.Category; -import org.apache.doris.statistics.StatsGranularity.Granularity; - -import com.google.common.collect.Lists; -import com.google.common.collect.Queues; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.Queue; -import java.util.Set; - -/** - * Schedule statistics job. - * 1. divide job to multi task - * 2. submit all task to StatisticsTaskScheduler - * Switch job state from pending to scheduling. - */ -public class StatisticsJobScheduler extends MasterDaemon { - private static final Logger LOG = LogManager.getLogger(StatisticsJobScheduler.class); - - /** - * If the table row-count is greater than the maximum number of Be scans for a single BE, - * we'll divide subtasks by partition. relevant values(3700000000L&600000000L) are derived from test. - * COUNT_MAX_SCAN_PER_TASK is for count(expr), NDV_MAX_SCAN_PER_TASK is for min(c1)/max(c1)/ndv(c1). - */ - private static final long COUNT_MAX_SCAN_PER_TASK = 3700000000L; - private static final long NDV_MAX_SCAN_PER_TASK = 600000000L; - - /** - * if the table row count is greater than the value, use sampleSqlTask instead of SqlTask. - */ - private static final int MIN_SAMPLE_ROWS = 200000; - - /** - * Different statistics need to be collected for the jobs submitted by users. - * if all statistics be collected at the same time, the cluster may be overburdened - * and normal query services may be affected. Therefore, we put the jobs into the queue - * and schedule them one by one, and finally divide each job to several subtasks and execute them. - */ - public final Queue pendingJobQueue - = Queues.newLinkedBlockingQueue(Config.cbo_max_statistics_job_num); - - public StatisticsJobScheduler() { - super("Statistics job scheduler", - Config.statistic_job_scheduler_execution_interval_ms); - } - - @Override - protected void runAfterCatalogReady() { - StatisticsJob pendingJob = pendingJobQueue.peek(); - if (pendingJob != null) { - try { - if (pendingJob.getTasks().size() == 0) { - divide(pendingJob); - } - List tasks = pendingJob.getTasks(); - Env.getCurrentEnv().getStatisticsTaskScheduler().addTasks(tasks); - pendingJob.updateJobState(StatisticsJob.JobState.SCHEDULING); - pendingJobQueue.remove(); - } catch (IllegalStateException e) { - // throw IllegalStateException if the queue is full, re-add the tasks next time - LOG.info("The statistics task queue is full, schedule the job(id={}) later", pendingJob.getId()); - } catch (DdlException e) { - pendingJobQueue.remove(); - try { - // TODO change to without exception - pendingJob.updateJobState(StatisticsJob.JobState.FAILED); - } catch (DdlException ddlException) { - LOG.fatal(ddlException.getMessage(), e); - } - LOG.info("Failed to schedule the statistical job(id={})", pendingJob.getId(), e); - } - } - } - - public void addPendingJob(StatisticsJob statisticsJob) throws IllegalStateException { - pendingJobQueue.add(statisticsJob); - } - - /** - * Statistics tasks are of the following types: - * table: - * - row_count: table row count are critical in estimating cardinality and memory usage of scan nodes. - * - data_size: table size, not applicable to CBO, mainly used to monitor and manage table size. - * column: - * - num_distinct_value: used to determine the selectivity of an equivalent expression. - * - min: The minimum value. - * - max: The maximum value. - * - num_nulls: number of nulls. - * - avg_col_len: the average length of a column, in bytes, is used for memory and network IO evaluation. - * - max_col_len: the Max length of the column, in bytes, is used for memory and network IO evaluation. - *

- * Divide: - * - min, max, ndv: These three full indicators are collected by a sub-task. - * - max_col_lens, avg_col_lens: Two sampling indicators were collected by a sub-task. - *

- * If the table row-count is greater than the maximum number of Be scans for a single BE, - * we'll divide subtasks by partition. relevant values(3700000000L&600000000L) are derived from test. - *

- * Eventually, we will get several subtasks of the following types: - * - * @throws DdlException DdlException - * @see MetaStatisticsTask - * @see SampleSQLStatisticsTask - * @see SQLStatisticsTask - */ - private void divide(StatisticsJob job) throws DdlException { - Database db = Env.getCurrentInternalCatalog().getDbOrDdlException(job.getDbId()); - Set tblIds = job.getTblIds(); - - for (Long tblId : tblIds) { - Optional optionalTbl = db.getTable(tblId); - if (optionalTbl.isPresent()) { - Table table = optionalTbl.get(); - if (!table.isPartitioned()) { - getStatsTaskByTable(job, tblId); - } else { - getStatsTaskByPartition(job, tblId); - } - } else { - LOG.warn("Table(id={}) not found in the database {}", tblId, db.getFullName()); - } - } - } - - /** - * For non-partitioned table, dividing the job into several subtasks. - * - * @param job statistics job - * @param tableId table id - * @throws DdlException exception - */ - private void getStatsTaskByTable(StatisticsJob job, long tableId) throws DdlException { - Database db = Env.getCurrentInternalCatalog().getDbOrDdlException(job.getDbId()); - OlapTable table = (OlapTable) db.getTableOrDdlException(tableId); - - if (table.getDataSize() == 0) { - LOG.info("Do not collect statistics for empty table {}", table.getName()); - return; - } - - Map> tblIdToColName = job.getTableIdToColumnName(); - List colNames = tblIdToColName.get(tableId); - - List backendIds = Env.getCurrentSystemInfo().getBackendIds(true); - - // step1: collect statistics by metadata - List descs = Lists.newArrayList(); - - // table data size - StatsCategory dsCategory = getTableStatsCategory(job.getDbId(), tableId); - StatsGranularity dsGranularity = getTableGranularity(tableId); - StatisticsDesc dsStatsDesc = new StatisticsDesc(dsCategory, - dsGranularity, Collections.singletonList(StatsType.DATA_SIZE)); - descs.add(dsStatsDesc); - - // table row count - if (table.getKeysType() == KeysType.DUP_KEYS) { - StatsCategory rcCategory = getTableStatsCategory(job.getDbId(), tableId); - StatsGranularity rcGranularity = getTableGranularity(tableId); - StatisticsDesc rcStatsDesc = new StatisticsDesc(rcCategory, - rcGranularity, Collections.singletonList(StatsType.ROW_COUNT)); - descs.add(rcStatsDesc); - } - - // variable-length columns - List strColNames = Lists.newArrayList(); - - // column max size and avg size - for (String colName : colNames) { - Column column = table.getColumn(colName); - if (column == null) { - LOG.info("Column {} not found in table {}", colName, table.getName()); - continue; - } - Type colType = column.getType(); - if (colType.isStringType()) { - strColNames.add(colName); - continue; - } - StatsCategory colCategory = getColumnStatsCategory(job.getDbId(), tableId, colName); - StatsGranularity colGranularity = getTableGranularity(tableId); - StatisticsDesc colStatsDesc = new StatisticsDesc(colCategory, - colGranularity, Arrays.asList(StatsType.MAX_SIZE, StatsType.AVG_SIZE)); - descs.add(colStatsDesc); - } - - // all meta statistics are collected in one task - MetaStatisticsTask metaStatsTask = new MetaStatisticsTask(job.getId(), descs); - job.getTasks().add(metaStatsTask); - - long rowCount = table.getRowCount(); - - // step2: collect statistics by sql - // table row count (table model is AGGREGATE or UNIQUE) - if (table.getKeysType() != KeysType.DUP_KEYS) { - if (rowCount < backendIds.size() * COUNT_MAX_SCAN_PER_TASK) { - StatsCategory rcCategory = getTableStatsCategory(job.getDbId(), tableId); - StatsGranularity rcGranularity = getTableGranularity(tableId); - StatisticsDesc rcStatsDesc = new StatisticsDesc(rcCategory, - rcGranularity, Collections.singletonList(StatsType.ROW_COUNT)); - SQLStatisticsTask sqlTask = new SQLStatisticsTask(job.getId(), - Collections.singletonList(rcStatsDesc)); - job.getTasks().add(sqlTask); - } else { - // divide subtasks by tablet - Collection partitions = table.getPartitions(); - for (Partition partition : partitions) { - Collection tablets = partition.getBaseIndex().getTablets(); - tablets.forEach(tablet -> { - StatsCategory rcCategory = getTableStatsCategory(job.getDbId(), tableId); - StatsGranularity rcGranularity = getTabletGranularity(tablet.getId()); - StatisticsDesc rcStatsDesc = new StatisticsDesc(rcCategory, - rcGranularity, Collections.singletonList(StatsType.ROW_COUNT)); - SQLStatisticsTask sqlTask = new SQLStatisticsTask(job.getId(), - Collections.singletonList(rcStatsDesc)); - job.getTasks().add(sqlTask); - }); - } - } - } - - // column max size, avg size - for (String colName : strColNames) { - StatsCategory colCategory = getColumnStatsCategory(job.getDbId(), tableId, colName); - StatsGranularity colGranularity = getTableGranularity(tableId); - getColumnSizeSqlTask(job, rowCount, colCategory, colGranularity); - } - - // column num nulls - for (String colName : colNames) { - StatsCategory colCategory = getColumnStatsCategory(job.getDbId(), tableId, colName); - StatsGranularity colGranularity = getTableGranularity(tableId); - StatisticsDesc colStatsDesc = new StatisticsDesc(colCategory, - colGranularity, Collections.singletonList(StatsType.NUM_NULLS)); - SQLStatisticsTask sqlTask = new SQLStatisticsTask(job.getId(), - Collections.singletonList(colStatsDesc)); - job.getTasks().add(sqlTask); - } - - // column max value, min value and ndv - for (String colName : colNames) { - if (rowCount < backendIds.size() * NDV_MAX_SCAN_PER_TASK) { - StatsCategory colCategory = getColumnStatsCategory(job.getDbId(), tableId, colName); - StatsGranularity colGranularity = getTableGranularity(tableId); - StatisticsDesc colStatsDesc = new StatisticsDesc(colCategory, - colGranularity, Arrays.asList(StatsType.MAX_VALUE, StatsType.MIN_VALUE, StatsType.NDV)); - SQLStatisticsTask sqlTask = new SQLStatisticsTask(job.getId(), - Collections.singletonList(colStatsDesc)); - job.getTasks().add(sqlTask); - } else { - // for non-partitioned table system automatically - // generates a partition with the same name as the table name - Collection partitions = table.getPartitions(); - for (Partition partition : partitions) { - List tablets = partition.getBaseIndex().getTablets(); - tablets.forEach(tablet -> { - StatsCategory colCategory = getColumnStatsCategory(job.getDbId(), tableId, colName); - StatsGranularity colGranularity = getTabletGranularity(tablet.getId()); - StatisticsDesc colStatsDesc = new StatisticsDesc(colCategory, - colGranularity, Arrays.asList(StatsType.MAX_VALUE, StatsType.MIN_VALUE, StatsType.NDV)); - SQLStatisticsTask sqlTask = new SQLStatisticsTask(job.getId(), - Collections.singletonList(colStatsDesc)); - job.getTasks().add(sqlTask); - }); - } - } - } - } - - /** - * If table is partitioned, dividing the job into several subtasks by partition. - * - * @param job statistics job - * @param tableId table id - * @throws DdlException exception - */ - private void getStatsTaskByPartition(StatisticsJob job, long tableId) throws DdlException { - Database db = Env.getCurrentInternalCatalog().getDbOrDdlException(job.getDbId()); - OlapTable table = (OlapTable) db.getTableOrDdlException(tableId); - - Map> tblIdToColName = job.getTableIdToColumnName(); - List colNames = tblIdToColName.get(tableId); - - Map> tblIdToPartitionName = job.getTableIdToPartitionName(); - List partitionNames = tblIdToPartitionName.get(tableId); - - List backendIds = Env.getCurrentSystemInfo().getBackendIds(true); - - for (String partitionName : partitionNames) { - Partition partition = table.getPartition(partitionName); - if (partition == null) { - LOG.info("Partition {} not found in the table {}", partitionName, table.getName()); - continue; - } - if (partition.getDataSize() == 0) { - LOG.info("Do not collect statistics for empty partition {} in the table {}", - partitionName, table.getName()); - continue; - } - - long partitionId = partition.getId(); - long rowCount = partition.getBaseIndex().getRowCount(); - - // step1: collect statistics by metadata - List descs = Lists.newArrayList(); - - // partition data size - StatsCategory dsCategory = getPartitionStatsCategory(job.getDbId(), tableId, partitionName); - StatsGranularity dsGranularity = getPartitionGranularity(partitionId); - StatisticsDesc dsStatsDesc = new StatisticsDesc(dsCategory, - dsGranularity, Collections.singletonList(StatsType.DATA_SIZE)); - descs.add(dsStatsDesc); - - // partition row count - if (table.getKeysType() == KeysType.DUP_KEYS) { - StatsCategory rcCategory = getPartitionStatsCategory(job.getDbId(), tableId, partitionName); - StatsGranularity rcGranularity = getPartitionGranularity(partitionId); - StatisticsDesc rcStatsDesc = new StatisticsDesc(rcCategory, - rcGranularity, Collections.singletonList(StatsType.ROW_COUNT)); - descs.add(rcStatsDesc); - } - - // variable-length columns - List strColNames = Lists.newArrayList(); - - // column max size and avg size - for (String colName : colNames) { - Column column = table.getColumn(colName); - if (column == null) { - LOG.info("Column {} not found in the table {}", colName, table.getName()); - continue; - } - Type colType = column.getType(); - if (colType.isStringType()) { - strColNames.add(colName); - continue; - } - StatsCategory colCategory = getColumnStatsCategory(job.getDbId(), tableId, partitionName, colName); - StatsGranularity colGranularity = getPartitionGranularity(partitionId); - StatisticsDesc colStatsDesc = new StatisticsDesc(colCategory, - colGranularity, Arrays.asList(StatsType.MAX_SIZE, StatsType.AVG_SIZE)); - descs.add(colStatsDesc); - } - - // all meta statistics are collected in one task - MetaStatisticsTask metaStatsTask = new MetaStatisticsTask(job.getId(), descs); - job.getTasks().add(metaStatsTask); - - // step2: collect statistics by sql - // partition row count (table model is AGGREGATE or UNIQUE) - if (table.getKeysType() != KeysType.DUP_KEYS) { - if (rowCount < backendIds.size() * COUNT_MAX_SCAN_PER_TASK) { - StatsCategory rcCategory = getPartitionStatsCategory(job.getDbId(), tableId, partitionName); - StatsGranularity rcGranularity = getPartitionGranularity(partitionId); - StatisticsDesc rcStatsDesc = new StatisticsDesc(rcCategory, - rcGranularity, Collections.singletonList(StatsType.ROW_COUNT)); - SQLStatisticsTask sqlTask = new SQLStatisticsTask(job.getId(), - Collections.singletonList(rcStatsDesc)); - job.getTasks().add(sqlTask); - } else { - // divide subtasks by tablet - List tablets = partition.getBaseIndex().getTablets(); - tablets.forEach(tablet -> { - StatsCategory rcCategory = getPartitionStatsCategory(job.getDbId(), tableId, partitionName); - StatsGranularity rcGranularity = getTabletGranularity(tablet.getId()); - StatisticsDesc rcStatsDesc = new StatisticsDesc(rcCategory, - rcGranularity, Collections.singletonList(StatsType.ROW_COUNT)); - SQLStatisticsTask sqlTask = new SQLStatisticsTask(job.getId(), - Collections.singletonList(rcStatsDesc)); - job.getTasks().add(sqlTask); - }); - } - } - - // column max size, avg size - for (String colName : strColNames) { - StatsCategory colCategory = getColumnStatsCategory(job.getDbId(), tableId, partitionName, colName); - StatsGranularity colGranularity = getPartitionGranularity(partitionId); - getColumnSizeSqlTask(job, rowCount, colCategory, colGranularity); - } - - // column null nums - for (String colName : colNames) { - StatsCategory colCategory = getColumnStatsCategory(job.getDbId(), tableId, partitionName, colName); - StatsGranularity colGranularity = getPartitionGranularity(partitionId); - StatisticsDesc colStatsDesc = new StatisticsDesc(colCategory, - colGranularity, Collections.singletonList(StatsType.NUM_NULLS)); - SQLStatisticsTask sqlTask = new SQLStatisticsTask(job.getId(), - Collections.singletonList(colStatsDesc)); - job.getTasks().add(sqlTask); - } - - // column max value, min value and ndv - for (String colName : colNames) { - if (rowCount < backendIds.size() * NDV_MAX_SCAN_PER_TASK) { - StatsCategory colCategory = getColumnStatsCategory(job.getDbId(), tableId, partitionName, colName); - StatsGranularity colGranularity = getPartitionGranularity(partitionId); - StatisticsDesc colStatsDesc = new StatisticsDesc(colCategory, - colGranularity, Arrays.asList(StatsType.MAX_VALUE, StatsType.MIN_VALUE, StatsType.NDV)); - SQLStatisticsTask sqlTask = new SQLStatisticsTask(job.getId(), - Collections.singletonList(colStatsDesc)); - job.getTasks().add(sqlTask); - } else { - // divide subtasks by tablet - List tablets = partition.getBaseIndex().getTablets(); - tablets.forEach(tablet -> { - StatsCategory colCategory = getColumnStatsCategory(job.getDbId(), - tableId, partitionName, colName); - StatsGranularity colGranularity = getTabletGranularity(tablet.getId()); - StatisticsDesc colStatsDesc = new StatisticsDesc(colCategory, - colGranularity, Arrays.asList(StatsType.MAX_VALUE, StatsType.MIN_VALUE, StatsType.NDV)); - SQLStatisticsTask sqlTask = new SQLStatisticsTask(job.getId(), - Collections.singletonList(colStatsDesc)); - job.getTasks().add(sqlTask); - }); - } - } - } - } - - private void getColumnSizeSqlTask(StatisticsJob job, long rowCount, - StatsCategory colCategory, StatsGranularity colGranularity) { - StatisticsDesc colStatsDesc = new StatisticsDesc(colCategory, - colGranularity, Arrays.asList(StatsType.MAX_SIZE, StatsType.AVG_SIZE)); - SQLStatisticsTask sqlTask; - if (rowCount < MIN_SAMPLE_ROWS) { - sqlTask = new SQLStatisticsTask(job.getId(), Collections.singletonList(colStatsDesc)); - } else { - sqlTask = new SampleSQLStatisticsTask(job.getId(), Collections.singletonList(colStatsDesc)); - } - job.getTasks().add(sqlTask); - } - - private StatsCategory getTableStatsCategory(long dbId, long tableId) { - StatsCategory category = new StatsCategory(); - category.setCategory(StatsCategory.Category.TABLE); - category.setDbId(dbId); - category.setTableId(tableId); - return category; - } - - private StatsCategory getPartitionStatsCategory(long dbId, long tableId, String partitionName) { - StatsCategory category = new StatsCategory(); - category.setCategory(Category.PARTITION); - category.setDbId(dbId); - category.setTableId(tableId); - category.setPartitionName(partitionName); - return category; - } - - private StatsCategory getColumnStatsCategory(long dbId, long tableId, String columnName) { - StatsCategory category = new StatsCategory(); - category.setDbId(dbId); - category.setTableId(tableId); - category.setColumnName(columnName); - category.setCategory(Category.COLUMN); - category.setColumnName(columnName); - return category; - } - - private StatsCategory getColumnStatsCategory(long dbId, long tableId, String partitionName, String columnName) { - StatsCategory category = new StatsCategory(); - category.setDbId(dbId); - category.setTableId(tableId); - category.setPartitionName(partitionName); - category.setColumnName(columnName); - category.setCategory(Category.COLUMN); - category.setColumnName(columnName); - return category; - } - - private StatsGranularity getTableGranularity(long tableId) { - StatsGranularity granularity = new StatsGranularity(); - granularity.setTableId(tableId); - granularity.setGranularity(Granularity.TABLE); - return granularity; - } - - private StatsGranularity getPartitionGranularity(long partitionId) { - StatsGranularity granularity = new StatsGranularity(); - granularity.setPartitionId(partitionId); - granularity.setGranularity(Granularity.PARTITION); - return granularity; - } - - private StatsGranularity getTabletGranularity(long tabletId) { - StatsGranularity granularity = new StatsGranularity(); - granularity.setTabletId(tabletId); - granularity.setGranularity(Granularity.TABLET); - return granularity; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsManager.java deleted file mode 100644 index 2c9a856a47..0000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsManager.java +++ /dev/null @@ -1,508 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -import org.apache.doris.analysis.AlterColumnStatsStmt; -import org.apache.doris.analysis.AlterTableStatsStmt; -import org.apache.doris.analysis.DropTableStatsStmt; -import org.apache.doris.analysis.ShowTableStatsStmt; -import org.apache.doris.analysis.TableName; -import org.apache.doris.catalog.Column; -import org.apache.doris.catalog.Database; -import org.apache.doris.catalog.Env; -import org.apache.doris.catalog.OlapTable; -import org.apache.doris.catalog.PartitionType; -import org.apache.doris.catalog.Table; -import org.apache.doris.catalog.Type; -import org.apache.doris.common.AnalysisException; -import org.apache.doris.common.ErrorCode; -import org.apache.doris.common.ErrorReport; -import org.apache.doris.mysql.privilege.PrivPredicate; -import org.apache.doris.qe.ConnectContext; -import org.apache.doris.statistics.StatisticsTaskResult.TaskResult; -import org.apache.doris.statistics.StatsGranularity.Granularity; - -import com.google.common.base.Strings; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import org.apache.commons.lang3.math.NumberUtils; - -import java.util.Comparator; -import java.util.List; -import java.util.Map; -import java.util.Set; - -public class StatisticsManager { - - private final Statistics statistics; - - public StatisticsManager() { - statistics = new Statistics(); - } - - public Statistics getStatistics() { - return statistics; - } - - /** - * Support for deleting table or partition statistics. - * - * @param stmt get table name and partition name from it. - */ - public void dropStats(DropTableStatsStmt stmt) { - Map> tblIdToPartition = stmt.getTblIdToPartition(); - - if (tblIdToPartition != null && !tblIdToPartition.isEmpty()) { - tblIdToPartition.forEach((tableId, partitions) -> { - if (partitions == null || partitions.isEmpty()) { - statistics.dropTableStats(tableId); - } else { - for (String partition : partitions) { - statistics.dropPartitionStats(tableId, partition); - } - } - }); - } - } - - /** - * Alter table or partition stats. if partition name is not null, update partition stats. - * - * @param stmt alter table stats stmt - * @throws AnalysisException if table or partition not exist - */ - public void alterTableStatistics(AlterTableStatsStmt stmt) throws AnalysisException { - Table table = validateTableName(stmt.getTableName()); - List partitionNames = stmt.getPartitionNames(); - Map statsTypeToValue = stmt.getStatsTypeToValue(); - - if (partitionNames.isEmpty()) { - statistics.updateTableStats(table.getId(), statsTypeToValue); - return; - } - - for (String partitionName : partitionNames) { - partitionName = validatePartitionName(table, partitionName); - statistics.updatePartitionStats(table.getId(), partitionName, statsTypeToValue); - } - } - - /** - * Alter column stats. if partition name is not null, update column of partition stats. - * - * @param stmt alter column stats stmt - * @throws AnalysisException if table, column or partition not exist - */ - public void alterColumnStatistics(AlterColumnStatsStmt stmt) throws AnalysisException { - Table table = validateTableName(stmt.getTableName()); - String colName = stmt.getColumnName(); - List partitionNames = stmt.getPartitionNames(); - Map statsTypeToValue = stmt.getStatsTypeToValue(); - - if ((partitionNames.isEmpty()) && table instanceof OlapTable - && !((OlapTable) table).getPartitionInfo().getType().equals(PartitionType.UNPARTITIONED)) { - throw new AnalysisException("Partitioned table must specify partition name."); - } - - if (partitionNames.isEmpty()) { - Column column = validateColumn(table, colName); - Type colType = column.getType(); - statistics.updateColumnStats(table.getId(), colName, colType, statsTypeToValue); - return; - } - - for (String partitionName : partitionNames) { - validatePartitionName(table, partitionName); - Column column = validateColumn(table, colName); - Type colType = column.getType(); - statistics.updateColumnStats(table.getId(), partitionName, colName, colType, statsTypeToValue); - } - } - - /** - * Update statistics. there are three types of statistics: column, table and column. - * - * @param statsTaskResults statistics task results - * @throws AnalysisException if column, table or partition not exist - */ - public void updateStatistics(List statsTaskResults) throws AnalysisException { - // tablet granularity stats(row count, max value, min value, ndv) - Map>> tabletStats = Maps.newHashMap(); - - for (StatisticsTaskResult statsTaskResult : statsTaskResults) { - if (statsTaskResult != null) { - List taskResults = statsTaskResult.getTaskResults(); - - for (TaskResult result : taskResults) { - validateResult(result); - long tblId = result.getTableId(); - Map statsTypeToValue = result.getStatsTypeToValue(); - - if (result.getGranularity() == Granularity.TABLET) { - statsTypeToValue.forEach((statsType, value) -> { - if (tabletStats.containsKey(statsType)) { - Map> resultToValue = tabletStats.get(statsType); - List values = resultToValue.get(result); - values.add(value); - } else { - Map> resultToValue = Maps.newHashMap(); - List values = Lists.newArrayList(); - values.add(value); - resultToValue.put(result, values); - tabletStats.put(statsType, resultToValue); - } - }); - continue; - } - - switch (result.getCategory()) { - case TABLE: - statistics.updateTableStats(tblId, statsTypeToValue); - break; - case PARTITION: - String partitionName = result.getPartitionName(); - statistics.updatePartitionStats(tblId, partitionName, statsTypeToValue); - break; - case COLUMN: - updateColumnStats(result, statsTypeToValue); - break; - default: - throw new AnalysisException("Unknown stats category: " + result.getCategory()); - } - } - } - } - - // update tablet granularity stats - updateTabletStats(tabletStats); - } - - private void updateColumnStats(TaskResult result, Map statsTypeToValue) - throws AnalysisException { - long dbId = result.getDbId(); - long tblId = result.getTableId(); - String partitionName = result.getPartitionName(); - String colName = result.getColumnName(); - - Database db = Env.getCurrentInternalCatalog().getDbOrAnalysisException(dbId); - OlapTable table = (OlapTable) db.getTableOrAnalysisException(tblId); - Column column = table.getColumn(colName); - Type colType = column.getType(); - - switch (result.getGranularity()) { - case TABLE: - statistics.updateColumnStats(tblId, colName, colType, statsTypeToValue); - break; - case PARTITION: - statistics.updateColumnStats(tblId, partitionName, colName, colType, statsTypeToValue); - break; - default: - // The tablet granularity is handle separately - throw new AnalysisException("Unknown granularity: " + result.getGranularity()); - } - } - - private void updateTabletStats(Map>> tabletStats) - throws AnalysisException { - for (Map.Entry>> statsEntry : tabletStats.entrySet()) { - StatsType statsType = statsEntry.getKey(); - Map> resultToValue = statsEntry.getValue(); - - for (Map.Entry> resultEntry : resultToValue.entrySet()) { - TaskResult result = resultEntry.getKey(); - List values = resultEntry.getValue(); - - switch (statsType) { - case ROW_COUNT: - updateTabletRowCount(result, values); - break; - case MAX_VALUE: - updateTabletMaxValue(result, values); - break; - case MIN_VALUE: - updateTabletMinValue(result, values); - break; - case NDV: - updateTabletNDV(result, values); - break; - default: - throw new AnalysisException("Unknown stats type: " + statsType); - } - } - } - } - - /** - * Get the statistics of a table. if specified partition name, get the statistics of the partition. - * - * @param stmt statement - * @return partition or table statistics - * @throws AnalysisException statistics not exist - */ - public List> showTableStatsList(ShowTableStatsStmt stmt) throws AnalysisException { - String dbName = stmt.getDbName(); - Database db = Env.getCurrentInternalCatalog().getDbOrAnalysisException(dbName); - String tableName = stmt.getTableName(); - List> result = Lists.newArrayList(); - - if (tableName != null) { - Table table = db.getTableOrAnalysisException(tableName); - // check priv - if (!Env.getCurrentEnv().getAuth() - .checkTblPriv(ConnectContext.get(), dbName, tableName, PrivPredicate.SHOW)) { - ErrorReport.reportAnalysisException(ErrorCode.ERR_TABLEACCESS_DENIED_ERROR, "SHOW CREATE TABLE", - ConnectContext.get().getQualifiedUser(), ConnectContext.get().getRemoteIP(), - dbName + ": " + tableName); - } - - List partitionNames = stmt.getPartitionNames(); - - if (partitionNames.isEmpty()) { - result.add(showTableStats(table)); - } else { - for (String partitionName : partitionNames) { - validatePartitionName(table, partitionName); - result.add(showTableStats(table, partitionName)); - } - } - } else { - for (Table table : db.getTables()) { - if (!Env.getCurrentEnv().getAuth() - .checkTblPriv(ConnectContext.get(), dbName, table.getName(), PrivPredicate.SHOW)) { - continue; - } - try { - result.add(showTableStats(table)); - } catch (AnalysisException e) { - // ignore no stats table - } - } - } - return result; - } - - private List showTableStats(Table table) throws AnalysisException { - TableStats tableStats = statistics.getTableStats(table.getId()); - if (tableStats == null) { - throw new AnalysisException("There is no statistics in this table:" + table.getName()); - } - List row = Lists.newArrayList(); - row.add(table.getName()); - row.addAll(tableStats.getShowInfo()); - return row; - } - - private List showTableStats(Table table, String partitionName) throws AnalysisException { - Map partitionStats = statistics.getPartitionStats(table.getId(), partitionName); - PartitionStats partitionStat = partitionStats.get(partitionName); - if (partitionStat == null) { - throw new AnalysisException("There is no statistics in this partition:" + partitionName); - } - List row = Lists.newArrayList(); - row.add(partitionName); - row.addAll(partitionStat.getShowInfo()); - return row; - } - - private List> showColumnStats(long tableId) throws AnalysisException { - List> result = Lists.newArrayList(); - Map columnStats = statistics.getColumnStats(tableId); - columnStats.forEach((key, stats) -> { - List row = Lists.newArrayList(); - row.add(key); - row.addAll(stats.getShowInfo()); - result.add(row); - }); - return result; - } - - private List> showColumnStats(long tableId, String partitionName) throws AnalysisException { - List> result = Lists.newArrayList(); - Map columnStats = statistics.getColumnStats(tableId, partitionName); - columnStats.forEach((key, stats) -> { - List row = Lists.newArrayList(); - row.add(key); - row.addAll(stats.getShowInfo()); - result.add(row); - }); - return result; - } - - private void updateTabletRowCount(TaskResult result, List values) throws AnalysisException { - long statsValue = values.stream().filter(NumberUtils::isCreatable) - .mapToLong(Long::parseLong).sum(); - - Map statsTypeToValue = Maps.newHashMap(); - statsTypeToValue.put(StatsType.ROW_COUNT, String.valueOf(statsValue)); - - if (result.getCategory() == StatsCategory.Category.TABLE) { - statistics.updateTableStats(result.getTableId(), statsTypeToValue); - } else if (result.getCategory() == StatsCategory.Category.PARTITION) { - statistics.updatePartitionStats(result.getTableId(), result.getPartitionName(), statsTypeToValue); - } - } - - private void updateTabletMaxValue(TaskResult result, List values) throws AnalysisException { - Column column = getNotNullColumn(result); - Type type = column.getType(); - String maxValue = getNumericMaxOrMinValue(values, type, true); - - Map statsTypeToValue = Maps.newHashMap(); - statsTypeToValue.put(StatsType.MAX_VALUE, maxValue); - - updateTabletGranularityStats(result, type, statsTypeToValue); - } - - private void updateTabletMinValue(TaskResult result, List values) throws AnalysisException { - Column column = getNotNullColumn(result); - Type type = column.getType(); - String minValue = getNumericMaxOrMinValue(values, type, false); - - Map statsTypeToValue = Maps.newHashMap(); - statsTypeToValue.put(StatsType.MIN_VALUE, minValue); - - updateTabletGranularityStats(result, type, statsTypeToValue); - } - - private void updateTabletNDV(TaskResult result, List values) throws AnalysisException { - double statsValue = values.stream().filter(NumberUtils::isCreatable) - .mapToLong(Long::parseLong).sum(); - - Map statsTypeToValue = Maps.newHashMap(); - statsTypeToValue.put(StatsType.NDV, String.valueOf(statsValue)); - - Column column = getNotNullColumn(result); - Type type = column.getType(); - updateTabletGranularityStats(result, type, statsTypeToValue); - } - - private void updateTabletGranularityStats(TaskResult result, Type columnType, - Map statsTypeToValue) throws AnalysisException { - if (result.getCategory() == StatsCategory.Category.TABLE) { - statistics.updateColumnStats(result.getTableId(), - result.getColumnName(), columnType, statsTypeToValue); - } else if (result.getCategory() == StatsCategory.Category.PARTITION) { - statistics.updateColumnStats(result.getTableId(), result.getPartitionName(), - result.getColumnName(), columnType, statsTypeToValue); - } - } - - private Table validateTableName(TableName dbTableName) throws AnalysisException { - String dbName = dbTableName.getDb(); - String tableName = dbTableName.getTbl(); - Database db = Env.getCurrentInternalCatalog().getDbOrAnalysisException(dbName); - return db.getTableOrAnalysisException(tableName); - } - - /** - * Partition name is optional, if partition name is not null, it will be validated. - */ - private String validatePartitionName(Table table, String partitionName) throws AnalysisException { - if (!table.isPartitioned() && !Strings.isNullOrEmpty(partitionName)) { - ErrorReport.reportAnalysisException(ErrorCode.ERR_PARTITION_CLAUSE_ON_NONPARTITIONED, - partitionName, table.getName()); - } - - if (!Strings.isNullOrEmpty(partitionName) && table.getPartition(partitionName) == null) { - ErrorReport.reportAnalysisException(ErrorCode.ERR_UNKNOWN_PARTITION, - partitionName, table.getName()); - } - - return partitionName; - } - - private Column validateColumn(Table table, String columnName) throws AnalysisException { - Column column = table.getColumn(columnName); - if (column == null) { - ErrorReport.reportAnalysisException(ErrorCode.ERR_BAD_FIELD_ERROR, columnName, table.getName()); - } - return column; - } - - private void validateResult(TaskResult result) throws AnalysisException { - Database db = Env.getCurrentInternalCatalog().getDbOrAnalysisException(result.getDbId()); - Table table = db.getTableOrAnalysisException(result.getTableId()); - - if (!Strings.isNullOrEmpty(result.getPartitionName())) { - validatePartitionName(table, result.getPartitionName()); - } - - if (!Strings.isNullOrEmpty(result.getColumnName())) { - validateColumn(table, result.getColumnName()); - } - - Map statsTypeToValue = result.getStatsTypeToValue(); - if (statsTypeToValue == null || statsTypeToValue.isEmpty()) { - throw new AnalysisException("StatsTypeToValue is empty."); - } - } - - private Column getNotNullColumn(TaskResult result) throws AnalysisException { - Database db = Env.getCurrentInternalCatalog().getDbOrAnalysisException(result.getDbId()); - Table table = db.getTableOrAnalysisException(result.getTableId()); - Column column = table.getColumn(result.getColumnName()); - if (column == null) { - throw new AnalysisException("Column " + result.getColumnName() + " does not exist"); - } - return column; - } - - /** - * Get the max/min value of the column. - * - * @param values String List of values - * @param type column type - * @param maxOrMin true for max, false for min - * @return the max/min value of the column. - */ - private String getNumericMaxOrMinValue(List values, Type type, boolean maxOrMin) { - if (type.isFixedPointType()) { - long result = 0L; - for (String value : values) { - if (NumberUtils.isCreatable(value)) { - long temp = Long.parseLong(value); - if (maxOrMin) { - result = Math.max(result, temp); - } else { - result = Math.min(result, temp); - } - } - } - return String.valueOf(result); - } - - if (type.isFloatingPointType()) { - double result = 0.0; - for (String value : values) { - if (NumberUtils.isCreatable(value)) { - double temp = Double.parseDouble(value); - if (maxOrMin) { - result = Math.max(result, temp); - } else { - result = Math.min(result, temp); - } - } - } - return String.valueOf(result); - } - - // is not numeric type - values.sort(Comparator.naturalOrder()); - return values.size() > 0 ? values.get(values.size() - 1) : null; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsTask.java deleted file mode 100644 index 9428c17df1..0000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsTask.java +++ /dev/null @@ -1,172 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -import org.apache.doris.catalog.Env; -import org.apache.doris.common.DdlException; -import org.apache.doris.statistics.StatisticsTaskResult.TaskResult; - -import com.google.common.base.Preconditions; -import com.google.common.collect.Maps; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - -import java.util.List; -import java.util.concurrent.Callable; - -/** - * The StatisticsTask belongs to one StatisticsJob. - * A job may be split into multiple tasks but a task can only belong to one job. - * - * @granularityDesc, @categoryDesc, @statsTypeList - * These three attributes indicate which statistics this task is responsible for collecting. - * In general, a task will collect more than one @StatsType at the same time - * while all of types belong to the same @granularityDesc and @categoryDesc. - * For example: the task is responsible for collecting min, max, ndv of t1.c1 in partition p1. - * @granularityDesc: StatsGranularity=partition - */ -public abstract class StatisticsTask implements Callable { - protected static final Logger LOG = LogManager.getLogger(StatisticsTask.class); - - public enum TaskState { - PENDING, - RUNNING, - FINISHED, - FAILED - } - - protected long id = Env.getCurrentEnv().getNextId(); - protected long jobId; - protected List statsDescs; - protected TaskState taskState = TaskState.PENDING; - - protected final long createTime = System.currentTimeMillis(); - protected long startTime = -1L; - protected long finishTime = -1L; - - public StatisticsTask(long jobId, List statsDescs) { - this.jobId = jobId; - this.statsDescs = statsDescs; - } - - public long getId() { - return id; - } - - public void setId(long id) { - this.id = id; - } - - public long getJobId() { - return jobId; - } - - public List getStatsDescs() { - return statsDescs; - } - - public TaskState getTaskState() { - return taskState; - } - - public long getCreateTime() { - return createTime; - } - - public long getStartTime() { - return startTime; - } - - public long getFinishTime() { - return finishTime; - } - - /** - * Different statistics implement different collection methods. - * - * @return true if this task is finished, false otherwise - * @throws Exception - */ - @Override - public abstract StatisticsTaskResult call() throws Exception; - - // please retain job lock firstly - public void updateTaskState(TaskState newState) throws DdlException { - LOG.info("To change statistics task(id={}) state from {} to {}", id, taskState, newState); - String errorMsg = "Invalid statistics task state transition from "; - - // PENDING -> RUNNING/FAILED - if (taskState == TaskState.PENDING) { - switch (newState) { - case RUNNING: - startTime = System.currentTimeMillis(); - break; - case FAILED: - finishTime = System.currentTimeMillis(); - break; - default: - throw new DdlException(errorMsg + taskState + " to " + newState); - } - } else if (taskState == TaskState.RUNNING) { // RUNNING -> FINISHED/FAILED - switch (newState) { - case FINISHED: - case FAILED: - finishTime = System.currentTimeMillis(); - break; - default: - throw new DdlException(errorMsg + taskState + " to " + newState); - } - } else { // unsupported state transition - throw new DdlException(errorMsg + taskState + " to " + newState); - } - - LOG.info("Statistics task(id={}) state changed from {} to {}", id, taskState, newState); - taskState = newState; - } - - protected void checkStatisticsDesc() throws DdlException { - for (StatisticsDesc statsDesc : statsDescs) { - if (statsDesc == null) { - throw new DdlException("StatisticsDesc is null."); - } - - if (statsDesc.getStatsCategory() == null) { - throw new DdlException("Category is null."); - } - - if (statsDesc.getStatsGranularity() == null) { - throw new DdlException("Granularity is null."); - } - - Preconditions.checkState(statsDesc.getStatsCategory().getDbId() > 0L); - Preconditions.checkState(statsDesc.getStatsCategory().getTableId() > 0L); - } - } - - protected TaskResult createNewTaskResult(StatsCategory category, StatsGranularity granularity) { - TaskResult result = new TaskResult(); - result.setDbId(category.getDbId()); - result.setTableId(category.getTableId()); - result.setPartitionName(category.getPartitionName()); - result.setColumnName(category.getColumnName()); - result.setCategory(category.getCategory()); - result.setGranularity(granularity.getGranularity()); - result.setStatsTypeToValue(Maps.newHashMap()); - return result; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsTaskResult.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsTaskResult.java deleted file mode 100644 index ea5fb6ed34..0000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsTaskResult.java +++ /dev/null @@ -1,132 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - - -import org.apache.doris.statistics.StatsCategory.Category; -import org.apache.doris.statistics.StatsGranularity.Granularity; - -import java.util.List; -import java.util.Map; -import java.util.Objects; - -public class StatisticsTaskResult { - private List taskResults; - - public StatisticsTaskResult(List taskResults) { - this.taskResults = taskResults; - } - - public List getTaskResults() { - return taskResults; - } - - public void setTaskResults(List taskResults) { - this.taskResults = taskResults; - } - - public static class TaskResult { - private long dbId = -1L; - private long tableId = -1L; - private String partitionName = ""; - private String columnName = ""; - - private Category category; - private Granularity granularity; - private Map statsTypeToValue; - - public long getDbId() { - return dbId; - } - - public void setDbId(long dbId) { - this.dbId = dbId; - } - - public long getTableId() { - return tableId; - } - - public void setTableId(long tableId) { - this.tableId = tableId; - } - - public String getPartitionName() { - return partitionName; - } - - public void setPartitionName(String partitionName) { - this.partitionName = partitionName; - } - - public String getColumnName() { - return columnName; - } - - public void setColumnName(String columnName) { - this.columnName = columnName; - } - - public Category getCategory() { - return category; - } - - public void setCategory(Category category) { - this.category = category; - } - - public Granularity getGranularity() { - return granularity; - } - - public void setGranularity(Granularity granularity) { - this.granularity = granularity; - } - - public Map getStatsTypeToValue() { - return statsTypeToValue; - } - - public void setStatsTypeToValue(Map statsTypeToValue) { - this.statsTypeToValue = statsTypeToValue; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - TaskResult that = (TaskResult) o; - return dbId == that.dbId - && tableId == that.tableId - && partitionName.equals(that.partitionName) - && columnName.equals(that.columnName) - && category == that.category - && granularity == that.granularity; - } - - @Override - public int hashCode() { - return Objects.hash(dbId, tableId, partitionName, - columnName, category, granularity); - } - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsTaskScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsTaskScheduler.java deleted file mode 100644 index b94f6d3175..0000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsTaskScheduler.java +++ /dev/null @@ -1,198 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -import org.apache.doris.analysis.AnalyzeStmt; -import org.apache.doris.catalog.Env; -import org.apache.doris.common.AnalysisException; -import org.apache.doris.common.Config; -import org.apache.doris.common.DdlException; -import org.apache.doris.common.ThreadPoolManager; -import org.apache.doris.common.util.MasterDaemon; -import org.apache.doris.statistics.StatisticsJob.JobState; -import org.apache.doris.statistics.StatisticsTask.TaskState; - -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.collect.Queues; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - -import java.util.List; -import java.util.Map; -import java.util.Queue; -import java.util.concurrent.CancellationException; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.Future; -import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; - -/** - * Schedule statistics task - */ -public class StatisticsTaskScheduler extends MasterDaemon { - private static final Logger LOG = LogManager.getLogger(StatisticsTaskScheduler.class); - - private final Queue queue = Queues.newLinkedBlockingQueue(); - - public StatisticsTaskScheduler() { - super("Statistics task scheduler", - Config.statistic_task_scheduler_execution_interval_ms); - } - - @Override - protected void runAfterCatalogReady() { - // step1: task n concurrent tasks from the queue - List tasks = peek(); - - if (!tasks.isEmpty()) { - ThreadPoolExecutor executor = ThreadPoolManager.newDaemonCacheThreadPool(tasks.size(), - "statistic-pool", false); - StatisticsJobManager jobManager = Env.getCurrentEnv().getStatisticsJobManager(); - Map statisticsJobs = jobManager.getIdToStatisticsJob(); - Map>>> resultMap = Maps.newLinkedHashMap(); - - for (StatisticsTask task : tasks) { - long jobId = task.getJobId(); - - if (checkJobIsValid(jobId)) { - // step2: execute task and save task result - Future future = executor.submit(task); - StatisticsJob statisticsJob = statisticsJobs.get(jobId); - - if (updateTaskAndJobState(task, statisticsJob)) { - Map> taskInfo = Maps.newHashMap(); - taskInfo.put(task.getId(), future); - List>> jobInfo = resultMap - .getOrDefault(jobId, Lists.newArrayList()); - jobInfo.add(taskInfo); - resultMap.put(jobId, jobInfo); - } - } - } - - // step3: handle task results - handleTaskResult(resultMap); - } - } - - public void addTasks(List statisticsTaskList) throws IllegalStateException { - queue.addAll(statisticsTaskList); - } - - private List peek() { - List tasks = Lists.newArrayList(); - int i = Config.cbo_concurrency_statistics_task_num; - while (i > 0) { - StatisticsTask task = queue.poll(); - if (task == null) { - break; - } - tasks.add(task); - i--; - } - return tasks; - } - - /** - * Update task and job state - * - * @param task statistics task - * @param job statistics job - * @return true if update task and job state successfully. - */ - private boolean updateTaskAndJobState(StatisticsTask task, StatisticsJob job) { - try { - // update task state - task.updateTaskState(TaskState.RUNNING); - } catch (DdlException e) { - LOG.info("Update statistics task state failed, taskId: " + task.getId(), e); - } - - try { - // update job state - if (task.getTaskState() != TaskState.RUNNING) { - job.updateJobState(JobState.FAILED); - } else { - if (job.getJobState() == JobState.SCHEDULING) { - job.updateJobState(JobState.RUNNING); - } - } - } catch (DdlException e) { - LOG.info("Update statistics job state failed, jobId: " + job.getId(), e); - return false; - } - return true; - } - - private void handleTaskResult(Map>>> resultMap) { - StatisticsManager statsManager = Env.getCurrentEnv().getStatisticsManager(); - StatisticsJobManager jobManager = Env.getCurrentEnv().getStatisticsJobManager(); - - resultMap.forEach((jobId, taskMapList) -> { - if (checkJobIsValid(jobId)) { - StatisticsJob statisticsJob = jobManager.getIdToStatisticsJob().get(jobId); - Map properties = statisticsJob.getProperties(); - long timeout = Long.parseLong(properties.get(AnalyzeStmt.CBO_STATISTICS_TASK_TIMEOUT_SEC)); - - // For tasks with tablet granularity, - // we need aggregate calculations to get the results of the statistics, - // so we need to put all the tasks together and handle the results together. - List taskResults = Lists.newArrayList(); - - for (Map> taskInfos : taskMapList) { - taskInfos.forEach((taskId, future) -> { - String errorMsg = ""; - - try { - StatisticsTaskResult taskResult = future.get(timeout, TimeUnit.SECONDS); - taskResults.add(taskResult); - } catch (TimeoutException | ExecutionException | InterruptedException - | CancellationException e) { - errorMsg = e.getMessage(); - LOG.error("Failed to get statistics. jobId: {}, taskId: {}, e: {}", jobId, taskId, e); - } - - try { - statisticsJob.updateJobInfoByTaskId(taskId, errorMsg); - } catch (DdlException e) { - LOG.info("Failed to update statistics job info. jobId: {}, e: {}", jobId, e); - } - }); - } - - try { - statsManager.updateStatistics(taskResults); - } catch (AnalysisException e) { - LOG.info("Failed to update statistics. jobId: {}, e: {}", jobId, e); - } - } - }); - } - - public boolean checkJobIsValid(Long jobId) { - StatisticsJobManager jobManager = Env.getCurrentEnv().getStatisticsJobManager(); - StatisticsJob statisticsJob = jobManager.getIdToStatisticsJob().get(jobId); - if (statisticsJob == null) { - return false; - } - JobState jobState = statisticsJob.getJobState(); - return jobState != JobState.CANCELLED && jobState != JobState.FAILED; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java deleted file mode 100644 index ad840dd70b..0000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java +++ /dev/null @@ -1,309 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -import org.apache.doris.catalog.Type; -import org.apache.doris.common.AnalysisException; -import org.apache.doris.common.util.Util; - -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; - -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.function.Predicate; - -/** - * There are the statistics of table. - * The table stats are mainly used to provide input for the Optimizer's cost model. - * The description of table stats are following: - * - @rowCount: The row count of table. - * - @dataSize: The data size of table. - * - @nameToColumnStats: <@String columnName, @ColumnStats columnStats> - *

- * Each column in the Table will have corresponding @ColumnStats. - * Those @ColumnStats are recorded in @nameToColumnStats form of MAP. - * This facilitates the optimizer to quickly find the corresponding: - * - @ColumnStats based on the column name. - * - @rowCount: The row count of table. - * - @dataSize: The data size of table. - *

- * The granularity of the statistics is whole table. - * For example: "@rowCount = 1000" means that the row count is 1000 in the whole table. - *

- * After the statistics task is successfully completed, update the TableStats, - * TableStats should not be updated in any other way. - */ -public class TableStats { - public static final StatsType DATA_SIZE = StatsType.DATA_SIZE; - public static final StatsType ROW_COUNT = StatsType.ROW_COUNT; - - private static final Predicate DESIRED_ROW_COUNT_PRED = (v) -> v >= -1L; - private static final Predicate DESIRED_DATA_SIZE_PRED = (v) -> v >= -1L; - - private double rowCount = -1; - private long dataSize = -1; - private final Map nameToPartitionStats = Maps.newConcurrentMap(); - private final Map nameToColumnStats = Maps.newConcurrentMap(); - - /** - * Return a default partition statistic. - */ - public static TableStats getDefaultTableStats() { - return new TableStats(); - } - - public TableStats() { - } - - public TableStats(double rowCount, long dataSize) { - this.rowCount = rowCount; - this.dataSize = dataSize; - } - - public double getRowCount() { - // '!isEmpty()' is added mainly because the result returns 0 - // instead of the expected -1 when nameToPartitionStats is empty. - if (rowCount == -1 && !nameToPartitionStats.isEmpty()) { - return nameToPartitionStats.values().stream() - .filter(partitionStats -> partitionStats.getRowCount() != -1) - .mapToLong(PartitionStats::getRowCount).sum(); - } - return rowCount; - } - - public long getDataSize() { - if (dataSize == -1 && !nameToPartitionStats.isEmpty()) { - return nameToPartitionStats.values().stream() - .filter(partitionStats -> partitionStats.getDataSize() != -1) - .mapToLong(PartitionStats::getDataSize).sum(); - } - return dataSize; - } - - public Map getNameToPartitionStats() { - return nameToPartitionStats; - } - - public Map getNameToColumnStats() { - if (nameToColumnStats.isEmpty()) { - return getAggPartitionColStats(); - } - return nameToColumnStats; - } - - public PartitionStats getPartitionStats(String partitionName) { - return nameToPartitionStats.get(partitionName); - } - - /** - * If the partition statistics do not exist, the default statistics will be returned. - */ - public PartitionStats getPartitionStatsOrDefault(String columnName) { - return nameToPartitionStats.getOrDefault(columnName, - PartitionStats.getDefaultPartitionStats()); - } - - /** - * If the column statistics do not exist, the default statistics will be returned. - */ - public ColumnStat getColumnStatsOrDefault(String columnName) { - return nameToColumnStats.getOrDefault(columnName, - ColumnStat.getDefaultColumnStats()); - } - - /** - * After the statistics task is successfully completed, update the statistics of the partition, - * statistics should not be updated in any other way. - */ - public void updateTableStats(Map statsTypeToValue) throws AnalysisException { - for (Map.Entry entry : statsTypeToValue.entrySet()) { - if (entry.getKey() == ROW_COUNT) { - rowCount = Util.getDoublePropertyOrDefault(entry.getValue(), rowCount, - DESIRED_ROW_COUNT_PRED, ROW_COUNT + " should >= -1"); - } else if (entry.getKey() == DATA_SIZE) { - dataSize = Util.getLongPropertyOrDefault(entry.getValue(), dataSize, - DESIRED_DATA_SIZE_PRED, DATA_SIZE + " should >= -1"); - } - } - } - - /** - * After the statistics task is successfully completed, update the statistics of the partition, - * statistics should not be updated in any other way. - */ - public void updatePartitionStats(String partitionName, Map statsTypeToValue) - throws AnalysisException { - PartitionStats partitionStats = getNotNullPartitionStats(partitionName); - partitionStats.updatePartitionStats(statsTypeToValue); - } - - /** - * After the statistics task is successfully completed, update the statistics of the column, - * statistics should not be updated in any other way. - */ - public void updateColumnStats(String columnName, Type columnType, Map statsTypeToValue) - throws AnalysisException { - ColumnStat columnStat = getColumnStats(columnName); - columnStat.updateStats(columnType, statsTypeToValue); - } - - /** - * If partition stats is not exist, create a new one. - * - * @param partitionName partition name - * @return @PartitionStats - */ - private PartitionStats getNotNullPartitionStats(String partitionName) { - PartitionStats partitionStat = nameToPartitionStats.get(partitionName); - if (partitionStat == null) { - partitionStat = new PartitionStats(); - nameToPartitionStats.put(partitionName, partitionStat); - } - return partitionStat; - } - - /** - * If column stats is not exist, create a new one. - * - * @param columnName column name - * @return @ColumnStats - */ - private ColumnStat getNotNullColumnStats(String columnName) { - ColumnStat columnStat = nameToColumnStats.get(columnName); - if (columnStat == null) { - columnStat = new ColumnStat(); - nameToColumnStats.put(columnName, columnStat); - } - return columnStat; - } - - public ColumnStat getColumnStats(String columnName) { - ColumnStat columnStat = nameToColumnStats.get(columnName); - if (columnStat == null) { - columnStat = new ColumnStat(); - nameToColumnStats.put(columnName, columnStat); - } - return columnStat; - } - - public ColumnStat getColumnStatCopy(String columnName) { - ColumnStat columnStat = getColumnStats(columnName); - return columnStat.copy(); - } - - public List getShowInfo() { - List result = Lists.newArrayList(); - result.add(Double.toString(getRowCount())); - result.add(Long.toString(getDataSize())); - return result; - } - - public List getShowInfo(String partitionName) { - PartitionStats partitionStats = nameToPartitionStats.get(partitionName); - return partitionStats.getShowInfo(); - } - - private Map getAggPartitionColStats() { - Map aggColumnStats = new HashMap<>(); - for (PartitionStats partitionStats : nameToPartitionStats.values()) { - partitionStats.getNameToColumnStats().forEach((colName, columnStats) -> { - if (!aggColumnStats.containsKey(colName)) { - aggColumnStats.put(colName, columnStats.copy()); - } else { - ColumnStat tblColStats = aggColumnStats.get(colName); - mergePartitionColumnStats(tblColStats, columnStats); - } - }); - } - - return aggColumnStats; - } - - private void mergePartitionColumnStats(ColumnStat leftStats, ColumnStat rightStats) { - if (leftStats.getNdv() == -1) { - if (rightStats.getNdv() != -1) { - leftStats.setNdv(rightStats.getNdv()); - } - } else { - if (rightStats.getNdv() != -1) { - double ndv = leftStats.getNdv() + rightStats.getNdv(); - leftStats.setNdv(ndv); - } - } - - if (leftStats.getAvgSizeByte() == -1) { - if (rightStats.getAvgSizeByte() != -1) { - leftStats.setAvgSizeByte(rightStats.getAvgSizeByte()); - } - } else { - if (rightStats.getAvgSizeByte() != -1) { - double avgSize = (leftStats.getAvgSizeByte() + rightStats.getAvgSizeByte()) / 2; - leftStats.setAvgSizeByte(avgSize); - } - } - - if (leftStats.getMaxSizeByte() == -1) { - if (rightStats.getMaxSizeByte() != -1) { - leftStats.setMaxSizeByte(rightStats.getMaxSizeByte()); - } - } else { - if (rightStats.getMaxSizeByte() != -1) { - double maxSize = Math.max(leftStats.getMaxSizeByte(), rightStats.getMaxSizeByte()); - leftStats.setMaxSizeByte(maxSize); - } - } - - if (leftStats.getNumNulls() == -1) { - if (rightStats.getNumNulls() != -1) { - leftStats.setNumNulls(rightStats.getNumNulls()); - } - } else { - if (rightStats.getNumNulls() != -1) { - double numNulls = leftStats.getNumNulls() + rightStats.getNumNulls(); - leftStats.setNumNulls(numNulls); - } - } - - if (Double.isNaN(leftStats.getMinValue())) { - if (!Double.isNaN(rightStats.getMinValue())) { - leftStats.setMinValue(rightStats.getMinValue()); - } - } else if (!Double.isNaN(rightStats.getMinValue())) { - double minValue = Math.max(leftStats.getMinValue(), rightStats.getMinValue()); - leftStats.setMinValue(minValue); - } - - if (Double.isNaN(leftStats.getMaxValue())) { - if (!Double.isNaN(rightStats.getMaxValue())) { - leftStats.setMaxValue(rightStats.getMaxValue()); - } - } else if (!Double.isNaN(rightStats.getMaxValue())) { - double maxValue = Math.min(leftStats.getMaxValue(), rightStats.getMaxValue()); - leftStats.setMaxValue(maxValue); - } - } - - /** - * This method is for unit test. - */ - public void putColumnStats(String name, ColumnStat columnStat) { - nameToColumnStats.put(name, columnStat); - } -} diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/jobs/cascades/DeriveStatsJobTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/jobs/cascades/DeriveStatsJobTest.java index dd98200ec8..97782f402a 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/jobs/cascades/DeriveStatsJobTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/jobs/cascades/DeriveStatsJobTest.java @@ -36,15 +36,10 @@ import org.apache.doris.nereids.types.IntegerType; import org.apache.doris.nereids.util.MemoTestUtils; import org.apache.doris.nereids.util.PlanConstructor; import org.apache.doris.qe.ConnectContext; -import org.apache.doris.statistics.ColumnStat; -import org.apache.doris.statistics.StatisticsManager; import org.apache.doris.statistics.StatsDeriveResult; -import org.apache.doris.statistics.TableStats; import com.google.common.collect.ImmutableList; import mockit.Expectations; -import mockit.Mock; -import mockit.MockUp; import mockit.Mocked; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -60,8 +55,6 @@ public class DeriveStatsJobTest { ConnectContext context; @Mocked Env env; - @Mocked - StatisticsManager statisticsManager; SlotReference slot1; @@ -81,14 +74,6 @@ public class DeriveStatsJobTest { } private LogicalOlapScan constructOlapSCan() throws AnalysisException { - ColumnStat columnStats1 = new ColumnStat(10, 0, 0, 5, - Double.NaN, Double.NaN); - new MockUp(TableStats.class) { - @Mock - public ColumnStat getColumnStats(String columnName) { - return columnStats1; - } - }; long tableId1 = 0; diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java index 8342af493a..69d50d5db8 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java @@ -36,11 +36,9 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalTopN; import org.apache.doris.nereids.types.IntegerType; import org.apache.doris.nereids.util.PlanConstructor; import org.apache.doris.qe.ConnectContext; -import org.apache.doris.statistics.ColumnStat; import org.apache.doris.statistics.ColumnStatistic; import org.apache.doris.statistics.ColumnStatisticBuilder; import org.apache.doris.statistics.StatsDeriveResult; -import org.apache.doris.statistics.TableStats; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; @@ -238,13 +236,7 @@ public class StatsCalculatorTest { @Test public void testOlapScan(@Mocked ConnectContext context) { - ColumnStat columnStat1 = new ColumnStat(); - columnStat1.setNdv(10); - columnStat1.setNumNulls(5); long tableId1 = 0; - TableStats tableStats1 = new TableStats(); - tableStats1.putColumnStats("c1", columnStat1); - List qualifier = ImmutableList.of("test", "t"); SlotReference slot1 = new SlotReference("c1", IntegerType.INSTANCE, true, qualifier); diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/ColumnStatsTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/ColumnStatsTest.java deleted file mode 100644 index 1eeeed15a1..0000000000 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/ColumnStatsTest.java +++ /dev/null @@ -1,154 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -import org.apache.doris.catalog.PrimitiveType; -import org.apache.doris.catalog.Type; -import org.apache.doris.common.AnalysisException; - -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; - -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -public class ColumnStatsTest { - private ColumnStat columnStatsUnderTest; - - @Before - public void setUp() throws Exception { - columnStatsUnderTest = new ColumnStat(); - } - - @Test - public void testUpdateStats() throws Exception { - // Setup - Type columnType = Type.fromPrimitiveType(PrimitiveType.BIGINT); - Map statsTypeToValue = new HashMap<>(); - statsTypeToValue.put(StatsType.MAX_SIZE, "8"); - statsTypeToValue.put(StatsType.MIN_VALUE, "0"); - statsTypeToValue.put(StatsType.MAX_VALUE, "100"); - - // Run the test - columnStatsUnderTest.updateStats(columnType, statsTypeToValue); - - // Verify the results - double maxSize = columnStatsUnderTest.getMaxSizeByte(); - Assert.assertEquals(8, maxSize, 0.1); - - double minValue = columnStatsUnderTest.getMinValue(); - Assert.assertEquals(0, minValue, 0.1); - - double maxValue = columnStatsUnderTest.getMaxValue(); - Assert.assertEquals(100, maxValue, 0.1); - } - - @Test - public void testUpdateStats_ThrowsAnalysisException() { - // Setup - Type columnType = Type.fromPrimitiveType(PrimitiveType.BIGINT); - Map statsTypeToValue = new HashMap<>(); - statsTypeToValue.put(StatsType.AVG_SIZE, "abc"); - - // Run the test - Assert.assertThrows(AnalysisException.class, - () -> columnStatsUnderTest.updateStats(columnType, statsTypeToValue)); - } - - @Test - public void testGetShowInfo() throws AnalysisException { - // Setup - Type columnType = Type.fromPrimitiveType(PrimitiveType.BIGINT); - Map statsTypeToValue = new HashMap<>(); - statsTypeToValue.put(StatsType.NDV, "1"); - statsTypeToValue.put(StatsType.AVG_SIZE, "8"); - statsTypeToValue.put(StatsType.MAX_SIZE, "8"); - statsTypeToValue.put(StatsType.NUM_NULLS, "2"); - statsTypeToValue.put(StatsType.MIN_VALUE, "0"); - statsTypeToValue.put(StatsType.MAX_VALUE, "1000"); - - columnStatsUnderTest.updateStats(columnType, statsTypeToValue); - String[] expectedInfo = {"1.0", "8.0", "8.0", "2.0", "0.0", "1000.0"}; - - // Run the test - List showInfo = columnStatsUnderTest.getShowInfo(); - String[] result = showInfo.toArray(new String[0]); - - // Verify the results - Assert.assertArrayEquals(expectedInfo, result); - } - - @Test - public void testGetDefaultColumnStats() { - // Run the test - ColumnStat defaultColumnStats = ColumnStat.getDefaultColumnStats(); - - // Verify the results - double ndv = defaultColumnStats.getNdv(); - Assert.assertEquals(-1L, ndv, 0.1); - - double avgSize = defaultColumnStats.getAvgSizeByte(); - Assert.assertEquals(-1.0f, avgSize, 0.0001); - - double maxSize = defaultColumnStats.getMaxSizeByte(); - Assert.assertEquals(-1L, maxSize, 0.1); - - double maxValue = defaultColumnStats.getMaxValue(); - Assert.assertEquals(Double.NaN, maxValue, 0.1); - - double minValue = defaultColumnStats.getMinValue(); - Assert.assertEquals(Double.NaN, minValue, 0.1); - } - - @Test - public void testAggColumnStats() throws Exception { - // Setup - ColumnStat columnStats = ColumnStat.getDefaultColumnStats(); - ColumnStat other = new ColumnStat(1L, 4.0f, 5L, 10L, - Double.NaN, - Double.NaN); - - // Run the test - ColumnStat aggColumnStats = ColumnStat.mergeColumnStats(columnStats, other); - - // Verify the results - double ndv = aggColumnStats.getNdv(); - // 0(default) + 1 - Assert.assertEquals(1L, ndv, 0.1); - - double avgSize = aggColumnStats.getAvgSizeByte(); - // (0.0f + 4.0f) / 2 - Assert.assertEquals(4.0f, avgSize, 0.0001); - - double maxSize = aggColumnStats.getMaxSizeByte(); - Assert.assertEquals(5L, maxSize, 0.1); - - double numNulls = aggColumnStats.getNumNulls(); - Assert.assertEquals(10L, numNulls, 0.1); - - double minValue = aggColumnStats.getMinValue(); - // null VS sMinValue - Assert.assertEquals(Double.NaN, minValue, 0.1); - - double maxValue = aggColumnStats.getMaxValue(); - // null VS sMaxValue - Assert.assertEquals(Double.NaN, maxValue, 0.1); - } -} diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/PartitionStatsTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/PartitionStatsTest.java deleted file mode 100644 index 522877bc0a..0000000000 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/PartitionStatsTest.java +++ /dev/null @@ -1,136 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -import org.apache.doris.catalog.PrimitiveType; -import org.apache.doris.catalog.Type; -import org.apache.doris.common.AnalysisException; - -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; - -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -public class PartitionStatsTest { - private PartitionStats partitionStatsUnderTest; - - @Before - public void setUp() throws Exception { - partitionStatsUnderTest = new PartitionStats(); - } - - @Test - public void testUpdatePartitionStats() throws Exception { - // Setup - Map statsTypeToValue = new HashMap<>(); - statsTypeToValue.put(StatsType.ROW_COUNT, "1000"); - statsTypeToValue.put(StatsType.DATA_SIZE, "10240"); - - // Run the test - partitionStatsUnderTest.updatePartitionStats(statsTypeToValue); - - // Verify the results - long rowCount = partitionStatsUnderTest.getRowCount(); - Assert.assertEquals(1000, rowCount); - - long dataSize = partitionStatsUnderTest.getDataSize(); - Assert.assertEquals(10240, dataSize); - } - - @Test - public void testUpdatePartitionStats_ThrowsAnalysisException() { - // Setup - Map statsTypeToValue = new HashMap<>(); - statsTypeToValue.put(StatsType.AVG_SIZE, "8"); - statsTypeToValue.put(StatsType.ROW_COUNT, "abc"); - - // Run the test - Assert.assertThrows(AnalysisException.class, - () -> partitionStatsUnderTest.updatePartitionStats(statsTypeToValue)); - } - - @Test - public void testUpdateColumnStats() throws Exception { - // Setup - Type columnType = Type.fromPrimitiveType(PrimitiveType.BIGINT); - Map statsTypeToValue = new HashMap<>(); - statsTypeToValue.put(StatsType.NDV, "1"); - statsTypeToValue.put(StatsType.AVG_SIZE, "8"); - statsTypeToValue.put(StatsType.MAX_SIZE, "8"); - statsTypeToValue.put(StatsType.NUM_NULLS, "2"); - statsTypeToValue.put(StatsType.MIN_VALUE, "0"); - statsTypeToValue.put(StatsType.MAX_VALUE, "1000"); - - // Run the test - partitionStatsUnderTest.updateColumnStats("columnName", columnType, statsTypeToValue); - ColumnStat columnStats = partitionStatsUnderTest.getColumnStats("columnName"); - - // Verify the results - double ndv = columnStats.getNdv(); - Assert.assertEquals(1, ndv, 0.1); - - double avgSize = columnStats.getAvgSizeByte(); - Assert.assertEquals(8.0f, avgSize, 0.0001); - - double maxSize = columnStats.getMaxSizeByte(); - Assert.assertEquals(8, maxSize, 0.1); - - double maxValue = columnStats.getMaxValue(); - Assert.assertEquals(1000, maxValue, 0.1); - - double minValue = columnStats.getMinValue(); - Assert.assertEquals(0, minValue, 0.1); - - double numNulls = columnStats.getNumNulls(); - Assert.assertEquals(2, numNulls, 0.1); - } - - @Test - public void testUpdateColumnStats_ThrowsAnalysisException() { - // Setup - Type columnType = Type.fromPrimitiveType(PrimitiveType.BIGINT); - Map statsTypeToValue = new HashMap<>(); - statsTypeToValue.put(StatsType.AVG_SIZE, "abc"); - - // Run the test - Assert.assertThrows( - AnalysisException.class, () -> partitionStatsUnderTest - .updateColumnStats("columnName", columnType, statsTypeToValue)); - } - - @Test - public void testGetShowInfo() throws AnalysisException { - // Setup - Map statsTypeToValue = new HashMap<>(); - statsTypeToValue.put(StatsType.ROW_COUNT, "1000"); - statsTypeToValue.put(StatsType.DATA_SIZE, "10240"); - - partitionStatsUnderTest.updatePartitionStats(statsTypeToValue); - String[] expectedInfo = {"1000", "10240"}; - - // Run the test - List showInfo = partitionStatsUnderTest.getShowInfo(); - String[] result = showInfo.toArray(new String[0]); - - // Run the test - Assert.assertArrayEquals(expectedInfo, result); - } -} diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/SQLStatisticsTaskTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/SQLStatisticsTaskTest.java deleted file mode 100644 index 7abf675223..0000000000 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/SQLStatisticsTaskTest.java +++ /dev/null @@ -1,207 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -import org.apache.doris.catalog.Column; -import org.apache.doris.catalog.Database; -import org.apache.doris.catalog.Env; -import org.apache.doris.catalog.HashDistributionInfo; -import org.apache.doris.catalog.KeysType; -import org.apache.doris.catalog.OlapTable; -import org.apache.doris.catalog.PartitionInfo; -import org.apache.doris.catalog.PrimitiveType; -import org.apache.doris.common.DdlException; -import org.apache.doris.common.jmockit.Deencapsulation; -import org.apache.doris.datasource.InternalCatalog; -import org.apache.doris.statistics.util.InternalQuery; -import org.apache.doris.statistics.util.InternalQueryResult; - -import mockit.Mock; -import mockit.MockUp; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; - -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.concurrent.ConcurrentHashMap; - - -public class SQLStatisticsTaskTest { - private SQLStatisticsTask sqlStatisticsTaskUnderTest; - - @Before - public void setUp() throws Exception { - StatsCategory statsCategory = new StatsCategory(); - StatsGranularity statsGranularity = new StatsGranularity(); - List statsTypes = Collections.singletonList(StatsType.ROW_COUNT); - sqlStatisticsTaskUnderTest = new SQLStatisticsTask(0L, - Collections.singletonList(new StatisticsDesc(statsCategory, statsGranularity, statsTypes))); - - InternalCatalog catalog = Env.getCurrentInternalCatalog(); - Column column = new Column("columnName", PrimitiveType.STRING); - OlapTable tableName = new OlapTable(0L, "tableName", - Collections.singletonList(column), KeysType.AGG_KEYS, - new PartitionInfo(), new HashDistributionInfo()); - Database database = new Database(0L, "db"); - database.createTable(tableName); - - ConcurrentHashMap fullNameToDb = new ConcurrentHashMap<>(); - fullNameToDb.put("cluster:db", database); - Deencapsulation.setField(catalog, "fullNameToDb", fullNameToDb); - - ConcurrentHashMap idToDb = new ConcurrentHashMap<>(); - idToDb.put(0L, database); - Deencapsulation.setField(catalog, "idToDb", idToDb); - - List columns = Collections.singletonList("row_count"); - List types = Arrays.asList(PrimitiveType.STRING, - PrimitiveType.INT, PrimitiveType.FLOAT, - PrimitiveType.DOUBLE, PrimitiveType.BIGINT); - InternalQueryResult queryResult = new InternalQueryResult(); - InternalQueryResult.ResultRow resultRow = - new InternalQueryResult.ResultRow(columns, types, Collections.singletonList("1000")); - queryResult.getResultRows().add(resultRow); - - new MockUp(InternalQuery.class) { - @Mock - public InternalQueryResult query() { - return queryResult; - } - }; - } - - @Test - public void testConstructQuery() throws Exception { - // Setup - String expectedSQL = "SELECT COUNT(1) AS row_count FROM tableName;"; - - StatsCategory statsCategory = new StatsCategory(); - statsCategory.setCategory(StatsCategory.Category.TABLE); - statsCategory.setDbId(0L); - statsCategory.setTableId(0L); - statsCategory.setPartitionName("partitionName"); - statsCategory.setColumnName("columnName"); - statsCategory.setStatsValue("statsValue"); - - StatsGranularity statsGranularity = new StatsGranularity(); - statsGranularity.setGranularity(StatsGranularity.Granularity.TABLE); - statsGranularity.setTableId(0L); - statsGranularity.setPartitionId(0L); - statsGranularity.setTabletId(0L); - - StatisticsDesc statsDesc = new StatisticsDesc(statsCategory, statsGranularity, - Collections.singletonList(StatsType.ROW_COUNT)); - - // Run the test - String result = sqlStatisticsTaskUnderTest.constructQuery(statsDesc); - - // Verify the results - Assert.assertEquals(expectedSQL, result); - } - - @Test - public void testConstructQuery_ThrowsDdlException() { - // Setup - StatsCategory statsCategory = new StatsCategory(); - statsCategory.setCategory(StatsCategory.Category.TABLE); - statsCategory.setDbId(0L); - statsCategory.setTableId(0L); - statsCategory.setPartitionName("partitionName"); - statsCategory.setColumnName("columnName"); - statsCategory.setStatsValue("statsValue"); - - StatsGranularity statsGranularity = new StatsGranularity(); - statsGranularity.setGranularity(StatsGranularity.Granularity.TABLE); - statsGranularity.setTableId(0L); - statsGranularity.setPartitionId(0L); - statsGranularity.setTabletId(0L); - - StatisticsDesc statsDesc = new StatisticsDesc(statsCategory, statsGranularity, - Collections.singletonList(StatsType.UNKNOWN)); - - // Run the test - Assert.assertThrows(DdlException.class, - () -> sqlStatisticsTaskUnderTest.constructQuery(statsDesc)); - } - - @Test - public void testExecuteQuery() throws Exception { - // Setup - StatsCategory statsCategory = new StatsCategory(); - statsCategory.setCategory(StatsCategory.Category.TABLE); - statsCategory.setDbId(0L); - statsCategory.setTableId(0L); - statsCategory.setPartitionName("partitionName"); - statsCategory.setColumnName("columnName"); - statsCategory.setStatsValue("statsValue"); - - StatsGranularity statsGranularity = new StatsGranularity(); - statsGranularity.setGranularity(StatsGranularity.Granularity.TABLE); - statsGranularity.setTableId(0L); - statsGranularity.setPartitionId(0L); - statsGranularity.setTabletId(0L); - - StatisticsTaskResult.TaskResult expectedResult = new StatisticsTaskResult.TaskResult(); - expectedResult.setDbId(0L); - expectedResult.setTableId(0L); - expectedResult.setPartitionName("partitionName"); - expectedResult.setColumnName("columnName"); - expectedResult.setCategory(StatsCategory.Category.TABLE); - expectedResult.setGranularity(StatsGranularity.Granularity.TABLE); - HashMap hashMap = new HashMap<>(); - hashMap.put(StatsType.ROW_COUNT, "1000"); - expectedResult.setStatsTypeToValue(hashMap); - - StatisticsDesc statsDesc = new StatisticsDesc(statsCategory, statsGranularity, - Collections.singletonList(StatsType.ROW_COUNT)); - - // Run the test - StatisticsTaskResult.TaskResult result = sqlStatisticsTaskUnderTest.executeQuery(statsDesc); - - // Verify the results - Assert.assertEquals(expectedResult, result); - } - - @Test - public void testExecuteQuery_ThrowsException() { - // Setup - StatsCategory statsCategory = new StatsCategory(); - statsCategory.setCategory(StatsCategory.Category.TABLE); - statsCategory.setDbId(0L); - statsCategory.setTableId(0L); - statsCategory.setPartitionName("partitionName"); - statsCategory.setColumnName("columnName"); - statsCategory.setStatsValue("statsValue"); - - StatsGranularity statsGranularity = new StatsGranularity(); - statsGranularity.setGranularity(StatsGranularity.Granularity.TABLE); - statsGranularity.setTableId(0L); - statsGranularity.setPartitionId(0L); - statsGranularity.setTabletId(0L); - - StatisticsDesc statsDesc = new StatisticsDesc(statsCategory, statsGranularity, - Arrays.asList(StatsType.NDV, StatsType.MAX_VALUE, StatsType.MIN_VALUE)); - - // Run the test - Assert.assertThrows(Exception.class, - () -> sqlStatisticsTaskUnderTest.executeQuery(statsDesc)); - } -} diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/SampleSQLStatisticsTaskTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/SampleSQLStatisticsTaskTest.java deleted file mode 100644 index 176d5eee0d..0000000000 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/SampleSQLStatisticsTaskTest.java +++ /dev/null @@ -1,205 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -import org.apache.doris.catalog.Column; -import org.apache.doris.catalog.Database; -import org.apache.doris.catalog.Env; -import org.apache.doris.catalog.HashDistributionInfo; -import org.apache.doris.catalog.KeysType; -import org.apache.doris.catalog.OlapTable; -import org.apache.doris.catalog.PartitionInfo; -import org.apache.doris.catalog.PrimitiveType; -import org.apache.doris.common.DdlException; -import org.apache.doris.common.jmockit.Deencapsulation; -import org.apache.doris.datasource.InternalCatalog; -import org.apache.doris.statistics.StatsCategory.Category; -import org.apache.doris.statistics.StatsGranularity.Granularity; -import org.apache.doris.statistics.util.InternalQuery; -import org.apache.doris.statistics.util.InternalQueryResult; - -import mockit.Mock; -import mockit.MockUp; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; - -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; - -public class SampleSQLStatisticsTaskTest { - - private SampleSQLStatisticsTask sampleSQLStatisticsTaskUnderTest; - - @Before - public void setUp() throws Exception { - InternalCatalog catalog = Env.getCurrentInternalCatalog(); - Column column = new Column("columnName", PrimitiveType.STRING); - OlapTable tableName = new OlapTable(0L, "tableName", - Collections.singletonList(column), KeysType.AGG_KEYS, - new PartitionInfo(), new HashDistributionInfo()); - Database database = new Database(0L, "db"); - database.createTable(tableName); - - ConcurrentHashMap fullNameToDb = new ConcurrentHashMap<>(); - fullNameToDb.put("cluster:db", database); - Deencapsulation.setField(catalog, "fullNameToDb", fullNameToDb); - - ConcurrentHashMap idToDb = new ConcurrentHashMap<>(); - idToDb.put(0L, database); - Deencapsulation.setField(catalog, "idToDb", idToDb); - - List columns = Collections.singletonList("row_count"); - List types = Arrays.asList(PrimitiveType.STRING, - PrimitiveType.INT, PrimitiveType.FLOAT, - PrimitiveType.DOUBLE, PrimitiveType.BIGINT); - InternalQueryResult queryResult = new InternalQueryResult(); - InternalQueryResult.ResultRow resultRow = - new InternalQueryResult.ResultRow(columns, types, Collections.singletonList("1000")); - queryResult.getResultRows().add(resultRow); - - StatsCategory statsCategory = new StatsCategory(); - StatsGranularity statsGranularity = new StatsGranularity(); - List statsTypes = Collections.singletonList(StatsType.ROW_COUNT); - sampleSQLStatisticsTaskUnderTest = new SampleSQLStatisticsTask(0L, - Collections.singletonList(new StatisticsDesc(statsCategory, statsGranularity, statsTypes))); - - new MockUp(InternalQuery.class) { - @Mock - public InternalQueryResult query() { - return queryResult; - } - }; - } - - @Test - public void testGetQueryParams() throws Exception { - // Setup - Map expectedResult = new HashMap<>(); - expectedResult.put("table", "tableName"); - expectedResult.put("partition", "partitionName"); - expectedResult.put("column", "columnName"); - expectedResult.put("percent", "10"); - - StatsCategory category = new StatsCategory(); - category.setCategory(Category.TABLE); - category.setDbId(0L); - category.setTableId(0L); - category.setPartitionName("partitionName"); - category.setColumnName("columnName"); - category.setStatsValue("statsValue"); - - StatsGranularity statsGranularity = new StatsGranularity(); - statsGranularity.setGranularity(Granularity.TABLE); - statsGranularity.setTableId(0L); - statsGranularity.setPartitionId(0L); - statsGranularity.setTabletId(0L); - - StatisticsDesc statsDesc = new StatisticsDesc(category, statsGranularity, - Collections.singletonList(StatsType.ROW_COUNT)); - - // Run the test - Map result = sampleSQLStatisticsTaskUnderTest.getQueryParams(statsDesc); - - // Verify the results - Assert.assertEquals(expectedResult, result); - } - - @Test - public void testGetQueryParams_ThrowsDdlException() { - // Setup - StatsCategory category = new StatsCategory(); - category.setCategory(Category.TABLE); - category.setDbId(-1L); - category.setTableId(0L); - category.setPartitionName("partitionName"); - category.setColumnName("columnName"); - category.setStatsValue("statsValue"); - - StatsGranularity statsGranularity = new StatsGranularity(); - statsGranularity.setGranularity(Granularity.PARTITION); - statsGranularity.setTableId(0L); - statsGranularity.setPartitionId(0L); - statsGranularity.setTabletId(0L); - - StatisticsDesc statsDesc = new StatisticsDesc(category, statsGranularity, - Collections.singletonList(StatsType.ROW_COUNT)); - - // Run the test - Assert.assertThrows(DdlException.class, - () -> sampleSQLStatisticsTaskUnderTest.getQueryParams(statsDesc)); - } - - @Test - public void testConstructQuery() throws Exception { - // Setup - String expectedSQL = "SELECT COUNT(1) AS row_count FROM tableName TABLESAMPLE(10 PERCENT);"; - - StatsCategory statsCategory = new StatsCategory(); - statsCategory.setCategory(StatsCategory.Category.TABLE); - statsCategory.setDbId(0L); - statsCategory.setTableId(0L); - statsCategory.setPartitionName("partitionName"); - statsCategory.setColumnName("columnName"); - statsCategory.setStatsValue("statsValue"); - - StatsGranularity statsGranularity = new StatsGranularity(); - statsGranularity.setGranularity(StatsGranularity.Granularity.TABLE); - statsGranularity.setTableId(0L); - statsGranularity.setPartitionId(0L); - statsGranularity.setTabletId(0L); - - StatisticsDesc statsDesc = new StatisticsDesc(statsCategory, statsGranularity, - Collections.singletonList(StatsType.ROW_COUNT)); - - // Run the test - String result = sampleSQLStatisticsTaskUnderTest.constructQuery(statsDesc); - - // Verify the results - Assert.assertEquals(expectedSQL, result); - } - - @Test - public void testExecuteQuery_ThrowsException() { - // Setup - StatsGranularity statsGranularity = new StatsGranularity(); - statsGranularity.setGranularity(StatsGranularity.Granularity.TABLE); - statsGranularity.setTableId(0L); - statsGranularity.setPartitionId(0L); - statsGranularity.setTabletId(0L); - - StatsCategory statsCategory = new StatsCategory(); - statsCategory.setCategory(StatsCategory.Category.TABLE); - statsCategory.setDbId(0L); - statsCategory.setTableId(0L); - statsCategory.setPartitionName("partitionName"); - statsCategory.setColumnName("columnName"); - statsCategory.setStatsValue("statsValue"); - - StatisticsDesc statsDesc = new StatisticsDesc(statsCategory, statsGranularity, - Arrays.asList(StatsType.NDV, StatsType.MAX_VALUE, StatsType.MIN_VALUE)); - - // Run the test - Assert.assertThrows(Exception.class, - () -> sampleSQLStatisticsTaskUnderTest.executeQuery(statsDesc)); - } -} diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsJobSchedulerTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsJobSchedulerTest.java deleted file mode 100644 index e715910af0..0000000000 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsJobSchedulerTest.java +++ /dev/null @@ -1,182 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -import org.apache.doris.catalog.Column; -import org.apache.doris.catalog.Database; -import org.apache.doris.catalog.Env; -import org.apache.doris.catalog.HashDistributionInfo; -import org.apache.doris.catalog.KeysType; -import org.apache.doris.catalog.OlapTable; -import org.apache.doris.catalog.PartitionInfo; -import org.apache.doris.catalog.PrimitiveType; -import org.apache.doris.common.jmockit.Deencapsulation; -import org.apache.doris.datasource.InternalCatalog; -import org.apache.doris.system.SystemInfoService; - -import com.google.common.collect.Maps; -import com.google.common.collect.Sets; -import mockit.Mock; -import mockit.MockUp; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; - -import java.util.Arrays; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; - -public class StatisticsJobSchedulerTest { - private StatisticsJob statisticsJob; - - private StatisticsJobScheduler statisticsJobSchedulerUnderTest; - - @Before - public void setUp() throws Exception { - HashSet tblIds = Sets.newHashSet(); - tblIds.add(0L); - tblIds.add(1L); - - Map> tableIdToColumnName = Maps.newHashMap(); - tableIdToColumnName.put(0L, Arrays.asList("c1", "c2")); - tableIdToColumnName.put(1L, Arrays.asList("c1", "c2")); - Map> tblIdToPartitionName = Maps.newHashMap(); - - statisticsJob = new StatisticsJob(0L, tblIds, tblIdToPartitionName, - tableIdToColumnName, null); - statisticsJobSchedulerUnderTest = new StatisticsJobScheduler(); - statisticsJobSchedulerUnderTest.addPendingJob(statisticsJob); - } - - @Test - public void testRunAfterCatalogReady() { - // Setup - Column col1 = new Column("c1", PrimitiveType.STRING); - Column col2 = new Column("c2", PrimitiveType.INT); - - OlapTable tbl1 = new OlapTable(0L, "tbl1", Arrays.asList(col1, col2), - KeysType.AGG_KEYS, new PartitionInfo(), new HashDistributionInfo()); - OlapTable tbl2 = new OlapTable(1L, "tbl2", Arrays.asList(col1, col2), - KeysType.DUP_KEYS, new PartitionInfo(), new HashDistributionInfo()); - - Database database = new Database(0L, "db"); - database.createTable(tbl1); - database.createTable(tbl2); - - InternalCatalog catalog = Env.getCurrentInternalCatalog(); - ConcurrentHashMap fullNameToDb = new ConcurrentHashMap<>(); - fullNameToDb.put("cluster:db", database); - Deencapsulation.setField(catalog, "fullNameToDb", fullNameToDb); - - ConcurrentHashMap idToDb = new ConcurrentHashMap<>(); - idToDb.put(0L, database); - Deencapsulation.setField(catalog, "idToDb", idToDb); - - new MockUp(SystemInfoService.class) { - @Mock - public List getBackendIds(boolean needAlive) { - return Collections.singletonList(1L); - } - }; - - new MockUp(OlapTable.class) { - @Mock - public long getDataSize() { - return 1L; - } - }; - - // Run the test - statisticsJobSchedulerUnderTest.runAfterCatalogReady(); - - /* - * expected results: - * mateTask(2): - * - tbl1: - * - task1: - * - data_size - * - max_size(c2) - * - avg_size(c2) - * - tbl2: - * - task: - * - row_count - * - data_size - * - max_size(c2) - * - avg_size(c2) - * - * sqlTask(11): - * - tbl1: - * - task: - * - ndv(c1) - * - min_value(c1) - * - max_value(c1) - * - task: - * - ndv(c2) - * - min_value(c2) - * - max_value(c2) - * - task: - * - max_size(c1) - * - avg_size(c1) - * - task: - * - num_nulls(c1) - * - task: - * - num_nulls(c2) - * - task - * - row_count - * - tbl2: - * - task: - * - ndv(c1) - * - min_value(c1) - * - max_value(c1) - * - task: - * - ndv(c2) - * - min_value(c2) - * - max_value(c2) - * - task: - * - max_size(c1) - * - avg_size(c1) - * - task: - * - num_nulls(c1) - * - task: - * - num_nulls(c2) - */ - - // Verify the results - List tasks = statisticsJob.getTasks(); - Assert.assertEquals(13, tasks.size()); - - int sqlTaskCount = 0; - int metaTaskCount = 0; - - for (StatisticsTask task : tasks) { - if (task instanceof SQLStatisticsTask) { - sqlTaskCount++; - } else if (task instanceof MetaStatisticsTask) { - metaTaskCount++; - } else { - Assert.fail("Unknown task type."); - } - } - - Assert.assertEquals(2, metaTaskCount); - Assert.assertEquals(11, sqlTaskCount); - } -} diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsJobTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsJobTest.java deleted file mode 100644 index eb6cd576e4..0000000000 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsJobTest.java +++ /dev/null @@ -1,120 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -import org.apache.doris.common.DdlException; -import org.apache.doris.statistics.StatisticsJob.JobState; -import org.apache.doris.statistics.StatisticsTask.TaskState; - -import com.google.common.collect.Maps; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; - -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; - -public class StatisticsJobTest { - private StatisticsJob statisticsJobUnderTest; - - private StatisticsTask statisticsTaskUnderTest; - - @Before - public void setUp() throws Exception { - HashSet tblIds = new HashSet<>(Collections.singletonList(0L)); - Map> tblIdToPartitionName = Maps.newHashMap(); - Map> tableIdToColumnName = Maps.newHashMap(); - statisticsJobUnderTest = new StatisticsJob(0L, tblIds, tblIdToPartitionName, - tableIdToColumnName, new HashMap<>()); - - StatsCategory statsCategory = new StatsCategory(); - StatsGranularity statsGranularity = new StatsGranularity(); - List statsTypes = Collections.singletonList(StatsType.ROW_COUNT); - statisticsTaskUnderTest = new SQLStatisticsTask(0L, - Collections.singletonList(new StatisticsDesc(statsCategory, statsGranularity, statsTypes))); - - List tasks = statisticsJobUnderTest.getTasks(); - tasks.add(statisticsTaskUnderTest); - } - - @Test - public void testUpdateJobState() throws Exception { - // Run the test - statisticsJobUnderTest.updateJobState(JobState.SCHEDULING); - - // Verify the results - JobState jobState = statisticsJobUnderTest.getJobState(); - Assert.assertEquals(JobState.SCHEDULING, jobState); - } - - @Test - public void testUpdateJobState_ThrowsDdlException() { - // Run the test - Assert.assertThrows(DdlException.class, - () -> statisticsJobUnderTest.updateJobState(JobState.RUNNING)); - } - - @Test - public void testUpdateJobInfoByTaskId() throws Exception { - // Setup - statisticsJobUnderTest.updateJobState(JobState.SCHEDULING); - statisticsJobUnderTest.updateJobState(JobState.RUNNING); - statisticsTaskUnderTest.updateTaskState(TaskState.RUNNING); - - // Run the test - long taskId = statisticsTaskUnderTest.getId(); - statisticsJobUnderTest.updateJobInfoByTaskId(taskId, ""); - - // Verify the results - JobState jobState = statisticsJobUnderTest.getJobState(); - Assert.assertEquals(JobState.FINISHED, jobState); - - TaskState taskState = statisticsTaskUnderTest.getTaskState(); - Assert.assertEquals(TaskState.FINISHED, taskState); - } - - @Test - public void testUpdateJobInfoByTaskIdFailed() throws Exception { - // Setup - statisticsJobUnderTest.updateJobState(JobState.SCHEDULING); - statisticsJobUnderTest.updateJobState(JobState.RUNNING); - statisticsTaskUnderTest.updateTaskState(TaskState.RUNNING); - - // Run the test - long taskId = statisticsTaskUnderTest.getId(); - statisticsJobUnderTest.updateJobInfoByTaskId(taskId, "errorMsg"); - - // Verify the results - JobState jobState = statisticsJobUnderTest.getJobState(); - Assert.assertEquals(JobState.FAILED, jobState); - - TaskState taskState = statisticsTaskUnderTest.getTaskState(); - Assert.assertEquals(TaskState.FAILED, taskState); - } - - @Test - public void testUpdateJobInfoByTaskId_ThrowsDdlException() { - // Run the test - long taskId = statisticsTaskUnderTest.getId(); - Assert.assertThrows(DdlException.class, - () -> statisticsJobUnderTest.updateJobInfoByTaskId(taskId, "")); - } -} diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsManagerTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsManagerTest.java deleted file mode 100644 index 84ab6560ba..0000000000 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsManagerTest.java +++ /dev/null @@ -1,167 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -import org.apache.doris.analysis.DropTableStatsStmt; -import org.apache.doris.catalog.Column; -import org.apache.doris.catalog.Database; -import org.apache.doris.catalog.Env; -import org.apache.doris.catalog.HashDistributionInfo; -import org.apache.doris.catalog.KeysType; -import org.apache.doris.catalog.OlapTable; -import org.apache.doris.catalog.PartitionInfo; -import org.apache.doris.catalog.PrimitiveType; -import org.apache.doris.common.AnalysisException; -import org.apache.doris.common.jmockit.Deencapsulation; -import org.apache.doris.datasource.InternalCatalog; -import org.apache.doris.statistics.StatisticsTaskResult.TaskResult; -import org.apache.doris.statistics.StatsCategory.Category; -import org.apache.doris.statistics.StatsGranularity.Granularity; - -import com.google.common.collect.Maps; -import mockit.Expectations; -import mockit.Mocked; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; - -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; - - -public class StatisticsManagerTest { - private StatisticsManager statisticsManagerUnderTest; - - @Before - public void setUp() throws Exception { - Column col1 = new Column("c1", PrimitiveType.STRING); - Column col2 = new Column("c2", PrimitiveType.INT); - OlapTable tbl1 = new OlapTable(0L, "tbl1", Arrays.asList(col1, col2), KeysType.AGG_KEYS, - new PartitionInfo(), new HashDistributionInfo()); - OlapTable tbl2 = new OlapTable(1L, "tbl2", Arrays.asList(col1, col2), KeysType.DUP_KEYS, - new PartitionInfo(), new HashDistributionInfo()); - Database database = new Database(0L, "db"); - database.createTable(tbl1); - database.createTable(tbl2); - - InternalCatalog catalog = Env.getCurrentInternalCatalog(); - ConcurrentHashMap fullNameToDb = new ConcurrentHashMap<>(); - fullNameToDb.put("cluster:db", database); - Deencapsulation.setField(catalog, "fullNameToDb", fullNameToDb); - - ConcurrentHashMap idToDb = new ConcurrentHashMap<>(); - idToDb.put(0L, database); - Deencapsulation.setField(catalog, "idToDb", idToDb); - - statisticsManagerUnderTest = new StatisticsManager(); - } - - @Test - public void testUpdateStatistics() throws Exception { - // Setup - TaskResult taskResult = new TaskResult(); - taskResult.setDbId(0L); - taskResult.setTableId(0L); - taskResult.setCategory(Category.TABLE); - taskResult.setGranularity(Granularity.TABLE); - Map statsTypeToValue = new HashMap<>(); - statsTypeToValue.put(StatsType.ROW_COUNT, "1000"); - statsTypeToValue.put(StatsType.DATA_SIZE, "10240"); - taskResult.setStatsTypeToValue(statsTypeToValue); - - List statsTaskResults = Collections.singletonList( - new StatisticsTaskResult(Collections.singletonList(taskResult))); - - // Run the test - statisticsManagerUnderTest.updateStatistics(statsTaskResults); - Statistics statistics = statisticsManagerUnderTest.getStatistics(); - TableStats tableStats = statistics.getTableStats(0L); - - // Verify the results - double rowCount = tableStats.getRowCount(); - Assert.assertEquals(1000L, rowCount, 0.1); - - long dataSize = tableStats.getDataSize(); - Assert.assertEquals(10240L, dataSize); - } - - @Test - public void testUpdateStatistics_ThrowsAnalysisException() { - // Setup - TaskResult taskResult = new TaskResult(); - taskResult.setDbId(0L); - taskResult.setTableId(1L); - taskResult.setPartitionName("partitionName"); - taskResult.setColumnName("columnName"); - taskResult.setCategory(Category.TABLE); - taskResult.setGranularity(Granularity.TABLE); - taskResult.setStatsTypeToValue(new HashMap<>()); - List statsTaskResults = Collections.singletonList( - new StatisticsTaskResult(Collections.singletonList(taskResult))); - - // Run the test - Assert.assertThrows(AnalysisException.class, - () -> statisticsManagerUnderTest.updateStatistics(statsTaskResults)); - } - - @Test - public void testDropStats(@Mocked DropTableStatsStmt stmt) throws AnalysisException { - TaskResult taskResult = new TaskResult(); - taskResult.setDbId(0L); - taskResult.setTableId(0L); - taskResult.setCategory(Category.TABLE); - taskResult.setGranularity(Granularity.TABLE); - Map statsTypeToValue = new HashMap<>(); - statsTypeToValue.put(StatsType.ROW_COUNT, "1000"); - statsTypeToValue.put(StatsType.DATA_SIZE, "10240"); - taskResult.setStatsTypeToValue(statsTypeToValue); - - List statsTaskResults = Collections.singletonList( - new StatisticsTaskResult(Collections.singletonList(taskResult))); - statisticsManagerUnderTest.updateStatistics(statsTaskResults); - - Map> tblIdToPartition = Maps.newHashMap(); - tblIdToPartition.put(0L, null); - - new Expectations() { - { - stmt.getTblIdToPartition(); - this.minTimes = 0; - this.result = tblIdToPartition; - } - }; - - // Run the test - statisticsManagerUnderTest.dropStats(stmt); - - // Verify the results - Statistics statistics = statisticsManagerUnderTest.getStatistics(); - TableStats statsOrDefault = statistics.getTableStatsOrDefault(0L); - - double rowCount = statsOrDefault.getRowCount(); - Assert.assertEquals(-1.0f, rowCount, 0.0001); - - double dataSize = statsOrDefault.getDataSize(); - Assert.assertEquals(-1.0f, dataSize, 0.0001); - } -} diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsTest.java deleted file mode 100644 index 843606dd37..0000000000 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsTest.java +++ /dev/null @@ -1,267 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -import org.apache.doris.catalog.PrimitiveType; -import org.apache.doris.catalog.Type; -import org.apache.doris.common.AnalysisException; - -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; - -import java.util.HashMap; -import java.util.Map; - -public class StatisticsTest { - private Statistics statisticsUnderTest; - - @Before - public void setUp() throws Exception { - statisticsUnderTest = new Statistics(); - } - - @Test - public void testUpdateTableStats() throws Exception { - // Setup - Map statsTypeToValue = new HashMap<>(); - statsTypeToValue.put(StatsType.ROW_COUNT, "1000"); - - // Run the test - statisticsUnderTest.updateTableStats(0L, statsTypeToValue); - long rowCount = (long) statisticsUnderTest.getTableStats(0L).getRowCount(); - - // Verify the results - Assert.assertEquals(1000L, rowCount); - } - - @Test - public void testUpdateTableStats_ThrowsAnalysisException() { - // Setup - Map statsTypeToValue = new HashMap<>(); - statsTypeToValue.put(StatsType.ROW_COUNT, "-100"); - - // Run the test - Assert.assertThrows(AnalysisException.class, - () -> statisticsUnderTest.updateTableStats(0L, statsTypeToValue)); - } - - @Test - public void testUpdatePartitionStats() throws Exception { - // Setup - Map statsTypeToValue = new HashMap<>(); - statsTypeToValue.put(StatsType.ROW_COUNT, "1000"); - - // Run the test - statisticsUnderTest.updatePartitionStats(0L, "partitionName", statsTypeToValue); - Map partitionStats = statisticsUnderTest - .getPartitionStats(0L, "partitionName"); - long rowCount = partitionStats.get("partitionName").getRowCount(); - - // Verify the results - Assert.assertEquals(1000L, rowCount); - } - - @Test - public void testUpdatePartitionStats_ThrowsAnalysisException() { - // Setup - Map statsTypeToValue = new HashMap<>(); - statsTypeToValue.put(StatsType.ROW_COUNT, "-100"); - - // Run the test - Assert.assertThrows(AnalysisException.class, () -> statisticsUnderTest - .updatePartitionStats(0L, "partitionName", statsTypeToValue)); - } - - @Test - public void testUpdateTableColumnStats() throws Exception { - // Setup - Type columnType = Type.fromPrimitiveType(PrimitiveType.STRING); - Map statsTypeToValue = new HashMap<>(); - statsTypeToValue.put(StatsType.NUM_NULLS, "1000"); - - // Run the test - statisticsUnderTest.updateColumnStats(0L, "columnName", columnType, statsTypeToValue); - Map columnStats = statisticsUnderTest.getColumnStats(0L); - long numNulls = (long) columnStats.get("columnName").getNumNulls(); - - // Verify the results - Assert.assertEquals(1000L, numNulls); - } - - @Test - public void testUpdateTableColumnStats_ThrowsAnalysisException() { - // Setup - Type columnType = Type.fromPrimitiveType(PrimitiveType.BIGINT); - Map statsTypeToValue = new HashMap<>(); - statsTypeToValue.put(StatsType.MAX_VALUE, "ABC"); - - // Run the test - Assert.assertThrows(AnalysisException.class, () -> statisticsUnderTest - .updateColumnStats(0L, "columnName", columnType, statsTypeToValue)); - } - - @Test - public void testUpdatePartitionColumnStats() throws Exception { - // Setup - Type columnType = Type.fromPrimitiveType(PrimitiveType.STRING); - Map statsTypeToValue = new HashMap<>(); - statsTypeToValue.put(StatsType.NUM_NULLS, "1000"); - - // Run the test - statisticsUnderTest.updateColumnStats(0L, "partitionName", - "columnName", columnType, statsTypeToValue); - Map columnStats = statisticsUnderTest - .getColumnStats(0L, "partitionName"); - long numNulls = (long) columnStats.get("columnName").getNumNulls(); - - // Verify the results - Assert.assertEquals(1000L, numNulls); - } - - @Test - public void testUpdatePartitionColumnStats_ThrowsAnalysisException() { - // Setup - Type columnType = Type.fromPrimitiveType(PrimitiveType.BIGINT); - Map statsTypeToValue = new HashMap<>(); - statsTypeToValue.put(StatsType.ROW_COUNT, "ABC"); - - // Run the test - Assert.assertThrows(AnalysisException.class, () -> statisticsUnderTest.updateColumnStats( - 0L, "partitionName", "columnName", columnType, statsTypeToValue)); - } - - @Test - public void testGetTableStats() throws Exception { - // Setup - Map statsTypeToValue = new HashMap<>(); - statsTypeToValue.put(StatsType.ROW_COUNT, "1000"); - statisticsUnderTest.updateTableStats(0L, statsTypeToValue); - - // Run the test - TableStats result = statisticsUnderTest.getTableStats(0L); - - // Verify the results - double rowCount = result.getRowCount(); - Assert.assertEquals(1000, rowCount, 0.1); - } - - @Test - public void testGetTableStats_ThrowsAnalysisException() { - // Verify the results - Assert.assertThrows(AnalysisException.class, - () -> statisticsUnderTest.getTableStats(0L)); - } - - @Test - public void testGetPartitionStats() throws Exception { - // Setup - Map statsTypeToValue = new HashMap<>(); - statsTypeToValue.put(StatsType.ROW_COUNT, "1000"); - statisticsUnderTest.updatePartitionStats(0L, "partitionName", statsTypeToValue); - - // Run the test - Map result = statisticsUnderTest.getPartitionStats(0L); - - // Verify the results - PartitionStats partitionStats = result.get("partitionName"); - long rowCount = partitionStats.getRowCount(); - Assert.assertEquals(1000, rowCount); - } - - @Test - public void testGetPartitionStats1_ThrowsAnalysisException() { - // Verify the results - Assert.assertThrows(AnalysisException.class, - () -> statisticsUnderTest.getPartitionStats(0L)); - } - - @Test - public void testGetPartitionStatsWithName() throws Exception { - // Setup - Map statsTypeToValue = new HashMap<>(); - statsTypeToValue.put(StatsType.ROW_COUNT, "1000"); - statisticsUnderTest.updatePartitionStats(0L, "partitionName", statsTypeToValue); - - // Run the test - Map result = statisticsUnderTest - .getPartitionStats(0L, "partitionName"); - - // Verify the results - PartitionStats partitionStats = result.get("partitionName"); - long rowCount = partitionStats.getRowCount(); - Assert.assertEquals(1000, rowCount); - } - - @Test - public void testGetPartitionStatsWithName_ThrowsAnalysisException() { - // Run the test - Assert.assertThrows(AnalysisException.class, () -> statisticsUnderTest - .getPartitionStats(0L, "partitionName")); - } - - @Test - public void testGetTableColumnStats() throws Exception { - // Setup - Type columnType = Type.fromPrimitiveType(PrimitiveType.STRING); - Map statsTypeToValue = new HashMap<>(); - statsTypeToValue.put(StatsType.NUM_NULLS, "1000"); - statisticsUnderTest.updateColumnStats(0L, "columnName", columnType, statsTypeToValue); - - // Run the test - Map result = statisticsUnderTest.getColumnStats(0L); - - // Verify the results - ColumnStat columnStats = result.get("columnName"); - double numNulls = columnStats.getNumNulls(); - Assert.assertEquals(1000, numNulls, 0.1); - } - - @Test - public void testGetTableColumnStats_ThrowsAnalysisException() { - // Verify the results - Assert.assertThrows(AnalysisException.class, - () -> statisticsUnderTest.getColumnStats(0L)); - } - - @Test - public void testGetPartitionColumnStats() throws Exception { - // Setup - Type columnType = Type.fromPrimitiveType(PrimitiveType.STRING); - Map statsTypeToValue = new HashMap<>(); - statsTypeToValue.put(StatsType.NUM_NULLS, "1000"); - statisticsUnderTest.updateColumnStats(0L, "partitionName", - "columnName", columnType, statsTypeToValue); - - // Run the test - Map result = statisticsUnderTest - .getColumnStats(0L, "partitionName"); - - // Verify the results - ColumnStat columnStats = result.get("columnName"); - double numNulls = columnStats.getNumNulls(); - Assert.assertEquals(1000, numNulls, 0.1); - } - - @Test - public void testGetPartitionColumnStats_ThrowsAnalysisException() { - // Verify the results - Assert.assertThrows(AnalysisException.class, () -> statisticsUnderTest - .getColumnStats(0L, "partitionName")); - } -} diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsTest.java deleted file mode 100644 index 9c6ccdd380..0000000000 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsTest.java +++ /dev/null @@ -1,182 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -import org.apache.doris.catalog.PrimitiveType; -import org.apache.doris.catalog.Type; -import org.apache.doris.common.AnalysisException; - -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; - -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -public class TableStatsTest { - private TableStats tableStatsUnderTest; - - @Before - public void setUp() throws Exception { - tableStatsUnderTest = new TableStats(); - } - - @Test - public void testUpdateTableStats() throws Exception { - // Setup - Map statsTypeToValue = new HashMap<>(); - statsTypeToValue.put(StatsType.ROW_COUNT, "1000"); - statsTypeToValue.put(StatsType.DATA_SIZE, "10240"); - - // Run the test - tableStatsUnderTest.updateTableStats(statsTypeToValue); - - // Verify the results - double rowCount = tableStatsUnderTest.getRowCount(); - Assert.assertEquals(1000, rowCount, 0.01); - - long dataSize = tableStatsUnderTest.getDataSize(); - Assert.assertEquals(10240, dataSize); - } - - @Test - public void testUpdateTableStats_ThrowsAnalysisException() { - // Setup - Map statsTypeToValue = new HashMap<>(); - statsTypeToValue.put(StatsType.AVG_SIZE, "8"); - statsTypeToValue.put(StatsType.ROW_COUNT, "abc"); - - // Run the test - Assert.assertThrows(AnalysisException.class, - () -> tableStatsUnderTest.updateTableStats(statsTypeToValue)); - } - - @Test - public void testUpdatePartitionStats() throws Exception { - // Setup - Map statsTypeToValue = new HashMap<>(); - statsTypeToValue.put(StatsType.ROW_COUNT, "1000"); - statsTypeToValue.put(StatsType.DATA_SIZE, "10240"); - - // Run the test - tableStatsUnderTest.updatePartitionStats("partitionName", statsTypeToValue); - PartitionStats partitionStats = tableStatsUnderTest.getNameToPartitionStats().get("partitionName"); - - // Verify the results - long rowCount = partitionStats.getRowCount(); - Assert.assertEquals(1000, rowCount); - - long dataSize = partitionStats.getDataSize(); - Assert.assertEquals(10240, dataSize); - } - - @Test - public void testUpdatePartitionStats_ThrowsAnalysisException() { - // Setup - Map statsTypeToValue = new HashMap<>(); - statsTypeToValue.put(StatsType.ROW_COUNT, "abc"); - - // Run the test - Assert.assertThrows(AnalysisException.class, () -> tableStatsUnderTest - .updatePartitionStats("partitionName", statsTypeToValue)); - } - - @Test - public void testUpdateColumnStats() throws Exception { - // Setup - Type columnType = Type.fromPrimitiveType(PrimitiveType.BIGINT); - Map statsTypeToValue = new HashMap<>(); - statsTypeToValue.put(StatsType.NDV, "1"); - statsTypeToValue.put(StatsType.AVG_SIZE, "8"); - statsTypeToValue.put(StatsType.MAX_SIZE, "8"); - statsTypeToValue.put(StatsType.NUM_NULLS, "2"); - statsTypeToValue.put(StatsType.MIN_VALUE, "0"); - statsTypeToValue.put(StatsType.MAX_VALUE, "1000"); - - // Run the test - tableStatsUnderTest.updateColumnStats("columnName", columnType, statsTypeToValue); - ColumnStat columnStats = tableStatsUnderTest.getColumnStats("columnName"); - - // Verify the results - double ndv = columnStats.getNdv(); - Assert.assertEquals(1L, ndv, 0.01); - - double avgSize = columnStats.getAvgSizeByte(); - Assert.assertEquals(8.0f, avgSize, 0.0001); - - double maxSize = columnStats.getMaxSizeByte(); - Assert.assertEquals(8L, maxSize, 0.01); - - double maxValue = columnStats.getMaxValue(); - Assert.assertEquals(1000, maxValue, 0.01); - - double minValue = columnStats.getMinValue(); - Assert.assertEquals(0L, minValue, 0.01); - - double numNulls = columnStats.getNumNulls(); - Assert.assertEquals(2, numNulls, 0.01); - } - - @Test - public void testUpdateColumnStats_ThrowsAnalysisException() { - // Setup - Type columnType = Type.fromPrimitiveType(PrimitiveType.INVALID_TYPE); - Map statsTypeToValue = new HashMap<>(); - statsTypeToValue.put(StatsType.AVG_SIZE, "abc"); - // Run the test - Assert.assertThrows(AnalysisException.class, () -> tableStatsUnderTest - .updateColumnStats("columnName", columnType, statsTypeToValue)); - } - - @Test - public void testGetShowInfo() throws AnalysisException { - // Setup - Map statsTypeToValue = new HashMap<>(); - statsTypeToValue.put(StatsType.ROW_COUNT, "1000"); - statsTypeToValue.put(StatsType.DATA_SIZE, "10240"); - - tableStatsUnderTest.updateTableStats(statsTypeToValue); - String[] expectedInfo = {"1000.0", "10240"}; - - // Run the test - List showInfo = tableStatsUnderTest.getShowInfo(); - String[] result = showInfo.toArray(new String[0]); - - // Verify the results - Assert.assertArrayEquals(expectedInfo, result); - } - - @Test - public void testGetShowInfoWithPartitionName() throws AnalysisException { - // Setup - Map statsTypeToValue = new HashMap<>(); - statsTypeToValue.put(StatsType.ROW_COUNT, "1000"); - statsTypeToValue.put(StatsType.DATA_SIZE, "10240"); - - tableStatsUnderTest.updatePartitionStats("partitionName", statsTypeToValue); - String[] expectedInfo = {"1000", "10240"}; - - // Run the test - List showInfo = tableStatsUnderTest.getShowInfo("partitionName"); - String[] result = showInfo.toArray(new String[0]); - - // Verify the results - Assert.assertArrayEquals(expectedInfo, result); - } -}