[refactor](statistics) Remove deprecated statistics related codes (#14797)
@@ -1268,10 +1268,6 @@ alter_stmt ::=
     {:
         RESULT = new AlterSqlBlockRuleStmt(ruleName, properties);
     :}
-    | KW_ALTER KW_TABLE table_name:tbl KW_SET KW_STATS LPAREN key_value_map:map RPAREN opt_partition_names:partitionNames
-    {:
-        RESULT = new AlterTableStatsStmt(tbl, map, partitionNames);
-    :}
     | KW_ALTER KW_TABLE table_name:tbl KW_MODIFY KW_COLUMN ident:columnName
     KW_SET KW_STATS LPAREN key_value_map:map RPAREN opt_partition_names:partitionNames
     {:
@@ -3672,11 +3668,6 @@ show_param ::=
     {:
         RESULT = new ShowSyncJobStmt(dbName);
     :}
-    /* show table stats */
-    | KW_TABLE KW_STATS opt_table_name:tbl opt_partition_names:partitionNames
-    {:
-        RESULT = new ShowTableStatsStmt(tbl, partitionNames);
-    :}
     /* show column stats */
     | KW_COLUMN KW_STATS table_name:tbl opt_partition_names:partitionNames
     {:
@@ -31,7 +31,7 @@ import org.apache.doris.common.util.PrintableMap;
 import org.apache.doris.common.util.Util;
 import org.apache.doris.mysql.privilege.PrivPredicate;
 import org.apache.doris.qe.ConnectContext;
-import org.apache.doris.statistics.ColumnStat;
+import org.apache.doris.statistics.ColumnStatistic;
 import org.apache.doris.statistics.StatsType;

 import com.google.common.collect.ImmutableSet;
@@ -56,12 +56,12 @@ public class AlterColumnStatsStmt extends DdlStmt {

     private static final ImmutableSet<StatsType> CONFIGURABLE_PROPERTIES_SET = new ImmutableSet.Builder<StatsType>()
             .add(StatsType.ROW_COUNT)
-            .add(ColumnStat.NDV)
-            .add(ColumnStat.AVG_SIZE)
-            .add(ColumnStat.MAX_SIZE)
-            .add(ColumnStat.NUM_NULLS)
-            .add(ColumnStat.MIN_VALUE)
-            .add(ColumnStat.MAX_VALUE)
+            .add(ColumnStatistic.NDV)
+            .add(ColumnStatistic.AVG_SIZE)
+            .add(ColumnStatistic.MAX_SIZE)
+            .add(ColumnStatistic.NUM_NULLS)
+            .add(ColumnStatistic.MIN_VALUE)
+            .add(ColumnStatistic.MAX_VALUE)
             .add(StatsType.DATA_SIZE)
             .build();

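Note: the whitelist keeps the same stats keys; only their owner moves from the removed ColumnStat to ColumnStatistic. A minimal sketch of how such a whitelist is typically enforced, mirroring the stream-and-filter validation visible in the removed AlterTableStatsStmt.analyze() below; the StatsKeyValidator class itself is hypothetical and not part of this commit:

    import org.apache.doris.common.AnalysisException;
    import org.apache.doris.statistics.StatsType;

    import com.google.common.collect.ImmutableSet;

    import java.util.Map;
    import java.util.Optional;

    // Hypothetical helper: rejects any property key that is not in the
    // whitelist, following the same Optional/stream pattern used by the
    // removed AlterTableStatsStmt.analyze().
    final class StatsKeyValidator {
        static void validate(Map<String, String> properties,
                             ImmutableSet<StatsType> allowed) throws AnalysisException {
            Optional<StatsType> invalid = properties.keySet().stream()
                    .map(StatsType::fromString)              // e.g. "ndv" -> StatsType.NDV
                    .filter(type -> !allowed.contains(type))
                    .findFirst();
            if (invalid.isPresent()) {
                throw new AnalysisException(invalid.get() + " is invalid statistics");
            }
        }
    }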
@@ -1,165 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-package org.apache.doris.analysis;
-
-import org.apache.doris.catalog.Database;
-import org.apache.doris.catalog.Env;
-import org.apache.doris.catalog.OlapTable;
-import org.apache.doris.catalog.Table;
-import org.apache.doris.common.AnalysisException;
-import org.apache.doris.common.ErrorCode;
-import org.apache.doris.common.ErrorReport;
-import org.apache.doris.common.UserException;
-import org.apache.doris.common.util.PrintableMap;
-import org.apache.doris.common.util.Util;
-import org.apache.doris.mysql.privilege.PrivPredicate;
-import org.apache.doris.qe.ConnectContext;
-import org.apache.doris.statistics.StatsType;
-import org.apache.doris.statistics.TableStats;
-
-import com.google.common.collect.ImmutableSet;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-
-import java.util.List;
-import java.util.Map;
-import java.util.Optional;
-import java.util.Set;
-
-/**
- * Manually inject statistics for tables or partitions.
- * Only OLAP table statistics are supported.
- *
- * syntax:
- * ALTER TABLE table_name
- * SET STATS ('k1' = 'v1', ...) [ PARTITIONS(p_name1, p_name2...) ]
- */
-public class AlterTableStatsStmt extends DdlStmt {
-
-    private static final ImmutableSet<StatsType> CONFIGURABLE_PROPERTIES_SET =
-            new ImmutableSet.Builder<StatsType>()
-                    .add(TableStats.DATA_SIZE)
-                    .add(TableStats.ROW_COUNT)
-                    .build();
-
-    private final TableName tableName;
-    private final PartitionNames optPartitionNames;
-    private final Map<String, String> properties;
-
-    private final List<String> partitionNames = Lists.newArrayList();
-    private final Map<StatsType, String> statsTypeToValue = Maps.newHashMap();
-
-    public AlterTableStatsStmt(TableName tableName, Map<String, String> properties,
-            PartitionNames optPartitionNames) {
-        this.tableName = tableName;
-        this.properties = properties == null ? Maps.newHashMap() : properties;
-        this.optPartitionNames = optPartitionNames;
-    }
-
-    public TableName getTableName() {
-        return tableName;
-    }
-
-    public List<String> getPartitionNames() {
-        return partitionNames;
-    }
-
-    public Map<StatsType, String> getStatsTypeToValue() {
-        return statsTypeToValue;
-    }
-
-    @Override
-    public void analyze(Analyzer analyzer) throws UserException {
-        super.analyze(analyzer);
-
-        // check table name
-        tableName.analyze(analyzer);
-
-        // disallow external catalog
-        Util.prohibitExternalCatalog(tableName.getCtl(), this.getClass().getSimpleName());
-
-        // check partition
-        checkPartitionNames();
-
-        // check properties
-        Optional<StatsType> optional = properties.keySet().stream().map(StatsType::fromString)
-                .filter(statsType -> !CONFIGURABLE_PROPERTIES_SET.contains(statsType))
-                .findFirst();
-        if (optional.isPresent()) {
-            throw new AnalysisException(optional.get() + " is invalid statistics");
-        }
-
-        // check auth
-        if (!Env.getCurrentEnv().getAuth()
-                .checkTblPriv(ConnectContext.get(), tableName.getDb(), tableName.getTbl(), PrivPredicate.ALTER)) {
-            ErrorReport.reportAnalysisException(ErrorCode.ERR_TABLEACCESS_DENIED_ERROR, "ALTER TABLE STATS",
-                    ConnectContext.get().getQualifiedUser(), ConnectContext.get().getRemoteIP(),
-                    tableName.getDb() + ": " + tableName.getTbl());
-        }
-
-        // get statsTypeToValue
-        properties.forEach((key, value) -> {
-            StatsType statsType = StatsType.fromString(key);
-            statsTypeToValue.put(statsType, value);
-        });
-    }
-
-    private void checkPartitionNames() throws AnalysisException {
-        Database db = analyzer.getEnv().getInternalCatalog().getDbOrAnalysisException(tableName.getDb());
-        Table table = db.getTableOrAnalysisException(tableName.getTbl());
-
-        if (table.getType() != Table.TableType.OLAP) {
-            throw new AnalysisException("Only OLAP table statistics are supported");
-        }
-
-        if (optPartitionNames != null) {
-            OlapTable olapTable = (OlapTable) table;
-
-            if (!olapTable.isPartitioned()) {
-                throw new AnalysisException("Not a partitioned table: " + olapTable.getName());
-            }
-
-            optPartitionNames.analyze(analyzer);
-            List<String> names = optPartitionNames.getPartitionNames();
-            Set<String> olapPartitionNames = olapTable.getPartitionNames();
-            Optional<String> optional = names.stream()
-                    .filter(name -> !olapPartitionNames.contains(name))
-                    .findFirst();
-            if (optional.isPresent()) {
-                throw new AnalysisException("Partition does not exist: " + optional.get());
-            }
-            partitionNames.addAll(names);
-        }
-    }
-
-    @Override
-    public String toSql() {
-        StringBuilder sb = new StringBuilder();
-        sb.append("ALTER TABLE ");
-        sb.append(tableName.toSql());
-        sb.append(" SET STATS ");
-        sb.append("(");
-        sb.append(new PrintableMap<>(properties,
-                " = ", true, false));
-        sb.append(") ");
-        if (optPartitionNames != null) {
-            sb.append(optPartitionNames.toSql());
-        }
-        return sb.toString();
-    }
-}
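Together with the first parser hunk, this deletes the manual `ALTER TABLE ... SET STATS ('k1' = 'v1', ...) [ PARTITIONS(...) ]` path end to end. A minimal sketch of the property-to-StatsType conversion the removed analyze() ended with; the class name and the sample key/value pair are invented for illustration:

    import org.apache.doris.statistics.StatsType;

    import com.google.common.collect.Maps;

    import java.util.Map;

    // Illustrative only: how the removed analyze() turned validated user
    // properties into the statsTypeToValue map that
    // StatisticsManager.alterTableStatistics() consumed.
    public class StatsPropertyMapping {
        public static void main(String[] args) {
            Map<String, String> properties = Maps.newHashMap();
            properties.put("row_count", "1000");
            Map<StatsType, String> statsTypeToValue = Maps.newHashMap();
            properties.forEach((key, value) ->
                    statsTypeToValue.put(StatsType.fromString(key), value));
            System.out.println(statsTypeToValue); // {ROW_COUNT=1000}
        }
    }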
@@ -31,7 +31,6 @@ import org.apache.doris.mysql.privilege.PaloAuth;
 import org.apache.doris.mysql.privilege.PrivPredicate;
 import org.apache.doris.qe.ConnectContext;
 import org.apache.doris.qe.ShowResultSetMetaData;
-import org.apache.doris.statistics.StatisticsJob;

 import com.google.common.base.Preconditions;
 import com.google.common.base.Strings;
@@ -271,7 +270,7 @@ public class ShowAnalyzeStmt extends ShowStmt {

         stateValue = value.toUpperCase();
         try {
-            StatisticsJob.JobState.valueOf(stateValue);
+            // support it later
         } catch (Exception e) {
             valid = false;
         }

@@ -1,110 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-package org.apache.doris.analysis;
-
-import org.apache.doris.catalog.Column;
-import org.apache.doris.catalog.ScalarType;
-import org.apache.doris.common.ErrorCode;
-import org.apache.doris.common.ErrorReport;
-import org.apache.doris.common.UserException;
-import org.apache.doris.common.util.Util;
-import org.apache.doris.qe.ShowResultSetMetaData;
-import org.apache.doris.statistics.TableStats;
-
-import com.google.common.base.Preconditions;
-import com.google.common.base.Strings;
-import com.google.common.collect.ImmutableList;
-
-import java.util.Collections;
-import java.util.List;
-
-public class ShowTableStatsStmt extends ShowStmt {
-
-    private static final ImmutableList<String> TITLE_NAMES =
-            new ImmutableList.Builder<String>()
-                    .add("table_name")
-                    .add(TableStats.ROW_COUNT.getValue())
-                    .add(TableStats.DATA_SIZE.getValue())
-                    .build();
-
-    private final TableName tableName;
-
-    // after analyzed
-    // There is only one attribute for both @tableName and @dbName at the same time.
-    private String dbName;
-
-    private final PartitionNames partitionNames;
-
-    public ShowTableStatsStmt(TableName tableName, PartitionNames partitionNames) {
-        this.tableName = tableName;
-        this.partitionNames = partitionNames;
-    }
-
-    public String getTableName() {
-        Preconditions.checkArgument(isAnalyzed(), "The db name must be obtained after the parsing is complete");
-        if (tableName == null) {
-            return null;
-        }
-        return tableName.getTbl();
-    }
-
-    public String getDbName() {
-        Preconditions.checkArgument(isAnalyzed(), "The db name must be obtained after the parsing is complete");
-        if (tableName == null) {
-            return dbName;
-        }
-        return tableName.getDb();
-    }
-
-    public List<String> getPartitionNames() {
-        if (partitionNames == null) {
-            return Collections.emptyList();
-        }
-        return partitionNames.getPartitionNames();
-    }
-
-    @Override
-    public void analyze(Analyzer analyzer) throws UserException {
-        super.analyze(analyzer);
-        if (tableName == null) {
-            dbName = analyzer.getDefaultDb();
-            if (Strings.isNullOrEmpty(dbName)) {
-                ErrorReport.reportAnalysisException(ErrorCode.ERR_NO_DB_ERROR);
-            }
-            return;
-        }
-        tableName.analyze(analyzer);
-
-        if (partitionNames != null) {
-            partitionNames.analyze(analyzer);
-        }
-
-        // disallow external catalog
-        Util.prohibitExternalCatalog(tableName.getCtl(), this.getClass().getSimpleName());
-    }
-
-    @Override
-    public ShowResultSetMetaData getMetaData() {
-        ShowResultSetMetaData.Builder builder = ShowResultSetMetaData.builder();
-
-        for (String title : TITLE_NAMES) {
-            builder.addColumn(new Column(title, ScalarType.createVarchar(30)));
-        }
-        return builder.build();
-    }
-}
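With the grammar hunk above, this removes the `SHOW TABLE STATS` surface entirely. For reference, its result set had exactly three VARCHAR(30) columns. A sketch of the row shape it returned; the sample values are invented:

    import java.util.Arrays;
    import java.util.List;

    // Illustrative only: column layout of the removed SHOW TABLE STATS result
    // ("table_name" plus the TableStats.ROW_COUNT and TableStats.DATA_SIZE keys,
    // each rendered as VARCHAR(30)).
    public class ShowTableStatsShape {
        public static void main(String[] args) {
            List<String> header = Arrays.asList("table_name", "row_count", "data_size");
            List<String> row = Arrays.asList("t1", "1000", "81920");
            System.out.println(header);
            System.out.println(row);
        }
    }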
@@ -211,10 +211,6 @@ import org.apache.doris.service.FrontendOptions;
 import org.apache.doris.statistics.AnalysisManager;
 import org.apache.doris.statistics.AnalysisTaskScheduler;
 import org.apache.doris.statistics.StatisticsCache;
-import org.apache.doris.statistics.StatisticsJobManager;
-import org.apache.doris.statistics.StatisticsJobScheduler;
-import org.apache.doris.statistics.StatisticsManager;
-import org.apache.doris.statistics.StatisticsTaskScheduler;
 import org.apache.doris.system.Backend;
 import org.apache.doris.system.FQDNManager;
 import org.apache.doris.system.Frontend;
@@ -396,11 +392,6 @@ public class Env {
     private DeployManager deployManager;

     private TabletStatMgr tabletStatMgr;
-    // statistics
-    private StatisticsManager statisticsManager;
-    private StatisticsJobManager statisticsJobManager;
-    private StatisticsJobScheduler statisticsJobScheduler;
-    private StatisticsTaskScheduler statisticsTaskScheduler;

     private PaloAuth auth;

@@ -594,11 +585,6 @@ public class Env {
         this.globalTransactionMgr = new GlobalTransactionMgr(this);

         this.tabletStatMgr = new TabletStatMgr();
-        // statistics
-        this.statisticsManager = new StatisticsManager();
-        this.statisticsJobManager = new StatisticsJobManager();
-        this.statisticsJobScheduler = new StatisticsJobScheduler();
-        this.statisticsTaskScheduler = new StatisticsTaskScheduler();

         this.auth = new PaloAuth();
         this.domainResolver = new DomainResolver(auth);
@@ -756,23 +742,6 @@ public class Env {
         return checkpointer;
     }

-    // statistics
-    public StatisticsManager getStatisticsManager() {
-        return statisticsManager;
-    }
-
-    public StatisticsJobManager getStatisticsJobManager() {
-        return statisticsJobManager;
-    }
-
-    public StatisticsJobScheduler getStatisticsJobScheduler() {
-        return statisticsJobScheduler;
-    }
-
-    public StatisticsTaskScheduler getStatisticsTaskScheduler() {
-        return statisticsTaskScheduler;
-    }
-
     // Use tryLock to avoid potential dead lock
     private boolean tryLock(boolean mustLock) {
         while (true) {
@@ -1429,8 +1398,6 @@ public class Env {
         partitionInMemoryInfoCollector.start();
         streamLoadRecordMgr.start();
         getInternalCatalog().getIcebergTableCreationRecordMgr().start();
-        this.statisticsJobScheduler.start();
-        this.statisticsTaskScheduler.start();
         new InternalSchemaInitializer().start();
         if (Config.enable_fqdn_mode) {
             fqdnManager.start();
@@ -484,15 +484,12 @@ public class OlapScanNode extends ScanNode {
      * Remove the method after statistics collection is working properly
      */
     public void mockRowCountInStatistic() {
-        long tableId = desc.getTable().getId();
         cardinality = 0;
         for (long selectedPartitionId : selectedPartitionIds) {
             final Partition partition = olapTable.getPartition(selectedPartitionId);
             final MaterializedIndex baseIndex = partition.getBaseIndex();
             cardinality += baseIndex.getRowCount();
         }
-        Env.getCurrentEnv().getStatisticsManager()
-                .getStatistics().mockTableStatsWithRowCount(tableId, cardinality);
     }

     @Override
@@ -39,7 +39,6 @@ import org.apache.doris.analysis.AlterResourceStmt;
 import org.apache.doris.analysis.AlterRoutineLoadStmt;
 import org.apache.doris.analysis.AlterSqlBlockRuleStmt;
 import org.apache.doris.analysis.AlterSystemStmt;
-import org.apache.doris.analysis.AlterTableStatsStmt;
 import org.apache.doris.analysis.AlterTableStmt;
 import org.apache.doris.analysis.AlterUserStmt;
 import org.apache.doris.analysis.AlterViewStmt;
@@ -165,8 +164,6 @@ public class DdlExecutor {
             env.createMaterializedView((CreateMaterializedViewStmt) ddlStmt);
         } else if (ddlStmt instanceof AlterTableStmt) {
             env.alterTable((AlterTableStmt) ddlStmt);
-        } else if (ddlStmt instanceof AlterTableStatsStmt) {
-            env.getStatisticsManager().alterTableStatistics((AlterTableStatsStmt) ddlStmt);
         } else if (ddlStmt instanceof AlterColumnStatsStmt) {
             StatisticsRepository.alterColumnStatistics((AlterColumnStatsStmt) ddlStmt);
         } else if (ddlStmt instanceof AlterViewStmt) {
@@ -342,7 +339,7 @@ public class DdlExecutor {
         } else if (ddlStmt instanceof AlterUserStmt) {
             env.getAuth().alterUser((AlterUserStmt) ddlStmt);
         } else if (ddlStmt instanceof DropTableStatsStmt) {
-            env.getStatisticsManager().dropStats((DropTableStatsStmt) ddlStmt);
+            // TODO: support later
         } else {
             throw new DdlException("Unknown statement.");
         }
@@ -85,7 +85,6 @@ import org.apache.doris.analysis.ShowStreamLoadStmt;
 import org.apache.doris.analysis.ShowSyncJobStmt;
 import org.apache.doris.analysis.ShowTableCreationStmt;
 import org.apache.doris.analysis.ShowTableIdStmt;
-import org.apache.doris.analysis.ShowTableStatsStmt;
 import org.apache.doris.analysis.ShowTableStatusStmt;
 import org.apache.doris.analysis.ShowTableStmt;
 import org.apache.doris.analysis.ShowTabletStmt;
@@ -179,7 +178,6 @@ import org.apache.doris.mtmv.metadata.MTMVJob;
 import org.apache.doris.mtmv.metadata.MTMVTask;
 import org.apache.doris.mysql.privilege.PrivPredicate;
 import org.apache.doris.statistics.ColumnStatistic;
-import org.apache.doris.statistics.StatisticsJobManager;
 import org.apache.doris.statistics.StatisticsRepository;
 import org.apache.doris.system.Backend;
 import org.apache.doris.system.Diagnoser;
@@ -360,8 +358,6 @@ public class ShowExecutor {
             handleShowSyncJobs();
         } else if (stmt instanceof ShowSqlBlockRuleStmt) {
             handleShowSqlBlockRule();
-        } else if (stmt instanceof ShowTableStatsStmt) {
-            handleShowTableStats();
         } else if (stmt instanceof ShowColumnStatsStmt) {
             handleShowColumnStats();
         } else if (stmt instanceof ShowTableCreationStmt) {
@@ -2141,12 +2137,6 @@ public class ShowExecutor {

     }

-    private void handleShowTableStats() throws AnalysisException {
-        ShowTableStatsStmt showTableStatsStmt = (ShowTableStatsStmt) stmt;
-        List<List<String>> results = Env.getCurrentEnv().getStatisticsManager().showTableStatsList(showTableStatsStmt);
-        resultSet = new ShowResultSet(showTableStatsStmt.getMetaData(), results);
-    }
-
     private void handleShowColumnStats() throws AnalysisException {
         ShowColumnStatsStmt showColumnStatsStmt = (ShowColumnStatsStmt) stmt;
         TableName tableName = showColumnStatsStmt.getTableName();
@@ -2313,10 +2303,7 @@ public class ShowExecutor {
     }

     private void handleShowAnalyze() throws AnalysisException {
-        ShowAnalyzeStmt showStmt = (ShowAnalyzeStmt) stmt;
-        StatisticsJobManager jobManager = Env.getCurrentEnv().getStatisticsJobManager();
-        List<List<String>> results = jobManager.getAnalyzeJobInfos(showStmt);
-        resultSet = new ShowResultSet(showStmt.getMetaData(), results);
+        // TODO: Support later
     }

     private void handleCopyTablet() throws AnalysisException {

@@ -102,9 +102,9 @@ public class AnalysisTaskExecutor extends Thread {

     private void doFetchAndExecute() {
         BaseAnalysisTask task = taskScheduler.getPendingTasks();
-        AnalysisTaskWrapper jobWrapper = new AnalysisTaskWrapper(this, task);
+        AnalysisTaskWrapper taskWrapper = new AnalysisTaskWrapper(this, task);
         incr();
-        executors.submit(jobWrapper);
+        executors.submit(taskWrapper);
         Env.getCurrentEnv().getAnalysisManager()
                 .updateTaskStatus(task.info,
                         AnalysisState.RUNNING, "", System.currentTimeMillis());
@@ -1,332 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-package org.apache.doris.statistics;
-
-import org.apache.doris.analysis.LiteralExpr;
-import org.apache.doris.catalog.Type;
-import org.apache.doris.common.AnalysisException;
-import org.apache.doris.common.util.Util;
-import org.apache.doris.statistics.util.StatisticsUtil;
-
-import com.google.common.collect.Lists;
-
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.function.Predicate;
-
-/**
- * These are the statistics of a column.
- * The column stats are mainly used to provide input for the Optimizer's cost model.
- * <p>
- * The description of the column stats is as follows:
- * 1. @ndv: The number of distinct values of the column.
- * 2. @avgSize: The average size of the column, in bytes.
- * 3. @maxSize: The max size of the column, in bytes.
- * 4. @numNulls: The number of nulls.
- * 5. @minValue: The min value of the column.
- * 6. @maxValue: The max value of the column.
- * <p>
- * The granularity of the statistics is the whole table.
- * For example:
- * "@ndv = 10" means that the number of distinct values is 10 in the whole table.
- */
-public class ColumnStat {
-
-    public static final StatsType NDV = StatsType.NDV;
-    public static final StatsType AVG_SIZE = StatsType.AVG_SIZE;
-    public static final StatsType MAX_SIZE = StatsType.MAX_SIZE;
-    public static final StatsType NUM_NULLS = StatsType.NUM_NULLS;
-    public static final StatsType MIN_VALUE = StatsType.MIN_VALUE;
-    public static final StatsType MAX_VALUE = StatsType.MAX_VALUE;
-
-    public static final ColumnStat UNKNOWN = new ColumnStat();
-
-    private static final Predicate<Double> DESIRED_NDV_PRED = (v) -> v >= -1L;
-    private static final Predicate<Double> DESIRED_AVG_SIZE_PRED = (v) -> (v == -1) || (v >= 0);
-    private static final Predicate<Double> DESIRED_MAX_SIZE_PRED = (v) -> v >= -1L;
-    private static final Predicate<Double> DESIRED_NUM_NULLS_PRED = (v) -> v >= -1L;
-
-    public static final Set<Type> MAX_MIN_UNSUPPORTED_TYPE = new HashSet<>();
-
-    static {
-        MAX_MIN_UNSUPPORTED_TYPE.add(Type.HLL);
-        MAX_MIN_UNSUPPORTED_TYPE.add(Type.BITMAP);
-        MAX_MIN_UNSUPPORTED_TYPE.add(Type.ARRAY);
-        MAX_MIN_UNSUPPORTED_TYPE.add(Type.STRUCT);
-        MAX_MIN_UNSUPPORTED_TYPE.add(Type.MAP);
-    }
-
-    private double ndv = -1;
-    private double avgSizeByte = -1;
-    private double maxSizeByte = -1;
-    private double numNulls = -1;
-    private double minValue = Double.NaN;
-    private double maxValue = Double.NaN;
-    // For display only.
-    private LiteralExpr minExpr;
-    private LiteralExpr maxExpr;
-
-    private double selectivity = 1.0;
-
-    public static ColumnStat createDefaultColumnStats() {
-        ColumnStat columnStat = new ColumnStat();
-        columnStat.setAvgSizeByte(1);
-        columnStat.setMaxSizeByte(1);
-        columnStat.setNdv(1);
-        columnStat.setNumNulls(0);
-        return columnStat;
-    }
-
-    public static boolean isUnKnown(ColumnStat stats) {
-        return stats == UNKNOWN;
-    }
-
-    public ColumnStat() {
-    }
-
-    public ColumnStat(ColumnStat other) {
-        this.ndv = other.ndv;
-        this.avgSizeByte = other.avgSizeByte;
-        this.maxSizeByte = other.maxSizeByte;
-        this.numNulls = other.numNulls;
-        this.minValue = other.minValue;
-        this.maxValue = other.maxValue;
-        this.selectivity = other.selectivity;
-    }
-
-    public ColumnStat(double ndv, double avgSizeByte,
-            double maxSizeByte, double numNulls, double minValue, double maxValue) {
-        this.ndv = ndv;
-        this.avgSizeByte = avgSizeByte;
-        this.maxSizeByte = maxSizeByte;
-        this.numNulls = numNulls;
-        this.minValue = minValue;
-        this.maxValue = maxValue;
-    }
-
-    public double getNdv() {
-        return ndv;
-    }
-
-    public double getAvgSizeByte() {
-        return avgSizeByte;
-    }
-
-    public double getMaxSizeByte() {
-        return maxSizeByte;
-    }
-
-    public double getNumNulls() {
-        return numNulls;
-    }
-
-    public double getMinValue() {
-        return minValue;
-    }
-
-    public double getMaxValue() {
-        return maxValue;
-    }
-
-    public void setNdv(double ndv) {
-        this.ndv = ndv;
-    }
-
-    public void setAvgSizeByte(double avgSizeByte) {
-        this.avgSizeByte = avgSizeByte;
-    }
-
-    public void setMaxSizeByte(double maxSizeByte) {
-        this.maxSizeByte = maxSizeByte;
-    }
-
-    public void setNumNulls(double numNulls) {
-        this.numNulls = numNulls;
-    }
-
-    public void setMinValue(double minValue) {
-        this.minValue = minValue;
-    }
-
-    public void setMaxValue(double maxValue) {
-        this.maxValue = maxValue;
-    }
-
-    public void updateStats(Type columnType, Map<StatsType, String> statsTypeToValue) throws AnalysisException {
-        for (Map.Entry<StatsType, String> entry : statsTypeToValue.entrySet()) {
-            StatsType statsType = entry.getKey();
-            switch (statsType) {
-                case NDV:
-                    ndv = Util.getDoublePropertyOrDefault(entry.getValue(), ndv,
-                            DESIRED_NDV_PRED, NDV + " should >= -1");
-                    break;
-                case AVG_SIZE:
-                    avgSizeByte = Util.getDoublePropertyOrDefault(entry.getValue(), avgSizeByte,
-                            DESIRED_AVG_SIZE_PRED, AVG_SIZE + " should (>=0) or (=-1)");
-                    break;
-                case MAX_SIZE:
-                    maxSizeByte = Util.getDoublePropertyOrDefault(entry.getValue(), maxSizeByte,
-                            DESIRED_MAX_SIZE_PRED, MAX_SIZE + " should >=-1");
-                    break;
-                case NUM_NULLS:
-                    numNulls = Util.getDoublePropertyOrDefault(entry.getValue(), numNulls,
-                            DESIRED_NUM_NULLS_PRED, NUM_NULLS + " should >=-1");
-                    break;
-                case MIN_VALUE:
-                    if (MAX_MIN_UNSUPPORTED_TYPE.contains(columnType)) {
-                        minValue = Double.NEGATIVE_INFINITY;
-                    } else {
-                        minExpr = StatisticsUtil.readableValue(columnType, entry.getValue());
-                        minValue = StatisticsUtil.convertToDouble(columnType, entry.getValue());
-                    }
-                    break;
-                case MAX_VALUE:
-                    if (MAX_MIN_UNSUPPORTED_TYPE.contains(columnType)) {
-                        maxValue = Double.NEGATIVE_INFINITY;
-                    } else {
-                        maxExpr = StatisticsUtil.readableValue(columnType, entry.getValue());
-                        maxValue = StatisticsUtil.convertToDouble(columnType, entry.getValue());
-                    }
-                    break;
-                default:
-                    throw new AnalysisException("Unknown stats type: " + statsType);
-            }
-        }
-    }
-
-    public List<String> getShowInfo() {
-        List<String> result = Lists.newArrayList();
-        result.add(Double.toString(ndv));
-        result.add(Double.toString(avgSizeByte));
-        result.add(Double.toString(maxSizeByte));
-        result.add(Double.toString(numNulls));
-        result.add(Double.toString(minValue));
-        result.add(Double.toString(maxValue));
-        return result;
-    }
-
-    public ColumnStat copy() {
-        return new ColumnStat(this);
-    }
-
-    public boolean hasIntersect(ColumnStat another) {
-        double leftMin = this.getMinValue();
-        double rightMin = another.getMinValue();
-        double leftMax = this.getMaxValue();
-        double rightMax = another.getMaxValue();
-        return Math.max(leftMin, rightMin) <= Math.min(leftMax, rightMax);
-    }
-
-    /**
-     * Return default column statistic.
-     */
-    public static ColumnStat getDefaultColumnStats() {
-        return new ColumnStat();
-    }
-
-    /**
-     * Merge column statistics (the original statistics should not be modified).
-     *
-     * @param left statistics to be merged
-     * @param right statistics to be merged
-     */
-    public static ColumnStat mergeColumnStats(ColumnStat left, ColumnStat right) {
-        // merge ndv
-        double leftNdv = left.getNdv();
-        double rightNdv = right.getNdv();
-
-        if (leftNdv == -1) {
-            leftNdv = rightNdv;
-        } else {
-            leftNdv = rightNdv != -1 ? (leftNdv + rightNdv) : leftNdv;
-        }
-
-        // merge avg_size
-        double leftAvgSize = left.getAvgSizeByte();
-        double rightAvgSize = right.getAvgSizeByte();
-        if (leftAvgSize == -1) {
-            leftAvgSize = rightAvgSize;
-        } else {
-            leftAvgSize = rightAvgSize != -1 ? ((leftAvgSize + rightAvgSize) / 2) : leftAvgSize;
-        }
-
-        // merge max_size
-        double leftMaxSize = left.getMaxSizeByte();
-        double rightMaxSize = right.getMaxSizeByte();
-        if (leftMaxSize == -1) {
-            leftMaxSize = rightMaxSize;
-        } else {
-            leftMaxSize = Math.max(leftMaxSize, rightMaxSize);
-        }
-
-        // merge num_nulls
-        double leftNumNulls = left.getNumNulls();
-        double rightNumNulls = right.getNumNulls();
-        if (leftNumNulls == -1) {
-            leftNumNulls = rightNumNulls;
-        } else {
-            leftNumNulls = rightNumNulls != -1 ? (leftNumNulls + rightNumNulls) : leftNumNulls;
-        }
-
-        // merge min_value
-        double leftMinValue = left.getMinValue();
-        double rightMinValue = right.getMinValue();
-        leftMinValue = Math.min(leftMinValue, rightMinValue);
-
-        // merge max_value
-        double leftMaxValue = left.getMaxValue();
-        double rightMaxValue = right.getMaxValue();
-        leftMaxValue = Math.max(rightMaxValue, leftMaxValue);
-
-        // generate the new merged statistics
-        return new ColumnStat(leftNdv, leftAvgSize, leftMaxSize, leftNumNulls, leftMinValue, leftMaxValue);
-    }
-
-    public static boolean isAlmostUnique(double ndv, double rowCount) {
-        return rowCount * 0.9 < ndv && ndv < rowCount * 1.1;
-    }
-
-    public double getSelectivity() {
-        return selectivity;
-    }
-
-    public void setSelectivity(double selectivity) {
-        this.selectivity = selectivity;
-    }
-
-    public double ndvIntersection(ColumnStat other) {
-        if (maxValue == minValue) {
-            if (minValue <= other.maxValue && minValue >= other.minValue) {
-                return 1;
-            } else {
-                return 0;
-            }
-        }
-        double min = Math.max(minValue, other.minValue);
-        double max = Math.min(maxValue, other.maxValue);
-        if (min < max) {
-            return Math.ceil(ndv * (max - min) / (maxValue - minValue));
-        } else if (min > max) {
-            return 0;
-        } else {
-            return 1;
-        }
-    }
-}
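The removed merge logic treated -1 as "unknown" and otherwise combined two column stats as follows: ndv and numNulls add, avgSize averages, maxSize takes the larger value, and the min/max bounds widen. A small worked example under those rules, with invented inputs:

    // Illustrative only: merging per the removed ColumnStat.mergeColumnStats rules.
    // left:  ndv=100, avg=4, maxSize=8,  nulls=10, min=0,  max=50
    // right: ndv=200, avg=6, maxSize=16, nulls=5,  min=10, max=90
    public class MergeExample {
        public static void main(String[] args) {
            double mergedNdv = 100 + 200;            // sum when both known
            double mergedAvg = (4.0 + 6.0) / 2;      // mean when both known
            double mergedMaxSize = Math.max(8, 16);  // larger of the two
            double mergedNulls = 10 + 5;             // sum when both known
            double mergedMin = Math.min(0, 10);      // bounds widen
            double mergedMax = Math.max(50, 90);
            System.out.printf("ndv=%.0f avg=%.0f maxSize=%.0f nulls=%.0f min=%.0f max=%.0f%n",
                    mergedNdv, mergedAvg, mergedMaxSize, mergedNulls, mergedMin, mergedMax);
        }
    }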
@@ -36,6 +36,13 @@ import java.util.Set;

 public class ColumnStatistic {

+    public static final StatsType NDV = StatsType.NDV;
+    public static final StatsType AVG_SIZE = StatsType.AVG_SIZE;
+    public static final StatsType MAX_SIZE = StatsType.MAX_SIZE;
+    public static final StatsType NUM_NULLS = StatsType.NUM_NULLS;
+    public static final StatsType MIN_VALUE = StatsType.MIN_VALUE;
+    public static final StatsType MAX_VALUE = StatsType.MAX_VALUE;
+
     private static final Logger LOG = LogManager.getLogger(StmtExecutor.class);

     public static ColumnStatistic DEFAULT = new ColumnStatisticBuilder().setAvgSizeByte(1).setNdv(1)
@@ -137,6 +144,10 @@ public class ColumnStatistic {
         }
     }

+    public static boolean isAlmostUnique(double ndv, double rowCount) {
+        return rowCount * 0.9 < ndv && ndv < rowCount * 1.1;
+    }
+
     public ColumnStatistic copy() {
         return new ColumnStatisticBuilder().setCount(count).setNdv(ndv).setAvgSizeByte(avgSizeByte)
                 .setNumNulls(numNulls).setDataSize(dataSize).setMinValue(minValue)
@@ -186,7 +197,7 @@ public class ColumnStatistic {
         }
         ColumnStatisticBuilder builder = new ColumnStatisticBuilder(this);
         Double rowsAfterFilter = rowCount * selectivity;
-        if (ColumnStat.isAlmostUnique(ndv, rowCount)) {
+        if (isAlmostUnique(ndv, rowCount)) {
             builder.setSelectivity(this.selectivity * selectivity);
             builder.setNdv(ndv * selectivity);
         } else {

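The near-unique heuristic now lives on ColumnStatistic itself rather than the removed ColumnStat, as the two hunks above show. It treats a column as almost unique when its NDV falls strictly within ±10% of the row count; a self-contained worked example:

    // Worked example of the isAlmostUnique heuristic added above:
    // a column is "almost unique" when rowCount * 0.9 < ndv < rowCount * 1.1.
    public class AlmostUniqueExample {
        static boolean isAlmostUnique(double ndv, double rowCount) {
            return rowCount * 0.9 < ndv && ndv < rowCount * 1.1;
        }

        public static void main(String[] args) {
            System.out.println(isAlmostUnique(950, 1000));   // true:  900 < 950 < 1100
            System.out.println(isAlmostUnique(1100, 1000));  // false: the bound is strict
            System.out.println(isAlmostUnique(500, 1000));   // false: far below the row count
        }
    }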
@@ -1,147 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-package org.apache.doris.statistics;
-
-import org.apache.doris.catalog.Column;
-import org.apache.doris.catalog.Database;
-import org.apache.doris.catalog.Env;
-import org.apache.doris.catalog.OlapTable;
-import org.apache.doris.catalog.Partition;
-import org.apache.doris.catalog.Table;
-import org.apache.doris.common.DdlException;
-import org.apache.doris.statistics.StatisticsTaskResult.TaskResult;
-
-import com.google.common.collect.Lists;
-
-import java.util.List;
-
-/**
- * A statistics task that directly collects statistics by reading FE meta.
- * e.g. for fixed-length types such as Int type and Long type we get their size from metadata.
- * 1. The granularity of row count can be table or partition, and the type should be table or partition
- * 2. The granularity of data size can be table or partition, and the type should be table or partition
- * 3. The granularity of max and min size can be table or partition, and the type should be column
- */
-public class MetaStatisticsTask extends StatisticsTask {
-    public MetaStatisticsTask(long jobId, List<StatisticsDesc> statsDescs) {
-        super(jobId, statsDescs);
-    }
-
-    @Override
-    public StatisticsTaskResult call() throws Exception {
-        checkStatisticsDesc();
-        List<TaskResult> taskResults = Lists.newArrayList();
-
-        for (StatisticsDesc statsDesc : statsDescs) {
-            StatsCategory category = statsDesc.getStatsCategory();
-            StatsGranularity granularity = statsDesc.getStatsGranularity();
-            TaskResult result = createNewTaskResult(category, granularity);
-            List<StatsType> statsTypes = statsDesc.getStatsTypes();
-
-            for (StatsType statsType : statsTypes) {
-                switch (statsType) {
-                    case MAX_SIZE:
-                    case AVG_SIZE:
-                        getColSize(category, statsType, result);
-                        break;
-                    case ROW_COUNT:
-                        getRowCount(category.getDbId(), category.getTableId(), granularity, result);
-                        break;
-                    case DATA_SIZE:
-                        getDataSize(category.getDbId(), category.getTableId(), granularity, result);
-                        break;
-                    default:
-                        throw new DdlException("Unsupported statistics type(" + statsType + ").");
-                }
-            }
-
-            taskResults.add(result);
-        }
-
-        return new StatisticsTaskResult(taskResults);
-    }
-
-    private void getColSize(StatsCategory category, StatsType statsType,
-            TaskResult result) throws DdlException {
-        OlapTable table = getNotNullOlapTable(category.getDbId(), category.getTableId());
-        Column column = getNotNullColumn(table, category.getColumnName());
-        int colSize = column.getDataType().getSlotSize();
-        result.getStatsTypeToValue().put(statsType, String.valueOf(colSize));
-    }
-
-    private void getRowCount(long dbId, long tableId, StatsGranularity granularity,
-            TaskResult result) throws DdlException {
-        OlapTable table = getNotNullOlapTable(dbId, tableId);
-
-        switch (granularity.getGranularity()) {
-            case TABLE:
-                long tblRowCount = table.getRowCount();
-                result.getStatsTypeToValue().put(StatsType.ROW_COUNT, String.valueOf(tblRowCount));
-                break;
-            case PARTITION:
-                Partition partition = getNotNullPartition(granularity, table);
-                long ptRowCount = partition.getBaseIndex().getRowCount();
-                result.getStatsTypeToValue().put(StatsType.ROW_COUNT, String.valueOf(ptRowCount));
-                break;
-            case TABLET:
-            default:
-                throw new DdlException("Unsupported granularity(" + granularity + ").");
-        }
-    }
-
-    private void getDataSize(long dbId, long tableId, StatsGranularity granularity,
-            TaskResult result) throws DdlException {
-        OlapTable table = getNotNullOlapTable(dbId, tableId);
-
-        switch (granularity.getGranularity()) {
-            case TABLE:
-                long tblDataSize = table.getDataSize();
-                result.getStatsTypeToValue().put(StatsType.DATA_SIZE, String.valueOf(tblDataSize));
-                break;
-            case PARTITION:
-                Partition partition = getNotNullPartition(granularity, table);
-                long partitionSize = partition.getBaseIndex().getDataSize();
-                result.getStatsTypeToValue().put(StatsType.DATA_SIZE, String.valueOf(partitionSize));
-                break;
-            case TABLET:
-            default:
-                throw new DdlException("Unsupported granularity(" + granularity + ").");
-        }
-    }
-
-    private OlapTable getNotNullOlapTable(long dbId, long tableId) throws DdlException {
-        Database db = Env.getCurrentInternalCatalog().getDbOrDdlException(dbId);
-        return (OlapTable) db.getTableOrDdlException(tableId);
-    }
-
-    private Partition getNotNullPartition(StatsGranularity granularity, OlapTable olapTable) throws DdlException {
-        Partition partition = olapTable.getPartition(granularity.getPartitionId());
-        if (partition == null) {
-            throw new DdlException("Partition(" + granularity.getPartitionId() + ") not found.");
-        }
-        return partition;
-    }
-
-    private Column getNotNullColumn(Table table, String colName) throws DdlException {
-        Column column = table.getColumn(colName);
-        if (column == null) {
-            throw new DdlException("Column(" + colName + ") not found.");
-        }
-        return column;
-    }
-}
@@ -1,163 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-package org.apache.doris.statistics;
-
-import org.apache.doris.catalog.Type;
-import org.apache.doris.common.AnalysisException;
-import org.apache.doris.common.util.Util;
-
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-
-import java.util.List;
-import java.util.Map;
-import java.util.function.Predicate;
-
-/**
- * These are the statistics of a partition.
- * The partition stats are mainly used to provide input for the Optimizer's cost model.
- * The description of the partition stats is as follows:
- * - @rowCount: The row count of the partition.
- * - @dataSize: The data size of the partition.
- * - @nameToColumnStats: <@String columnName, @ColumnStats columnStats>
- * <p>
- * Each column in the table will have a corresponding @ColumnStats.
- * Those @ColumnStats are recorded in @nameToColumnStats in the form of a MAP.
- * This facilitates the optimizer to quickly find the corresponding:
- * - @ColumnStats: based on the column name.
- * - @rowCount: The row count of the partition.
- * - @dataSize: The data size of the partition.
- * <p>
- * The granularity of the statistics is the whole partition.
- * For example: "@rowCount = 1000" means that the row count is 1000 in the whole partition.
- * <p>
- * After the statistics task is successfully completed, update the PartitionStats;
- * PartitionStats should not be updated in any other way.
- */
-public class PartitionStats {
-    public static final StatsType DATA_SIZE = StatsType.DATA_SIZE;
-    public static final StatsType ROW_COUNT = StatsType.ROW_COUNT;
-
-    private static final Predicate<Long> DESIRED_ROW_COUNT_PRED = (v) -> v >= -1L;
-    private static final Predicate<Long> DESIRED_DATA_SIZE_PRED = (v) -> v >= -1L;
-
-    private long rowCount = -1;
-    private long dataSize = -1;
-    private final Map<String, ColumnStat> nameToColumnStats = Maps.newConcurrentMap();
-
-    /**
-     * Return a default partition statistic.
-     */
-    public static PartitionStats getDefaultPartitionStats() {
-        return new PartitionStats();
-    }
-
-    public PartitionStats() {
-    }
-
-    public PartitionStats(long rowCount, long dataSize) {
-        this.rowCount = rowCount;
-        this.dataSize = dataSize;
-    }
-
-    public long getRowCount() {
-        return rowCount;
-    }
-
-    public void setRowCount(long rowCount) {
-        this.rowCount = rowCount;
-    }
-
-    public long getDataSize() {
-        return dataSize;
-    }
-
-    public void setDataSize(long dataSize) {
-        this.dataSize = dataSize;
-    }
-
-    public Map<String, ColumnStat> getNameToColumnStats() {
-        return nameToColumnStats;
-    }
-
-    public ColumnStat getColumnStats(String columnName) {
-        return nameToColumnStats.get(columnName);
-    }
-
-    /**
-     * If the column statistics do not exist, the default statistics will be returned.
-     */
-    public ColumnStat getColumnStatsOrDefault(String columnName) {
-        return nameToColumnStats.getOrDefault(columnName,
-                ColumnStat.getDefaultColumnStats());
-    }
-
-    /**
-     * Show the partition row count and data size.
-     */
-    public List<String> getShowInfo() {
-        List<String> result = Lists.newArrayList();
-        result.add(Long.toString(rowCount));
-        result.add(Long.toString(dataSize));
-        return result;
-    }
-
-    /**
-     * After the statistics task is successfully completed, update the statistics of the partition;
-     * statistics should not be updated in any other way.
-     */
-    public void updatePartitionStats(Map<StatsType, String> statsTypeToValue) throws AnalysisException {
-        for (Map.Entry<StatsType, String> entry : statsTypeToValue.entrySet()) {
-            StatsType statsType = entry.getKey();
-            String value = entry.getValue();
-            if (statsType == ROW_COUNT) {
-                rowCount = Util.getLongPropertyOrDefault(value, rowCount,
-                        DESIRED_ROW_COUNT_PRED, ROW_COUNT + " should >= -1");
-            } else if (statsType == DATA_SIZE) {
-                dataSize = Util.getLongPropertyOrDefault(value, dataSize,
-                        DESIRED_DATA_SIZE_PRED, DATA_SIZE + " should >= -1");
-            }
-        }
-    }
-
-    /**
-     * After the statistics task is successfully completed, update the statistics of the column;
-     * statistics should not be updated in any other way.
-     */
-    public void updateColumnStats(String columnName,
-            Type columnType,
-            Map<StatsType, String> statsTypeToValue) throws AnalysisException {
-        ColumnStat columnStat = getNotNullColumnStats(columnName);
-        columnStat.updateStats(columnType, statsTypeToValue);
-    }
-
-    /**
-     * If the column stats do not exist, create a new one.
-     *
-     * @param columnName column name
-     * @return @ColumnStats
-     */
-    public ColumnStat getNotNullColumnStats(String columnName) {
-        ColumnStat columnStat = nameToColumnStats.get(columnName);
-        if (columnStat == null) {
-            columnStat = new ColumnStat();
-            nameToColumnStats.put(columnName, columnStat);
-        }
-        return columnStat;
-    }
-}
@@ -1,142 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-package org.apache.doris.statistics;
-
-import org.apache.doris.catalog.Database;
-import org.apache.doris.catalog.Env;
-import org.apache.doris.catalog.Table;
-import org.apache.doris.common.DdlException;
-import org.apache.doris.common.InvalidFormatException;
-import org.apache.doris.statistics.StatisticsTaskResult.TaskResult;
-import org.apache.doris.statistics.StatsGranularity.Granularity;
-import org.apache.doris.statistics.util.InternalQuery;
-import org.apache.doris.statistics.util.InternalQueryResult;
-import org.apache.doris.statistics.util.InternalQueryResult.ResultRow;
-import org.apache.doris.statistics.util.InternalSqlTemplate;
-import org.apache.doris.statistics.util.InternalSqlTemplate.QueryType;
-
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-
-import java.util.List;
-import java.util.Map;
-
-/**
- * A statistics task that collects statistics by executing query.
- * The results of the query will be returned as @StatisticsTaskResult.
- */
-public class SQLStatisticsTask extends StatisticsTask {
-    protected QueryType queryType = QueryType.FULL;
-
-    protected String statement;
-
-    public SQLStatisticsTask(long jobId, List<StatisticsDesc> statsDescs) {
-        super(jobId, statsDescs);
-    }
-
-    @Override
-    public StatisticsTaskResult call() throws Exception {
-        checkStatisticsDesc();
-        List<TaskResult> taskResults = Lists.newArrayList();
-
-        for (StatisticsDesc statsDesc : statsDescs) {
-            statement = constructQuery(statsDesc);
-            TaskResult taskResult = executeQuery(statsDesc);
-            taskResults.add(taskResult);
-            LOG.info("Collected statistics successfully by SQL: {}", statement);
-        }
-
-        return new StatisticsTaskResult(taskResults);
-    }
-
-    protected String constructQuery(StatisticsDesc statsDesc) throws DdlException,
-            InvalidFormatException {
-        Map<String, String> params = getQueryParams(statsDesc);
-
-        List<StatsType> statsTypes = statsDesc.getStatsTypes();
-        StatsType type = statsTypes.get(0);
-
-        StatsGranularity statsGranularity = statsDesc.getStatsGranularity();
-        Granularity granularity = statsGranularity.getGranularity();
-        boolean nonPartitioned = granularity != Granularity.PARTITION;
-
-        switch (type) {
-            case ROW_COUNT:
-                return nonPartitioned ? InternalSqlTemplate.buildStatsRowCountSql(params, queryType)
-                        : InternalSqlTemplate.buildStatsPartitionRowCountSql(params, queryType);
-            case NUM_NULLS:
-                return nonPartitioned ? InternalSqlTemplate.buildStatsNumNullsSql(params, queryType)
-                        : InternalSqlTemplate.buildStatsPartitionNumNullsSql(params, queryType);
-            case MAX_SIZE:
-            case AVG_SIZE:
-                return nonPartitioned ? InternalSqlTemplate.buildStatsMaxAvgSizeSql(params, queryType)
-                        : InternalSqlTemplate.buildStatsPartitionMaxAvgSizeSql(params, queryType);
-            case NDV:
-            case MAX_VALUE:
-            case MIN_VALUE:
-                return nonPartitioned ? InternalSqlTemplate.buildStatsMinMaxNdvValueSql(params, queryType)
-                        : InternalSqlTemplate.buildStatsPartitionMinMaxNdvValueSql(params, queryType);
-            case DATA_SIZE:
-            default:
-                throw new DdlException("Unsupported statistics type: " + type);
-        }
-    }
-
-    protected TaskResult executeQuery(StatisticsDesc statsDesc) throws Exception {
-        StatsGranularity granularity = statsDesc.getStatsGranularity();
-        List<StatsType> statsTypes = statsDesc.getStatsTypes();
-        StatsCategory category = statsDesc.getStatsCategory();
-
-        String dbName = Env.getCurrentInternalCatalog()
-                .getDbOrDdlException(category.getDbId()).getFullName();
-        InternalQuery query = new InternalQuery(dbName, statement);
-        InternalQueryResult queryResult = query.query();
-        List<ResultRow> resultRows = queryResult.getResultRows();
-
-        if (resultRows != null && resultRows.size() == 1) {
-            ResultRow resultRow = resultRows.get(0);
-            List<String> columns = resultRow.getColumns();
-            TaskResult result = createNewTaskResult(category, granularity);
-
-            if (columns.size() == statsTypes.size()) {
-                for (int i = 0; i < columns.size(); i++) {
-                    StatsType statsType = StatsType.fromString(columns.get(i));
-                    result.getStatsTypeToValue().put(statsType, resultRow.getString(i));
-                }
-                return result;
-            }
-        }
-
-        // Statistics statements are executed singly and return only one row of data
-        throw new DdlException("Statistics query result is incorrect, statement: "
-                + statement + " queryResult: " + queryResult);
-    }
-
-    protected Map<String, String> getQueryParams(StatisticsDesc statsDesc) throws DdlException {
-        StatsCategory category = statsDesc.getStatsCategory();
-        Database db = Env.getCurrentInternalCatalog().getDbOrDdlException(category.getDbId());
-        Table table = db.getTableOrDdlException(category.getTableId());
-
-        Map<String, String> params = Maps.newHashMap();
-        params.put(InternalSqlTemplate.TABLE, table.getName());
-        params.put(InternalSqlTemplate.PARTITION, category.getPartitionName());
-        params.put(InternalSqlTemplate.COLUMN, category.getColumnName());
-
-        return params;
-    }
-}
@@ -1,49 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-package org.apache.doris.statistics;
-
-import org.apache.doris.common.Config;
-import org.apache.doris.common.DdlException;
-import org.apache.doris.statistics.util.InternalSqlTemplate;
-import org.apache.doris.statistics.util.InternalSqlTemplate.QueryType;
-
-import java.util.List;
-import java.util.Map;
-
-/**
- * The @SampleSQLStatisticsTask is also a statistical task that executes a query
- * and uses the query result as a statistical value (same as @SQLStatisticsTask).
- * The only difference from the SQLStatisticsTask is that the query is a sampling table query.
- */
-public class SampleSQLStatisticsTask extends SQLStatisticsTask {
-    // TODO(wzt): If the job configuration has a percentage value, obtain it from the job;
-    // if not, use the default value.
-    private int samplePercentage = Config.cbo_default_sample_percentage;
-
-    public SampleSQLStatisticsTask(long jobId, List<StatisticsDesc> statsDescs) {
-        super(jobId, statsDescs);
-        queryType = QueryType.SAMPLE;
-    }
-
-    @Override
-    protected Map<String, String> getQueryParams(StatisticsDesc statsDesc) throws DdlException {
-        Map<String, String> params = super.getQueryParams(statsDesc);
-        params.put(InternalSqlTemplate.PERCENT, String.valueOf(samplePercentage));
-        return params;
-    }
-}
@ -1,228 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.statistics;

import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;

import com.google.common.base.Strings;
import com.google.common.collect.Maps;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.util.Map;

/**
 * Holds the statistics of all tables.
 * The @Statistics are mainly used to provide input for the Optimizer's cost model.
 *
 * @idToTableStats: <@Long tableId, @TableStats tableStats>
 * - Each table will have a corresponding @TableStats.
 * - Those @TableStats are recorded in @idToTableStats in the form of a MAP.
 * - This facilitates the optimizer to quickly find the corresponding
 *   @TableStats based on the table id.
 */
public class Statistics {
    private static final Logger LOG = LogManager.getLogger(Statistics.class);

    private final Map<Long, TableStats> idToTableStats = Maps.newConcurrentMap();

    /**
     * Get the table stats for the given table id.
     *
     * @param tableId table id
     * @return @TableStats
     * @throws AnalysisException if the table stats do not exist
     */
    public TableStats getTableStats(long tableId) throws AnalysisException {
        TableStats tableStats = idToTableStats.get(tableId);
        if (tableStats == null) {
            throw new AnalysisException("Table " + tableId + " has no statistics");
        }
        return tableStats;
    }

    /**
     * If the table statistics do not exist, the default statistics will be returned.
     */
    public TableStats getTableStatsOrDefault(long tableId) throws AnalysisException {
        return idToTableStats.getOrDefault(tableId, TableStats.getDefaultTableStats());
    }

    /**
     * Get the partition stats for the given table id.
     *
     * @param tableId table id
     * @return partition name and @PartitionStats
     * @throws AnalysisException if the partition stats do not exist
     */
    public Map<String, PartitionStats> getPartitionStats(long tableId) throws AnalysisException {
        TableStats tableStats = getTableStats(tableId);
        Map<String, PartitionStats> nameToPartitionStats = tableStats.getNameToPartitionStats();
        if (nameToPartitionStats == null) {
            throw new AnalysisException("Table " + tableId + " has no partition statistics");
        }
        return nameToPartitionStats;
    }

    /**
     * Get the partition stats for the given table id and partition name.
     *
     * @param tableId table id
     * @param partitionName partition name
     * @return partition name and @PartitionStats
     * @throws AnalysisException if the partition stats do not exist
     */
    public Map<String, PartitionStats> getPartitionStats(long tableId, String partitionName)
            throws AnalysisException {
        Map<String, PartitionStats> partitionStats = getPartitionStats(tableId);
        PartitionStats partitionStat = partitionStats.get(partitionName);
        if (partitionStat == null) {
            throw new AnalysisException("Partition " + partitionName + " of table " + tableId + " has no statistics");
        }
        Map<String, PartitionStats> statsMap = Maps.newHashMap();
        statsMap.put(partitionName, partitionStat);
        return statsMap;
    }

    /**
     * Get the column stats for the given table id.
     *
     * @param tableId table id
     * @return column name and @ColumnStats
     * @throws AnalysisException if the column stats do not exist
     */
    public Map<String, ColumnStat> getColumnStats(long tableId) throws AnalysisException {
        TableStats tableStats = getTableStats(tableId);
        Map<String, ColumnStat> nameToColumnStats = tableStats.getNameToColumnStats();
        if (nameToColumnStats == null) {
            throw new AnalysisException("Table " + tableId + " has no column statistics");
        }
        return nameToColumnStats;
    }

    /**
     * Get the column stats for the given table id and partition name.
     *
     * @param tableId table id
     * @param partitionName partition name
     * @return column name and @ColumnStats
     * @throws AnalysisException if the column stats do not exist
     */
    public Map<String, ColumnStat> getColumnStats(long tableId, String partitionName) throws AnalysisException {
        Map<String, PartitionStats> partitionStats = getPartitionStats(tableId, partitionName);
        PartitionStats partitionStat = partitionStats.get(partitionName);
        if (partitionStat == null) {
            throw new AnalysisException("Partition " + partitionName + " of table " + tableId + " has no statistics");
        }
        return partitionStat.getNameToColumnStats();
    }

    public void updateTableStats(long tableId, Map<StatsType, String> statsTypeToValue) throws AnalysisException {
        synchronized (this) {
            TableStats tableStats = getNotNullTableStats(tableId);
            tableStats.updateTableStats(statsTypeToValue);
        }
    }

    public void updatePartitionStats(long tableId, String partitionName, Map<StatsType, String> statsTypeToValue)
            throws AnalysisException {
        synchronized (this) {
            TableStats tableStats = getNotNullTableStats(tableId);
            tableStats.updatePartitionStats(partitionName, statsTypeToValue);
        }
    }

    public void updateColumnStats(long tableId, String columnName, Type columnType,
            Map<StatsType, String> statsTypeToValue) throws AnalysisException {
        synchronized (this) {
            TableStats tableStats = getNotNullTableStats(tableId);
            tableStats.updateColumnStats(columnName, columnType, statsTypeToValue);
        }
    }

    public void updateColumnStats(long tableId, String partitionName, String columnName, Type columnType,
            Map<StatsType, String> statsTypeToValue) throws AnalysisException {
        synchronized (this) {
            PartitionStats partitionStats = getNotNullPartitionStats(tableId, partitionName);
            partitionStats.updateColumnStats(columnName, columnType, statsTypeToValue);
        }
    }

    public void dropTableStats(long tableId) {
        dropPartitionStats(tableId, null);
    }

    public void dropPartitionStats(long tableId, String partitionName) {
        synchronized (this) {
            if (idToTableStats.containsKey(tableId)) {
                if (Strings.isNullOrEmpty(partitionName)) {
                    idToTableStats.remove(tableId);
                    LOG.info("Deleted table(id={}) statistics.", tableId);
                } else {
                    TableStats tableStats = idToTableStats.get(tableId);
                    tableStats.getNameToPartitionStats().remove(partitionName);
                    LOG.info("Deleted statistics for partition {} of table(id={}).",
                            partitionName, tableId);
                }
            }
        }
    }

    // TODO: mock statistics need to be removed in the future
    public void mockTableStatsWithRowCount(long tableId, double rowCount) {
        TableStats tableStats = idToTableStats.get(tableId);
        if (tableStats == null) {
            tableStats = new TableStats(rowCount, 1);
            idToTableStats.put(tableId, tableStats);
        }
    }

    /**
     * If the table stats do not exist, create new ones.
     *
     * @param tableId table id
     * @return @TableStats
     */
    private TableStats getNotNullTableStats(long tableId) {
        TableStats tableStats = idToTableStats.get(tableId);
        if (tableStats == null) {
            tableStats = new TableStats();
            idToTableStats.put(tableId, tableStats);
        }
        return tableStats;
    }

    /**
     * If the partition stats do not exist, create new ones.
     *
     * @param tableId table id
     * @param partitionName partition name
     * @return @PartitionStats
     */
    private PartitionStats getNotNullPartitionStats(long tableId, String partitionName) {
        TableStats tableStats = getNotNullTableStats(tableId);
        Map<String, PartitionStats> nameToPartitionStats = tableStats.getNameToPartitionStats();
        PartitionStats partitionStats = nameToPartitionStats.get(partitionName);
        if (partitionStats == null) {
            partitionStats = new PartitionStats();
            nameToPartitionStats.put(partitionName, partitionStats);
        }
        return partitionStats;
    }
}
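// Illustrative usage of the Statistics class above (ids and values are made up):
Statistics statistics = new Statistics();
Map<StatsType, String> values = Maps.newHashMap();
values.put(StatsType.ROW_COUNT, "1000000");
statistics.updateTableStats(10001L, values);              // creates the TableStats entry if absent
TableStats tableStats = statistics.getTableStats(10001L); // throws AnalysisException if missing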
@ -1,61 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.statistics;

import java.util.List;

public class StatisticsDesc {
    private StatsCategory statsCategory;

    private StatsGranularity statsGranularity;

    private List<StatsType> statsTypes;

    public StatisticsDesc(StatsCategory statsCategory,
            StatsGranularity statsGranularity,
            List<StatsType> statsTypes) {
        this.statsCategory = statsCategory;
        this.statsGranularity = statsGranularity;
        this.statsTypes = statsTypes;
    }

    public StatsCategory getStatsCategory() {
        return statsCategory;
    }

    public void setStatsCategory(StatsCategory statsCategory) {
        this.statsCategory = statsCategory;
    }

    public StatsGranularity getStatsGranularity() {
        return statsGranularity;
    }

    public void setStatsGranularity(StatsGranularity statsGranularity) {
        this.statsGranularity = statsGranularity;
    }

    public List<StatsType> getStatsTypes() {
        return statsTypes;
    }

    public void setStatsTypes(List<StatsType> statsTypes) {
        this.statsTypes = statsTypes;
    }
}

@ -1,342 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.statistics;

import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.Table;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.DdlException;
import org.apache.doris.common.util.TimeUtils;

import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.text.SimpleDateFormat;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import javax.annotation.Nullable;

/**
 * Used to store statistics job info,
 * including job status, progress, etc.
 */
public class StatisticsJob {
    private static final Logger LOG = LogManager.getLogger(StatisticsJob.class);

    public enum JobState {
        PENDING,
        SCHEDULING,
        RUNNING,
        FINISHED,
        FAILED,
        CANCELLED
    }

    protected final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(true);

    private final long id = Env.getCurrentEnv().getNextId();

    /**
     * The database whose stats are to be collected.
     */
    private final long dbId;

    /**
     * The tables whose stats are to be collected.
     */
    private final Set<Long> tblIds;

    /**
     * The partitions whose stats are to be collected.
     */
    private final Map<Long, List<String>> tableIdToPartitionName;

    /**
     * The columns whose stats are to be collected.
     */
    private final Map<Long, List<String>> tableIdToColumnName;

    private final Map<String, String> properties;

    /**
     * The tasks to be executed.
     */
    private final List<StatisticsTask> tasks = Lists.newArrayList();

    private JobState jobState = JobState.PENDING;
    private final List<String> errorMsgs = Lists.newArrayList();

    private final long createTime = System.currentTimeMillis();
    private long startTime = -1L;
    private long finishTime = -1L;
    private int progress = 0;

    public StatisticsJob(Long dbId,
            Set<Long> tblIds,
            Map<Long, List<String>> tblIdToPartitionName,
            Map<Long, List<String>> tableIdToColumnName,
            Map<String, String> properties) {
        this.dbId = dbId;
        this.tblIds = tblIds;
        this.tableIdToPartitionName = tblIdToPartitionName;
        this.tableIdToColumnName = tableIdToColumnName;
        this.properties = properties == null ? Maps.newHashMap() : properties;
    }

    public void readLock() {
        lock.readLock().lock();
    }

    public void readUnlock() {
        lock.readLock().unlock();
    }

    private void writeLock() {
        lock.writeLock().lock();
    }

    private void writeUnlock() {
        lock.writeLock().unlock();
    }

    public long getId() {
        return id;
    }

    public long getDbId() {
        return dbId;
    }

    public Set<Long> getTblIds() {
        return tblIds;
    }

    public Map<Long, List<String>> getTableIdToPartitionName() {
        return tableIdToPartitionName;
    }

    public Map<Long, List<String>> getTableIdToColumnName() {
        return tableIdToColumnName;
    }

    public Map<String, String> getProperties() {
        return properties;
    }

    public List<StatisticsTask> getTasks() {
        return tasks;
    }

    public List<String> getErrorMsgs() {
        return errorMsgs;
    }

    public JobState getJobState() {
        return jobState;
    }

    public long getCreateTime() {
        return createTime;
    }

    public long getStartTime() {
        return startTime;
    }

    public long getFinishTime() {
        return finishTime;
    }

    public int getProgress() {
        return progress;
    }

    public void updateJobState(JobState newState) throws DdlException {
        LOG.info("To change statistics job(id={}) state from {} to {}", id, jobState, newState);
        writeLock();
        JobState fromState = jobState;
        try {
            unprotectedUpdateJobState(newState);
        } catch (DdlException e) {
            LOG.warn(e.getMessage(), e);
            throw e;
        } finally {
            writeUnlock();
        }
        LOG.info("Statistics job(id={}) state changed from {} to {}", id, fromState, jobState);
    }

    private void unprotectedUpdateJobState(JobState newState) throws DdlException {
        // PENDING -> PENDING/SCHEDULING/FAILED/CANCELLED
        if (jobState == JobState.PENDING) {
            switch (newState) {
                case PENDING:
                case SCHEDULING:
                    break;
                case FAILED:
                case CANCELLED:
                    finishTime = System.currentTimeMillis();
                    break;
                default:
                    throw new DdlException("Invalid job state transition from " + jobState + " to " + newState);
            }
        } else if (jobState == JobState.SCHEDULING) { // SCHEDULING -> RUNNING/FAILED/CANCELLED
            switch (newState) {
                case RUNNING:
                    startTime = System.currentTimeMillis();
                    break;
                case FAILED:
                case CANCELLED:
                    finishTime = System.currentTimeMillis();
                    break;
                default:
                    throw new DdlException("Invalid job state transition from " + jobState + " to " + newState);
            }
        } else if (jobState == JobState.RUNNING) { // RUNNING -> FINISHED/FAILED/CANCELLED
            switch (newState) {
                case FINISHED:
                case FAILED:
                case CANCELLED:
                    // set finish time
                    finishTime = System.currentTimeMillis();
                    break;
                default:
                    throw new DdlException("Invalid job state transition from " + jobState + " to " + newState);
            }
        } else {
            // TODO
            throw new DdlException("Invalid job state transition from " + jobState + " to " + newState);
        }
        jobState = newState;
    }

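    // Summary of the transitions enforced by unprotectedUpdateJobState above:
    //   PENDING    -> PENDING | SCHEDULING | FAILED | CANCELLED
    //   SCHEDULING -> RUNNING | FAILED | CANCELLED   (entering RUNNING sets startTime)
    //   RUNNING    -> FINISHED | FAILED | CANCELLED  (terminal states set finishTime)
    // Any other transition throws DdlException.
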
    public void updateJobInfoByTaskId(Long taskId, String errorMsg) throws DdlException {
        writeLock();
        try {
            for (StatisticsTask task : tasks) {
                if (taskId == task.getId()) {
                    if (Strings.isNullOrEmpty(errorMsg)) {
                        progress += 1;
                        if (progress == tasks.size()) {
                            unprotectedUpdateJobState(StatisticsJob.JobState.FINISHED);
                        }
                        task.updateTaskState(StatisticsTask.TaskState.FINISHED);
                    } else {
                        errorMsgs.add(errorMsg);
                        task.updateTaskState(StatisticsTask.TaskState.FAILED);
                        unprotectedUpdateJobState(StatisticsJob.JobState.FAILED);
                    }
                    return;
                }
            }
        } finally {
            writeUnlock();
        }
    }

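    // Note on the bookkeeping above: progress counts finished tasks, so a job with N
    // tasks reaches FINISHED exactly when the N-th task reports an empty errorMsg,
    // while a single non-empty errorMsg fails the whole job immediately.
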
    public List<Comparable> getShowInfo(@Nullable Long tableId) throws AnalysisException {
        List<Comparable> result = Lists.newArrayList();

        result.add(Long.toString(id));

        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");
        result.add(TimeUtils.longToTimeString(createTime, dateFormat));
        result.add(startTime != -1L ? TimeUtils.longToTimeString(startTime, dateFormat) : "N/A");
        result.add(finishTime != -1L ? TimeUtils.longToTimeString(finishTime, dateFormat) : "N/A");

        StringBuilder sb = new StringBuilder();
        for (String errorMsg : errorMsgs) {
            sb.append(errorMsg).append("\n");
        }
        result.add(sb.toString());

        int totalTaskNum = 0;
        int finishedTaskNum = 0;
        Map<Long, Set<String>> tblIdToCols = Maps.newHashMap();

        for (StatisticsTask task : tasks) {
            List<StatisticsDesc> statsDescs = task.getStatsDescs();

            if (!statsDescs.isEmpty()) {
                // The same task has the same stats properties
                StatsCategory statsCategory = statsDescs.get(0).getStatsCategory();
                long tblId = statsCategory.getTableId();

                if (tableId == null || tableId == tblId) {
                    totalTaskNum++;
                    if (task.getTaskState() == StatisticsTask.TaskState.FINISHED) {
                        finishedTaskNum++;
                    }

                    String col = statsCategory.getColumnName();
                    if (Strings.isNullOrEmpty(col)) {
                        continue;
                    }
                    tblIdToCols.computeIfAbsent(tblId,
                            (key) -> Sets.newHashSet()).add(col);
                }
            }
        }

        List<String> scope = Lists.newArrayList();
        Database db = Env.getCurrentEnv().getInternalCatalog().getDbOrAnalysisException(dbId);
        for (Long tblId : tblIds) {
            try {
                Table table = db.getTableOrAnalysisException(tblId);
                List<Column> baseSchema = table.getBaseSchema();
                Set<String> cols = tblIdToCols.get(tblId);
                if (cols != null) {
                    if (baseSchema.size() == cols.size()) {
                        scope.add(table.getName() + "(*)");
                    } else {
                        scope.add(table.getName() + "(" + StringUtils.join(cols.toArray(), ", ") + ")");
                    }
                }
            } catch (AnalysisException e) {
                // catch this exception when table is dropped
                LOG.info("get table failed, tableId: " + tblId, e);
            }
        }

        // exclude invalid info
        if (scope.isEmpty()) {
            return Collections.emptyList();
        }

        result.add(StringUtils.join(scope.toArray(), ","));
        result.add(finishedTaskNum + "/" + totalTaskNum);

        if (totalTaskNum > 0 && totalTaskNum == finishedTaskNum) {
            result.add("FINISHED");
        } else {
            result.add(jobState.toString());
        }

        return result;
    }
}
@ -1,192 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.statistics;

import org.apache.doris.analysis.ShowAnalyzeStmt;
import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.Table;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.Config;
import org.apache.doris.common.ErrorCode;
import org.apache.doris.common.ErrorReport;
import org.apache.doris.common.util.ListComparator;
import org.apache.doris.common.util.OrderByPair;

import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * For unified management of statistics jobs,
 * including job addition, cancellation, scheduling, etc.
 */
public class StatisticsJobManager {
    private static final Logger LOG = LogManager.getLogger(StatisticsJobManager.class);

    /**
     * Saves statistics job status information.
     */
    private final Map<Long, StatisticsJob> idToStatisticsJob = Maps.newConcurrentMap();

    public Map<Long, StatisticsJob> getIdToStatisticsJob() {
        return idToStatisticsJob;
    }

    /**
     * A statistics job is subject to the following restrictions:
     * - Rule1: The same table cannot have two unfinished statistics jobs.
     * - Rule2: The number of unfinished statistics jobs cannot exceed Config.cbo_max_statistics_job_num.
     * - Rule3: Jobs on external tables are not supported.
     */
    private void checkRestrict(long dbId, Set<Long> tableIds) throws AnalysisException {
        Database db = Env.getCurrentInternalCatalog().getDbOrAnalysisException(dbId);
        db.readLock();
        try {
            // check table type
            for (Long tableId : tableIds) {
                Table table = db.getTableOrAnalysisException(tableId);
                if (table.getType() != Table.TableType.OLAP) {
                    ErrorReport.reportAnalysisException(ErrorCode.ERR_NOT_OLAP_TABLE, db.getFullName(),
                            table.getName(), "ANALYZE");
                }
            }
        } finally {
            db.readUnlock();
        }

        int unfinishedJobs = 0;

        // check for unfinished jobs on the same tables
        for (StatisticsJob statisticsJob : idToStatisticsJob.values()) {
            StatisticsJob.JobState jobState = statisticsJob.getJobState();
            Set<Long> tblIds = statisticsJob.getTblIds();
            if (jobState == StatisticsJob.JobState.PENDING
                    || jobState == StatisticsJob.JobState.SCHEDULING
                    || jobState == StatisticsJob.JobState.RUNNING) {
                for (Long tableId : tableIds) {
                    if (tblIds.contains(tableId)) {
                        throw new AnalysisException("The table(id=" + tableId + ") has unfinished statistics jobs");
                    }
                }
                unfinishedJobs++;
            }
        }

        // check the number of unfinished jobs
        if (unfinishedJobs > Config.cbo_max_statistics_job_num) {
            throw new AnalysisException("The number of unfinished statistics jobs cannot exceed"
                    + " cbo_max_statistics_job_num: " + Config.cbo_max_statistics_job_num);
        }
    }

    public List<List<String>> getAnalyzeJobInfos(ShowAnalyzeStmt showStmt) throws AnalysisException {
        List<List<Comparable>> results = Lists.newArrayList();

        String stateValue = showStmt.getStateValue();
        StatisticsJob.JobState jobState = null;
        if (!Strings.isNullOrEmpty(stateValue)) {
            jobState = StatisticsJob.JobState.valueOf(stateValue);
        }

        // step 1: get job infos
        List<Long> jobIds = showStmt.getJobIds();
        if (jobIds != null && !jobIds.isEmpty()) {
            for (Long jobId : jobIds) {
                StatisticsJob statisticsJob = idToStatisticsJob.get(jobId);
                if (statisticsJob == null) {
                    throw new AnalysisException("No such job id: " + jobId);
                }
                if (jobState == null || jobState == statisticsJob.getJobState()) {
                    List<Comparable> showInfo = statisticsJob.getShowInfo(null);
                    if (showInfo == null || showInfo.isEmpty()) {
                        continue;
                    }
                    results.add(showInfo);
                }
            }
        } else {
            long dbId = showStmt.getDbId();
            Set<Long> tblIds = showStmt.getTblIds();
            for (StatisticsJob statisticsJob : idToStatisticsJob.values()) {
                long jobDbId = statisticsJob.getDbId();
                if (jobDbId == dbId) {
                    // check the state
                    if (jobState == null || jobState == statisticsJob.getJobState()) {
                        Set<Long> jobTblIds = statisticsJob.getTblIds();
                        // get the intersection of the two sets
                        Set<Long> set = Sets.newHashSet();
                        set.addAll(jobTblIds);
                        set.retainAll(tblIds);
                        for (long tblId : set) {
                            List<Comparable> showInfo = statisticsJob.getShowInfo(tblId);
                            if (showInfo == null || showInfo.isEmpty()) {
                                continue;
                            }
                            results.add(showInfo);
                        }
                    }
                }
            }
        }

        // step 2: order the result
        ListComparator<List<Comparable>> comparator;
        List<OrderByPair> orderByPairs = showStmt.getOrderByPairs();
        if (orderByPairs == null) {
            // sort by id asc
            comparator = new ListComparator<>(0);
        } else {
            OrderByPair[] orderByPairArr = new OrderByPair[orderByPairs.size()];
            comparator = new ListComparator<>(orderByPairs.toArray(orderByPairArr));
        }
        results.sort(comparator);

        // step 3: filter by limit
        long limit = showStmt.getLimit();
        long offset = showStmt.getOffset() == -1L ? 0 : showStmt.getOffset();
        if (offset >= results.size()) {
            results = Collections.emptyList();
        } else if (limit != -1L) {
            if ((limit + offset) >= results.size()) {
                results = results.subList((int) offset, results.size());
            } else {
                results = results.subList((int) offset, (int) (limit + offset));
            }
        }

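        // Worked example of the paging above: with results.size() == 10, offset == 3
        // and limit == 5, (limit + offset) == 8 < 10, so the sublist covers indices
        // [3, 8). Note that when limit == -1 and offset < results.size(), neither
        // branch fires, so the offset is effectively ignored in that case.
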
        // step 4: convert to result and return it
        List<List<String>> rows = Lists.newArrayList();
        for (List<Comparable> result : results) {
            List<String> row = result.stream().map(Object::toString)
                    .collect(Collectors.toList());
            rows.add(row);
        }

        return rows;
    }
}
@ -1,530 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.statistics;

import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.KeysType;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.Partition;
import org.apache.doris.catalog.Table;
import org.apache.doris.catalog.Tablet;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.Config;
import org.apache.doris.common.DdlException;
import org.apache.doris.common.util.MasterDaemon;
import org.apache.doris.statistics.StatsCategory.Category;
import org.apache.doris.statistics.StatsGranularity.Granularity;

import com.google.common.collect.Lists;
import com.google.common.collect.Queues;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Queue;
import java.util.Set;

/**
 * Schedules statistics jobs:
 * 1. divide each job into multiple tasks
 * 2. submit all tasks to the StatisticsTaskScheduler
 * Switches the job state from pending to scheduling.
 */
public class StatisticsJobScheduler extends MasterDaemon {
    private static final Logger LOG = LogManager.getLogger(StatisticsJobScheduler.class);

    /**
     * If the table row count is greater than the maximum number of rows a single BE should scan,
     * we'll divide subtasks by partition. The relevant values (3700000000L and 600000000L) are derived from tests.
     * COUNT_MAX_SCAN_PER_TASK is for count(expr); NDV_MAX_SCAN_PER_TASK is for min(c1)/max(c1)/ndv(c1).
     */
    private static final long COUNT_MAX_SCAN_PER_TASK = 3700000000L;
    private static final long NDV_MAX_SCAN_PER_TASK = 600000000L;

    /**
     * If the table row count is greater than this value, use SampleSQLStatisticsTask instead of SQLStatisticsTask.
     */
    private static final int MIN_SAMPLE_ROWS = 200000;

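    // Rough arithmetic behind these thresholds: with, say, 3 alive backends, a
    // count(expr) job stays a single SQL task below 3 * 3,700,000,000 rows and is
    // split per tablet above that; min/max/ndv splits above 3 * 600,000,000 rows;
    // and column-size scans of 200,000 rows or more switch to sampling.
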
    /**
     * Different statistics need to be collected for the jobs submitted by users.
     * If all statistics were collected at the same time, the cluster might be overburdened
     * and normal query services might be affected. Therefore, we put the jobs into this queue
     * and schedule them one by one, finally dividing each job into several subtasks and executing them.
     */
    public final Queue<StatisticsJob> pendingJobQueue
            = Queues.newLinkedBlockingQueue(Config.cbo_max_statistics_job_num);

    public StatisticsJobScheduler() {
        super("Statistics job scheduler",
                Config.statistic_job_scheduler_execution_interval_ms);
    }

    @Override
    protected void runAfterCatalogReady() {
        StatisticsJob pendingJob = pendingJobQueue.peek();
        if (pendingJob != null) {
            try {
                if (pendingJob.getTasks().size() == 0) {
                    divide(pendingJob);
                }
                List<StatisticsTask> tasks = pendingJob.getTasks();
                Env.getCurrentEnv().getStatisticsTaskScheduler().addTasks(tasks);
                pendingJob.updateJobState(StatisticsJob.JobState.SCHEDULING);
                pendingJobQueue.remove();
            } catch (IllegalStateException e) {
                // an IllegalStateException is thrown if the queue is full; re-add the tasks next time
                LOG.info("The statistics task queue is full, schedule the job(id={}) later", pendingJob.getId());
            } catch (DdlException e) {
                pendingJobQueue.remove();
                try {
                    // TODO: change this to a variant that does not throw
                    pendingJob.updateJobState(StatisticsJob.JobState.FAILED);
                } catch (DdlException ddlException) {
                    LOG.fatal(ddlException.getMessage(), e);
                }
                LOG.info("Failed to schedule the statistics job(id={})", pendingJob.getId(), e);
            }
        }
    }

    public void addPendingJob(StatisticsJob statisticsJob) throws IllegalStateException {
        pendingJobQueue.add(statisticsJob);
    }

    /**
     * Statistics tasks are of the following types:
     * table:
     * - row_count: the table row count is critical in estimating cardinality and memory usage of scan nodes.
     * - data_size: the table size, not applicable to the CBO, mainly used to monitor and manage table sizes.
     * column:
     * - num_distinct_value: used to determine the selectivity of an equivalent expression.
     * - min: the minimum value.
     * - max: the maximum value.
     * - num_nulls: the number of nulls.
     * - avg_col_len: the average length of a column, in bytes, used for memory and network IO evaluation.
     * - max_col_len: the maximum length of a column, in bytes, used for memory and network IO evaluation.
     * <p>
     * Division:
     * - min, max, ndv: these three full-scan indicators are collected by one sub-task.
     * - max_col_lens, avg_col_lens: these two sampling indicators are collected by one sub-task.
     * <p>
     * If the table row count is greater than the maximum number of rows a single BE should scan,
     * we'll divide subtasks by partition. The relevant values (3700000000L and 600000000L) are derived from tests.
     * <p>
     * Eventually, we will get several subtasks of the following types:
     *
     * @throws DdlException DdlException
     * @see MetaStatisticsTask
     * @see SampleSQLStatisticsTask
     * @see SQLStatisticsTask
     */
    private void divide(StatisticsJob job) throws DdlException {
        Database db = Env.getCurrentInternalCatalog().getDbOrDdlException(job.getDbId());
        Set<Long> tblIds = job.getTblIds();

        for (Long tblId : tblIds) {
            Optional<Table> optionalTbl = db.getTable(tblId);
            if (optionalTbl.isPresent()) {
                Table table = optionalTbl.get();
                if (!table.isPartitioned()) {
                    getStatsTaskByTable(job, tblId);
                } else {
                    getStatsTaskByPartition(job, tblId);
                }
            } else {
                LOG.warn("Table(id={}) not found in the database {}", tblId, db.getFullName());
            }
        }
    }

    /**
     * For a non-partitioned table, divide the job into several subtasks.
     *
     * @param job statistics job
     * @param tableId table id
     * @throws DdlException exception
     */
    private void getStatsTaskByTable(StatisticsJob job, long tableId) throws DdlException {
        Database db = Env.getCurrentInternalCatalog().getDbOrDdlException(job.getDbId());
        OlapTable table = (OlapTable) db.getTableOrDdlException(tableId);

        if (table.getDataSize() == 0) {
            LOG.info("Do not collect statistics for empty table {}", table.getName());
            return;
        }

        Map<Long, List<String>> tblIdToColName = job.getTableIdToColumnName();
        List<String> colNames = tblIdToColName.get(tableId);

        List<Long> backendIds = Env.getCurrentSystemInfo().getBackendIds(true);

        // step 1: collect statistics by metadata
        List<StatisticsDesc> descs = Lists.newArrayList();

        // table data size
        StatsCategory dsCategory = getTableStatsCategory(job.getDbId(), tableId);
        StatsGranularity dsGranularity = getTableGranularity(tableId);
        StatisticsDesc dsStatsDesc = new StatisticsDesc(dsCategory,
                dsGranularity, Collections.singletonList(StatsType.DATA_SIZE));
        descs.add(dsStatsDesc);

        // table row count
        if (table.getKeysType() == KeysType.DUP_KEYS) {
            StatsCategory rcCategory = getTableStatsCategory(job.getDbId(), tableId);
            StatsGranularity rcGranularity = getTableGranularity(tableId);
            StatisticsDesc rcStatsDesc = new StatisticsDesc(rcCategory,
                    rcGranularity, Collections.singletonList(StatsType.ROW_COUNT));
            descs.add(rcStatsDesc);
        }

        // variable-length columns
        List<String> strColNames = Lists.newArrayList();

        // column max size and avg size
        for (String colName : colNames) {
            Column column = table.getColumn(colName);
            if (column == null) {
                LOG.info("Column {} not found in table {}", colName, table.getName());
                continue;
            }
            Type colType = column.getType();
            if (colType.isStringType()) {
                strColNames.add(colName);
                continue;
            }
            StatsCategory colCategory = getColumnStatsCategory(job.getDbId(), tableId, colName);
            StatsGranularity colGranularity = getTableGranularity(tableId);
            StatisticsDesc colStatsDesc = new StatisticsDesc(colCategory,
                    colGranularity, Arrays.asList(StatsType.MAX_SIZE, StatsType.AVG_SIZE));
            descs.add(colStatsDesc);
        }

        // all meta statistics are collected in one task
        MetaStatisticsTask metaStatsTask = new MetaStatisticsTask(job.getId(), descs);
        job.getTasks().add(metaStatsTask);

        long rowCount = table.getRowCount();

        // step 2: collect statistics by SQL
        // table row count (table model is AGGREGATE or UNIQUE)
        if (table.getKeysType() != KeysType.DUP_KEYS) {
            if (rowCount < backendIds.size() * COUNT_MAX_SCAN_PER_TASK) {
                StatsCategory rcCategory = getTableStatsCategory(job.getDbId(), tableId);
                StatsGranularity rcGranularity = getTableGranularity(tableId);
                StatisticsDesc rcStatsDesc = new StatisticsDesc(rcCategory,
                        rcGranularity, Collections.singletonList(StatsType.ROW_COUNT));
                SQLStatisticsTask sqlTask = new SQLStatisticsTask(job.getId(),
                        Collections.singletonList(rcStatsDesc));
                job.getTasks().add(sqlTask);
            } else {
                // divide subtasks by tablet
                Collection<Partition> partitions = table.getPartitions();
                for (Partition partition : partitions) {
                    Collection<Tablet> tablets = partition.getBaseIndex().getTablets();
                    tablets.forEach(tablet -> {
                        StatsCategory rcCategory = getTableStatsCategory(job.getDbId(), tableId);
                        StatsGranularity rcGranularity = getTabletGranularity(tablet.getId());
                        StatisticsDesc rcStatsDesc = new StatisticsDesc(rcCategory,
                                rcGranularity, Collections.singletonList(StatsType.ROW_COUNT));
                        SQLStatisticsTask sqlTask = new SQLStatisticsTask(job.getId(),
                                Collections.singletonList(rcStatsDesc));
                        job.getTasks().add(sqlTask);
                    });
                }
            }
        }

        // column max size, avg size
        for (String colName : strColNames) {
            StatsCategory colCategory = getColumnStatsCategory(job.getDbId(), tableId, colName);
            StatsGranularity colGranularity = getTableGranularity(tableId);
            getColumnSizeSqlTask(job, rowCount, colCategory, colGranularity);
        }

        // column num nulls
        for (String colName : colNames) {
            StatsCategory colCategory = getColumnStatsCategory(job.getDbId(), tableId, colName);
            StatsGranularity colGranularity = getTableGranularity(tableId);
            StatisticsDesc colStatsDesc = new StatisticsDesc(colCategory,
                    colGranularity, Collections.singletonList(StatsType.NUM_NULLS));
            SQLStatisticsTask sqlTask = new SQLStatisticsTask(job.getId(),
                    Collections.singletonList(colStatsDesc));
            job.getTasks().add(sqlTask);
        }

        // column max value, min value and ndv
        for (String colName : colNames) {
            if (rowCount < backendIds.size() * NDV_MAX_SCAN_PER_TASK) {
                StatsCategory colCategory = getColumnStatsCategory(job.getDbId(), tableId, colName);
                StatsGranularity colGranularity = getTableGranularity(tableId);
                StatisticsDesc colStatsDesc = new StatisticsDesc(colCategory,
                        colGranularity, Arrays.asList(StatsType.MAX_VALUE, StatsType.MIN_VALUE, StatsType.NDV));
                SQLStatisticsTask sqlTask = new SQLStatisticsTask(job.getId(),
                        Collections.singletonList(colStatsDesc));
                job.getTasks().add(sqlTask);
            } else {
                // for a non-partitioned table, the system automatically
                // generates a partition with the same name as the table
                Collection<Partition> partitions = table.getPartitions();
                for (Partition partition : partitions) {
                    List<Tablet> tablets = partition.getBaseIndex().getTablets();
                    tablets.forEach(tablet -> {
                        StatsCategory colCategory = getColumnStatsCategory(job.getDbId(), tableId, colName);
                        StatsGranularity colGranularity = getTabletGranularity(tablet.getId());
                        StatisticsDesc colStatsDesc = new StatisticsDesc(colCategory,
                                colGranularity, Arrays.asList(StatsType.MAX_VALUE, StatsType.MIN_VALUE, StatsType.NDV));
                        SQLStatisticsTask sqlTask = new SQLStatisticsTask(job.getId(),
                                Collections.singletonList(colStatsDesc));
                        job.getTasks().add(sqlTask);
                    });
                }
            }
        }
    }

    /**
     * If the table is partitioned, divide the job into several subtasks by partition.
     *
     * @param job statistics job
     * @param tableId table id
     * @throws DdlException exception
     */
    private void getStatsTaskByPartition(StatisticsJob job, long tableId) throws DdlException {
        Database db = Env.getCurrentInternalCatalog().getDbOrDdlException(job.getDbId());
        OlapTable table = (OlapTable) db.getTableOrDdlException(tableId);

        Map<Long, List<String>> tblIdToColName = job.getTableIdToColumnName();
        List<String> colNames = tblIdToColName.get(tableId);

        Map<Long, List<String>> tblIdToPartitionName = job.getTableIdToPartitionName();
        List<String> partitionNames = tblIdToPartitionName.get(tableId);

        List<Long> backendIds = Env.getCurrentSystemInfo().getBackendIds(true);

        for (String partitionName : partitionNames) {
            Partition partition = table.getPartition(partitionName);
            if (partition == null) {
                LOG.info("Partition {} not found in the table {}", partitionName, table.getName());
                continue;
            }
            if (partition.getDataSize() == 0) {
                LOG.info("Do not collect statistics for empty partition {} in the table {}",
                        partitionName, table.getName());
                continue;
            }

            long partitionId = partition.getId();
            long rowCount = partition.getBaseIndex().getRowCount();

            // step 1: collect statistics by metadata
            List<StatisticsDesc> descs = Lists.newArrayList();

            // partition data size
            StatsCategory dsCategory = getPartitionStatsCategory(job.getDbId(), tableId, partitionName);
            StatsGranularity dsGranularity = getPartitionGranularity(partitionId);
            StatisticsDesc dsStatsDesc = new StatisticsDesc(dsCategory,
                    dsGranularity, Collections.singletonList(StatsType.DATA_SIZE));
            descs.add(dsStatsDesc);

            // partition row count
            if (table.getKeysType() == KeysType.DUP_KEYS) {
                StatsCategory rcCategory = getPartitionStatsCategory(job.getDbId(), tableId, partitionName);
                StatsGranularity rcGranularity = getPartitionGranularity(partitionId);
                StatisticsDesc rcStatsDesc = new StatisticsDesc(rcCategory,
                        rcGranularity, Collections.singletonList(StatsType.ROW_COUNT));
                descs.add(rcStatsDesc);
            }

            // variable-length columns
            List<String> strColNames = Lists.newArrayList();

            // column max size and avg size
            for (String colName : colNames) {
                Column column = table.getColumn(colName);
                if (column == null) {
                    LOG.info("Column {} not found in the table {}", colName, table.getName());
                    continue;
                }
                Type colType = column.getType();
                if (colType.isStringType()) {
                    strColNames.add(colName);
                    continue;
                }
                StatsCategory colCategory = getColumnStatsCategory(job.getDbId(), tableId, partitionName, colName);
                StatsGranularity colGranularity = getPartitionGranularity(partitionId);
                StatisticsDesc colStatsDesc = new StatisticsDesc(colCategory,
                        colGranularity, Arrays.asList(StatsType.MAX_SIZE, StatsType.AVG_SIZE));
                descs.add(colStatsDesc);
            }

            // all meta statistics are collected in one task
            MetaStatisticsTask metaStatsTask = new MetaStatisticsTask(job.getId(), descs);
            job.getTasks().add(metaStatsTask);

            // step 2: collect statistics by SQL
            // partition row count (table model is AGGREGATE or UNIQUE)
            if (table.getKeysType() != KeysType.DUP_KEYS) {
                if (rowCount < backendIds.size() * COUNT_MAX_SCAN_PER_TASK) {
                    StatsCategory rcCategory = getPartitionStatsCategory(job.getDbId(), tableId, partitionName);
                    StatsGranularity rcGranularity = getPartitionGranularity(partitionId);
                    StatisticsDesc rcStatsDesc = new StatisticsDesc(rcCategory,
                            rcGranularity, Collections.singletonList(StatsType.ROW_COUNT));
                    SQLStatisticsTask sqlTask = new SQLStatisticsTask(job.getId(),
                            Collections.singletonList(rcStatsDesc));
                    job.getTasks().add(sqlTask);
                } else {
                    // divide subtasks by tablet
                    List<Tablet> tablets = partition.getBaseIndex().getTablets();
                    tablets.forEach(tablet -> {
                        StatsCategory rcCategory = getPartitionStatsCategory(job.getDbId(), tableId, partitionName);
                        StatsGranularity rcGranularity = getTabletGranularity(tablet.getId());
                        StatisticsDesc rcStatsDesc = new StatisticsDesc(rcCategory,
                                rcGranularity, Collections.singletonList(StatsType.ROW_COUNT));
                        SQLStatisticsTask sqlTask = new SQLStatisticsTask(job.getId(),
                                Collections.singletonList(rcStatsDesc));
                        job.getTasks().add(sqlTask);
                    });
                }
            }

            // column max size, avg size
            for (String colName : strColNames) {
                StatsCategory colCategory = getColumnStatsCategory(job.getDbId(), tableId, partitionName, colName);
                StatsGranularity colGranularity = getPartitionGranularity(partitionId);
                getColumnSizeSqlTask(job, rowCount, colCategory, colGranularity);
            }

            // column num nulls
            for (String colName : colNames) {
                StatsCategory colCategory = getColumnStatsCategory(job.getDbId(), tableId, partitionName, colName);
                StatsGranularity colGranularity = getPartitionGranularity(partitionId);
                StatisticsDesc colStatsDesc = new StatisticsDesc(colCategory,
                        colGranularity, Collections.singletonList(StatsType.NUM_NULLS));
                SQLStatisticsTask sqlTask = new SQLStatisticsTask(job.getId(),
                        Collections.singletonList(colStatsDesc));
                job.getTasks().add(sqlTask);
            }

            // column max value, min value and ndv
            for (String colName : colNames) {
                if (rowCount < backendIds.size() * NDV_MAX_SCAN_PER_TASK) {
                    StatsCategory colCategory = getColumnStatsCategory(job.getDbId(), tableId, partitionName, colName);
                    StatsGranularity colGranularity = getPartitionGranularity(partitionId);
                    StatisticsDesc colStatsDesc = new StatisticsDesc(colCategory,
                            colGranularity, Arrays.asList(StatsType.MAX_VALUE, StatsType.MIN_VALUE, StatsType.NDV));
                    SQLStatisticsTask sqlTask = new SQLStatisticsTask(job.getId(),
                            Collections.singletonList(colStatsDesc));
                    job.getTasks().add(sqlTask);
                } else {
                    // divide subtasks by tablet
                    List<Tablet> tablets = partition.getBaseIndex().getTablets();
                    tablets.forEach(tablet -> {
                        StatsCategory colCategory = getColumnStatsCategory(job.getDbId(),
                                tableId, partitionName, colName);
                        StatsGranularity colGranularity = getTabletGranularity(tablet.getId());
                        StatisticsDesc colStatsDesc = new StatisticsDesc(colCategory,
                                colGranularity, Arrays.asList(StatsType.MAX_VALUE, StatsType.MIN_VALUE, StatsType.NDV));
                        SQLStatisticsTask sqlTask = new SQLStatisticsTask(job.getId(),
                                Collections.singletonList(colStatsDesc));
                        job.getTasks().add(sqlTask);
                    });
                }
            }
        }
    }

    private void getColumnSizeSqlTask(StatisticsJob job, long rowCount,
            StatsCategory colCategory, StatsGranularity colGranularity) {
        StatisticsDesc colStatsDesc = new StatisticsDesc(colCategory,
                colGranularity, Arrays.asList(StatsType.MAX_SIZE, StatsType.AVG_SIZE));
        SQLStatisticsTask sqlTask;
        if (rowCount < MIN_SAMPLE_ROWS) {
            sqlTask = new SQLStatisticsTask(job.getId(), Collections.singletonList(colStatsDesc));
        } else {
            sqlTask = new SampleSQLStatisticsTask(job.getId(), Collections.singletonList(colStatsDesc));
        }
        job.getTasks().add(sqlTask);
    }

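    // E.g. a 150,000-row scan runs the size query in full above, while a
    // 5,000,000-row scan goes through SampleSQLStatisticsTask and inherits
    // its sample percentage.
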
    private StatsCategory getTableStatsCategory(long dbId, long tableId) {
        StatsCategory category = new StatsCategory();
        category.setCategory(StatsCategory.Category.TABLE);
        category.setDbId(dbId);
        category.setTableId(tableId);
        return category;
    }

    private StatsCategory getPartitionStatsCategory(long dbId, long tableId, String partitionName) {
        StatsCategory category = new StatsCategory();
        category.setCategory(Category.PARTITION);
        category.setDbId(dbId);
        category.setTableId(tableId);
        category.setPartitionName(partitionName);
        return category;
    }

    private StatsCategory getColumnStatsCategory(long dbId, long tableId, String columnName) {
        StatsCategory category = new StatsCategory();
        category.setDbId(dbId);
        category.setTableId(tableId);
        category.setCategory(Category.COLUMN);
        category.setColumnName(columnName);
        return category;
    }

    private StatsCategory getColumnStatsCategory(long dbId, long tableId, String partitionName, String columnName) {
        StatsCategory category = new StatsCategory();
        category.setDbId(dbId);
        category.setTableId(tableId);
        category.setPartitionName(partitionName);
        category.setCategory(Category.COLUMN);
        category.setColumnName(columnName);
        return category;
    }

    private StatsGranularity getTableGranularity(long tableId) {
        StatsGranularity granularity = new StatsGranularity();
        granularity.setTableId(tableId);
        granularity.setGranularity(Granularity.TABLE);
        return granularity;
    }

    private StatsGranularity getPartitionGranularity(long partitionId) {
        StatsGranularity granularity = new StatsGranularity();
        granularity.setPartitionId(partitionId);
        granularity.setGranularity(Granularity.PARTITION);
        return granularity;
    }

    private StatsGranularity getTabletGranularity(long tabletId) {
        StatsGranularity granularity = new StatsGranularity();
        granularity.setTabletId(tabletId);
        granularity.setGranularity(Granularity.TABLET);
        return granularity;
    }
}
@ -1,508 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.statistics;

import org.apache.doris.analysis.AlterColumnStatsStmt;
import org.apache.doris.analysis.AlterTableStatsStmt;
import org.apache.doris.analysis.DropTableStatsStmt;
import org.apache.doris.analysis.ShowTableStatsStmt;
import org.apache.doris.analysis.TableName;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.PartitionType;
import org.apache.doris.catalog.Table;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.ErrorCode;
import org.apache.doris.common.ErrorReport;
import org.apache.doris.mysql.privilege.PrivPredicate;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.statistics.StatisticsTaskResult.TaskResult;
import org.apache.doris.statistics.StatsGranularity.Granularity;

import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.commons.lang3.math.NumberUtils;

import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class StatisticsManager {

    private final Statistics statistics;

    public StatisticsManager() {
        statistics = new Statistics();
    }

    public Statistics getStatistics() {
        return statistics;
    }

    /**
     * Supports deleting table or partition statistics.
     *
     * @param stmt statement providing the table names and partition names
     */
    public void dropStats(DropTableStatsStmt stmt) {
        Map<Long, Set<String>> tblIdToPartition = stmt.getTblIdToPartition();

        if (tblIdToPartition != null && !tblIdToPartition.isEmpty()) {
            tblIdToPartition.forEach((tableId, partitions) -> {
                if (partitions == null || partitions.isEmpty()) {
                    statistics.dropTableStats(tableId);
                } else {
                    for (String partition : partitions) {
                        statistics.dropPartitionStats(tableId, partition);
                    }
                }
            });
        }
    }

    /**
     * Alters table or partition stats. If a partition name is given, updates the partition stats.
     *
     * @param stmt alter table stats stmt
     * @throws AnalysisException if the table or partition does not exist
     */
    public void alterTableStatistics(AlterTableStatsStmt stmt) throws AnalysisException {
        Table table = validateTableName(stmt.getTableName());
        List<String> partitionNames = stmt.getPartitionNames();
        Map<StatsType, String> statsTypeToValue = stmt.getStatsTypeToValue();

        if (partitionNames.isEmpty()) {
            statistics.updateTableStats(table.getId(), statsTypeToValue);
            return;
        }

        for (String partitionName : partitionNames) {
            partitionName = validatePartitionName(table, partitionName);
            statistics.updatePartitionStats(table.getId(), partitionName, statsTypeToValue);
        }
    }

/**
|
||||
* Alter column stats. if partition name is not null, update column of partition stats.
|
||||
*
|
||||
* @param stmt alter column stats stmt
|
||||
* @throws AnalysisException if table, column or partition not exist
|
||||
*/
|
||||
public void alterColumnStatistics(AlterColumnStatsStmt stmt) throws AnalysisException {
|
||||
Table table = validateTableName(stmt.getTableName());
|
||||
String colName = stmt.getColumnName();
|
||||
List<String> partitionNames = stmt.getPartitionNames();
|
||||
Map<StatsType, String> statsTypeToValue = stmt.getStatsTypeToValue();
|
||||
|
||||
if ((partitionNames.isEmpty()) && table instanceof OlapTable
|
||||
&& !((OlapTable) table).getPartitionInfo().getType().equals(PartitionType.UNPARTITIONED)) {
|
||||
throw new AnalysisException("Partitioned table must specify partition name.");
|
||||
}
|
||||
|
||||
if (partitionNames.isEmpty()) {
|
||||
Column column = validateColumn(table, colName);
|
||||
Type colType = column.getType();
|
||||
statistics.updateColumnStats(table.getId(), colName, colType, statsTypeToValue);
|
||||
return;
|
||||
}
|
||||
|
||||
for (String partitionName : partitionNames) {
|
||||
validatePartitionName(table, partitionName);
|
||||
Column column = validateColumn(table, colName);
|
||||
Type colType = column.getType();
|
||||
statistics.updateColumnStats(table.getId(), partitionName, colName, colType, statsTypeToValue);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Update statistics. there are three types of statistics: column, table and column.
|
||||
*
|
||||
* @param statsTaskResults statistics task results
|
||||
* @throws AnalysisException if column, table or partition not exist
|
||||
*/
|
||||
public void updateStatistics(List<StatisticsTaskResult> statsTaskResults) throws AnalysisException {
|
||||
// tablet granularity stats(row count, max value, min value, ndv)
|
||||
Map<StatsType, Map<TaskResult, List<String>>> tabletStats = Maps.newHashMap();
|
||||
|
||||
for (StatisticsTaskResult statsTaskResult : statsTaskResults) {
|
||||
if (statsTaskResult != null) {
|
||||
List<TaskResult> taskResults = statsTaskResult.getTaskResults();
|
||||
|
||||
for (TaskResult result : taskResults) {
|
||||
validateResult(result);
|
||||
long tblId = result.getTableId();
|
||||
Map<StatsType, String> statsTypeToValue = result.getStatsTypeToValue();
|
||||
|
||||
if (result.getGranularity() == Granularity.TABLET) {
|
||||
statsTypeToValue.forEach((statsType, value) -> {
|
||||
if (tabletStats.containsKey(statsType)) {
|
||||
Map<TaskResult, List<String>> resultToValue = tabletStats.get(statsType);
|
||||
List<String> values = resultToValue.get(result);
|
||||
values.add(value);
|
||||
} else {
|
||||
Map<TaskResult, List<String>> resultToValue = Maps.newHashMap();
|
||||
List<String> values = Lists.newArrayList();
|
||||
values.add(value);
|
||||
resultToValue.put(result, values);
|
||||
tabletStats.put(statsType, resultToValue);
|
||||
}
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
switch (result.getCategory()) {
|
||||
case TABLE:
|
||||
statistics.updateTableStats(tblId, statsTypeToValue);
|
||||
break;
|
||||
case PARTITION:
|
||||
String partitionName = result.getPartitionName();
|
||||
statistics.updatePartitionStats(tblId, partitionName, statsTypeToValue);
|
||||
break;
|
||||
case COLUMN:
|
||||
updateColumnStats(result, statsTypeToValue);
|
||||
break;
|
||||
default:
|
||||
throw new AnalysisException("Unknown stats category: " + result.getCategory());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// update tablet granularity stats
|
||||
updateTabletStats(tabletStats);
|
||||
}
|
||||
|
||||
private void updateColumnStats(TaskResult result, Map<StatsType, String> statsTypeToValue)
|
||||
throws AnalysisException {
|
||||
long dbId = result.getDbId();
|
||||
long tblId = result.getTableId();
|
||||
String partitionName = result.getPartitionName();
|
||||
String colName = result.getColumnName();
|
||||
|
||||
Database db = Env.getCurrentInternalCatalog().getDbOrAnalysisException(dbId);
|
||||
OlapTable table = (OlapTable) db.getTableOrAnalysisException(tblId);
|
||||
Column column = table.getColumn(colName);
|
||||
Type colType = column.getType();
|
||||
|
||||
switch (result.getGranularity()) {
|
||||
case TABLE:
|
||||
statistics.updateColumnStats(tblId, colName, colType, statsTypeToValue);
|
||||
break;
|
||||
case PARTITION:
|
||||
statistics.updateColumnStats(tblId, partitionName, colName, colType, statsTypeToValue);
|
||||
break;
|
||||
default:
|
||||
// The tablet granularity is handle separately
|
||||
throw new AnalysisException("Unknown granularity: " + result.getGranularity());
|
||||
}
|
||||
}
|
||||
|
||||
private void updateTabletStats(Map<StatsType, Map<TaskResult, List<String>>> tabletStats)
|
||||
throws AnalysisException {
|
||||
for (Map.Entry<StatsType, Map<TaskResult, List<String>>> statsEntry : tabletStats.entrySet()) {
|
||||
StatsType statsType = statsEntry.getKey();
|
||||
Map<TaskResult, List<String>> resultToValue = statsEntry.getValue();
|
||||
|
||||
for (Map.Entry<TaskResult, List<String>> resultEntry : resultToValue.entrySet()) {
|
||||
TaskResult result = resultEntry.getKey();
|
||||
List<String> values = resultEntry.getValue();
|
||||
|
||||
switch (statsType) {
|
||||
case ROW_COUNT:
|
||||
updateTabletRowCount(result, values);
|
||||
break;
|
||||
case MAX_VALUE:
|
||||
updateTabletMaxValue(result, values);
|
||||
break;
|
||||
case MIN_VALUE:
|
||||
updateTabletMinValue(result, values);
|
||||
break;
|
||||
case NDV:
|
||||
updateTabletNDV(result, values);
|
||||
break;
|
||||
default:
|
||||
throw new AnalysisException("Unknown stats type: " + statsType);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the statistics of a table. if specified partition name, get the statistics of the partition.
|
||||
*
|
||||
* @param stmt statement
|
||||
* @return partition or table statistics
|
||||
* @throws AnalysisException statistics not exist
|
||||
*/
|
||||
public List<List<String>> showTableStatsList(ShowTableStatsStmt stmt) throws AnalysisException {
|
||||
String dbName = stmt.getDbName();
|
||||
Database db = Env.getCurrentInternalCatalog().getDbOrAnalysisException(dbName);
|
||||
String tableName = stmt.getTableName();
|
||||
List<List<String>> result = Lists.newArrayList();
|
||||
|
||||
if (tableName != null) {
|
||||
Table table = db.getTableOrAnalysisException(tableName);
|
||||
// check priv
|
||||
if (!Env.getCurrentEnv().getAuth()
|
||||
.checkTblPriv(ConnectContext.get(), dbName, tableName, PrivPredicate.SHOW)) {
|
||||
ErrorReport.reportAnalysisException(ErrorCode.ERR_TABLEACCESS_DENIED_ERROR, "SHOW CREATE TABLE",
|
||||
ConnectContext.get().getQualifiedUser(), ConnectContext.get().getRemoteIP(),
|
||||
dbName + ": " + tableName);
|
||||
}
|
||||
|
||||
List<String> partitionNames = stmt.getPartitionNames();
|
||||
|
||||
if (partitionNames.isEmpty()) {
|
||||
result.add(showTableStats(table));
|
||||
} else {
|
||||
for (String partitionName : partitionNames) {
|
||||
validatePartitionName(table, partitionName);
|
||||
result.add(showTableStats(table, partitionName));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (Table table : db.getTables()) {
|
||||
if (!Env.getCurrentEnv().getAuth()
|
||||
.checkTblPriv(ConnectContext.get(), dbName, table.getName(), PrivPredicate.SHOW)) {
|
||||
continue;
|
||||
}
|
||||
try {
|
||||
result.add(showTableStats(table));
|
||||
} catch (AnalysisException e) {
|
||||
// ignore no stats table
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private List<String> showTableStats(Table table) throws AnalysisException {
|
||||
TableStats tableStats = statistics.getTableStats(table.getId());
|
||||
if (tableStats == null) {
|
||||
throw new AnalysisException("There is no statistics in this table:" + table.getName());
|
||||
}
|
||||
List<String> row = Lists.newArrayList();
|
||||
row.add(table.getName());
|
||||
row.addAll(tableStats.getShowInfo());
|
||||
return row;
|
||||
}
|
||||
|
||||
private List<String> showTableStats(Table table, String partitionName) throws AnalysisException {
|
||||
Map<String, PartitionStats> partitionStats = statistics.getPartitionStats(table.getId(), partitionName);
|
||||
PartitionStats partitionStat = partitionStats.get(partitionName);
|
||||
if (partitionStat == null) {
|
||||
throw new AnalysisException("There is no statistics in this partition:" + partitionName);
|
||||
}
|
||||
List<String> row = Lists.newArrayList();
|
||||
row.add(partitionName);
|
||||
row.addAll(partitionStat.getShowInfo());
|
||||
return row;
|
||||
}
|
||||
|
||||
private List<List<String>> showColumnStats(long tableId) throws AnalysisException {
|
||||
List<List<String>> result = Lists.newArrayList();
|
||||
Map<String, ColumnStat> columnStats = statistics.getColumnStats(tableId);
|
||||
columnStats.forEach((key, stats) -> {
|
||||
List<String> row = Lists.newArrayList();
|
||||
row.add(key);
|
||||
row.addAll(stats.getShowInfo());
|
||||
result.add(row);
|
||||
});
|
||||
return result;
|
||||
}
|
||||
|
||||
private List<List<String>> showColumnStats(long tableId, String partitionName) throws AnalysisException {
|
||||
List<List<String>> result = Lists.newArrayList();
|
||||
Map<String, ColumnStat> columnStats = statistics.getColumnStats(tableId, partitionName);
|
||||
columnStats.forEach((key, stats) -> {
|
||||
List<String> row = Lists.newArrayList();
|
||||
row.add(key);
|
||||
row.addAll(stats.getShowInfo());
|
||||
result.add(row);
|
||||
});
|
||||
return result;
|
||||
}
|
||||
|
||||
private void updateTabletRowCount(TaskResult result, List<String> values) throws AnalysisException {
|
||||
long statsValue = values.stream().filter(NumberUtils::isCreatable)
|
||||
.mapToLong(Long::parseLong).sum();
|
||||
|
||||
Map<StatsType, String> statsTypeToValue = Maps.newHashMap();
|
||||
statsTypeToValue.put(StatsType.ROW_COUNT, String.valueOf(statsValue));
|
||||
|
||||
if (result.getCategory() == StatsCategory.Category.TABLE) {
|
||||
statistics.updateTableStats(result.getTableId(), statsTypeToValue);
|
||||
} else if (result.getCategory() == StatsCategory.Category.PARTITION) {
|
||||
statistics.updatePartitionStats(result.getTableId(), result.getPartitionName(), statsTypeToValue);
|
||||
}
|
||||
}
|
||||
|
||||
private void updateTabletMaxValue(TaskResult result, List<String> values) throws AnalysisException {
|
||||
Column column = getNotNullColumn(result);
|
||||
Type type = column.getType();
|
||||
String maxValue = getNumericMaxOrMinValue(values, type, true);
|
||||
|
||||
Map<StatsType, String> statsTypeToValue = Maps.newHashMap();
|
||||
statsTypeToValue.put(StatsType.MAX_VALUE, maxValue);
|
||||
|
||||
updateTabletGranularityStats(result, type, statsTypeToValue);
|
||||
}
|
||||
|
||||
private void updateTabletMinValue(TaskResult result, List<String> values) throws AnalysisException {
|
||||
Column column = getNotNullColumn(result);
|
||||
Type type = column.getType();
|
||||
String minValue = getNumericMaxOrMinValue(values, type, false);
|
||||
|
||||
Map<StatsType, String> statsTypeToValue = Maps.newHashMap();
|
||||
statsTypeToValue.put(StatsType.MIN_VALUE, minValue);
|
||||
|
||||
updateTabletGranularityStats(result, type, statsTypeToValue);
|
||||
}
|
||||
|
||||
private void updateTabletNDV(TaskResult result, List<String> values) throws AnalysisException {
|
||||
double statsValue = values.stream().filter(NumberUtils::isCreatable)
|
||||
.mapToLong(Long::parseLong).sum();
|
||||
|
||||
Map<StatsType, String> statsTypeToValue = Maps.newHashMap();
|
||||
statsTypeToValue.put(StatsType.NDV, String.valueOf(statsValue));
|
||||
|
||||
Column column = getNotNullColumn(result);
|
||||
Type type = column.getType();
|
||||
updateTabletGranularityStats(result, type, statsTypeToValue);
|
||||
}
|
||||
|
||||
private void updateTabletGranularityStats(TaskResult result, Type columnType,
|
||||
Map<StatsType, String> statsTypeToValue) throws AnalysisException {
|
||||
if (result.getCategory() == StatsCategory.Category.TABLE) {
|
||||
statistics.updateColumnStats(result.getTableId(),
|
||||
result.getColumnName(), columnType, statsTypeToValue);
|
||||
} else if (result.getCategory() == StatsCategory.Category.PARTITION) {
|
||||
statistics.updateColumnStats(result.getTableId(), result.getPartitionName(),
|
||||
result.getColumnName(), columnType, statsTypeToValue);
|
||||
}
|
||||
}
|
||||
|
||||
private Table validateTableName(TableName dbTableName) throws AnalysisException {
|
||||
String dbName = dbTableName.getDb();
|
||||
String tableName = dbTableName.getTbl();
|
||||
Database db = Env.getCurrentInternalCatalog().getDbOrAnalysisException(dbName);
|
||||
return db.getTableOrAnalysisException(tableName);
|
||||
}
|
||||
|
||||
/**
|
||||
* Partition name is optional, if partition name is not null, it will be validated.
|
||||
*/
|
||||
private String validatePartitionName(Table table, String partitionName) throws AnalysisException {
|
||||
if (!table.isPartitioned() && !Strings.isNullOrEmpty(partitionName)) {
|
||||
ErrorReport.reportAnalysisException(ErrorCode.ERR_PARTITION_CLAUSE_ON_NONPARTITIONED,
|
||||
partitionName, table.getName());
|
||||
}
|
||||
|
||||
if (!Strings.isNullOrEmpty(partitionName) && table.getPartition(partitionName) == null) {
|
||||
ErrorReport.reportAnalysisException(ErrorCode.ERR_UNKNOWN_PARTITION,
|
||||
partitionName, table.getName());
|
||||
}
|
||||
|
||||
return partitionName;
|
||||
}
|
||||
|
||||
private Column validateColumn(Table table, String columnName) throws AnalysisException {
|
||||
Column column = table.getColumn(columnName);
|
||||
if (column == null) {
|
||||
ErrorReport.reportAnalysisException(ErrorCode.ERR_BAD_FIELD_ERROR, columnName, table.getName());
|
||||
}
|
||||
return column;
|
||||
}
|
||||
|
||||
private void validateResult(TaskResult result) throws AnalysisException {
|
||||
Database db = Env.getCurrentInternalCatalog().getDbOrAnalysisException(result.getDbId());
|
||||
Table table = db.getTableOrAnalysisException(result.getTableId());
|
||||
|
||||
if (!Strings.isNullOrEmpty(result.getPartitionName())) {
|
||||
validatePartitionName(table, result.getPartitionName());
|
||||
}
|
||||
|
||||
if (!Strings.isNullOrEmpty(result.getColumnName())) {
|
||||
validateColumn(table, result.getColumnName());
|
||||
}
|
||||
|
||||
Map<StatsType, String> statsTypeToValue = result.getStatsTypeToValue();
|
||||
if (statsTypeToValue == null || statsTypeToValue.isEmpty()) {
|
||||
throw new AnalysisException("StatsTypeToValue is empty.");
|
||||
}
|
||||
}
|
||||
|
||||
private Column getNotNullColumn(TaskResult result) throws AnalysisException {
|
||||
Database db = Env.getCurrentInternalCatalog().getDbOrAnalysisException(result.getDbId());
|
||||
Table table = db.getTableOrAnalysisException(result.getTableId());
|
||||
Column column = table.getColumn(result.getColumnName());
|
||||
if (column == null) {
|
||||
throw new AnalysisException("Column " + result.getColumnName() + " does not exist");
|
||||
}
|
||||
return column;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the max/min value of the column.
|
||||
*
|
||||
* @param values String List of values
|
||||
* @param type column type
|
||||
* @param maxOrMin true for max, false for min
|
||||
* @return the max/min value of the column.
|
||||
*/
|
||||
private String getNumericMaxOrMinValue(List<String> values, Type type, boolean maxOrMin) {
|
||||
if (type.isFixedPointType()) {
|
||||
long result = 0L;
|
||||
for (String value : values) {
|
||||
if (NumberUtils.isCreatable(value)) {
|
||||
long temp = Long.parseLong(value);
|
||||
if (maxOrMin) {
|
||||
result = Math.max(result, temp);
|
||||
} else {
|
||||
result = Math.min(result, temp);
|
||||
}
|
||||
}
|
||||
}
|
||||
return String.valueOf(result);
|
||||
}
|
||||
|
||||
if (type.isFloatingPointType()) {
|
||||
double result = 0.0;
|
||||
for (String value : values) {
|
||||
if (NumberUtils.isCreatable(value)) {
|
||||
double temp = Double.parseDouble(value);
|
||||
if (maxOrMin) {
|
||||
result = Math.max(result, temp);
|
||||
} else {
|
||||
result = Math.min(result, temp);
|
||||
}
|
||||
}
|
||||
}
|
||||
return String.valueOf(result);
|
||||
}
|
||||
|
||||
// is not numeric type
|
||||
values.sort(Comparator.naturalOrder());
|
||||
return values.size() > 0 ? values.get(values.size() - 1) : null;
|
||||
}
|
||||
}
|
||||
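For tablet-granularity results, updateStatistics above first buckets the raw per-tablet strings, and updateTabletRowCount then folds them into one number. A self-contained sketch of just that fold, assuming values arrive as strings exactly as in the removed code (the class name and sample values are made up for illustration):

import org.apache.commons.lang3.math.NumberUtils;

import java.util.List;

class TabletRowCountSketch {
    // Sums the parseable per-tablet row counts, mirroring updateTabletRowCount above.
    static long sumRowCounts(List<String> perTabletValues) {
        return perTabletValues.stream()
                .filter(NumberUtils::isCreatable)
                .mapToLong(Long::parseLong)
                .sum();
    }

    public static void main(String[] args) {
        // Three tablets reported counts; "n/a" is skipped by the isCreatable filter.
        System.out.println(sumRowCounts(List.of("100", "n/a", "250"))); // prints 350
    }
}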
@ -1,172 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.statistics;

import org.apache.doris.catalog.Env;
import org.apache.doris.common.DdlException;
import org.apache.doris.statistics.StatisticsTaskResult.TaskResult;

import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.util.List;
import java.util.concurrent.Callable;

/**
 * The StatisticsTask belongs to one StatisticsJob.
 * A job may be split into multiple tasks but a task can only belong to one job.
 *
 * @granularityDesc, @categoryDesc, @statsTypeList
 * These three attributes indicate which statistics this task is responsible for collecting.
 * In general, a task will collect more than one @StatsType at the same time,
 * while all of the types belong to the same @granularityDesc and @categoryDesc.
 * For example: the task is responsible for collecting min, max, ndv of t1.c1 in partition p1.
 * @granularityDesc: StatsGranularity=partition
 */
public abstract class StatisticsTask implements Callable<StatisticsTaskResult> {
    protected static final Logger LOG = LogManager.getLogger(StatisticsTask.class);

    public enum TaskState {
        PENDING,
        RUNNING,
        FINISHED,
        FAILED
    }

    protected long id = Env.getCurrentEnv().getNextId();
    protected long jobId;
    protected List<StatisticsDesc> statsDescs;
    protected TaskState taskState = TaskState.PENDING;

    protected final long createTime = System.currentTimeMillis();
    protected long startTime = -1L;
    protected long finishTime = -1L;

    public StatisticsTask(long jobId, List<StatisticsDesc> statsDescs) {
        this.jobId = jobId;
        this.statsDescs = statsDescs;
    }

    public long getId() {
        return id;
    }

    public void setId(long id) {
        this.id = id;
    }

    public long getJobId() {
        return jobId;
    }

    public List<StatisticsDesc> getStatsDescs() {
        return statsDescs;
    }

    public TaskState getTaskState() {
        return taskState;
    }

    public long getCreateTime() {
        return createTime;
    }

    public long getStartTime() {
        return startTime;
    }

    public long getFinishTime() {
        return finishTime;
    }

    /**
     * Different statistics implement different collection methods.
     *
     * @return the result of this statistics task
     * @throws Exception
     */
    @Override
    public abstract StatisticsTaskResult call() throws Exception;

    // the caller should hold the job lock first
    public void updateTaskState(TaskState newState) throws DdlException {
        LOG.info("To change statistics task(id={}) state from {} to {}", id, taskState, newState);
        String errorMsg = "Invalid statistics task state transition from ";

        // PENDING -> RUNNING/FAILED
        if (taskState == TaskState.PENDING) {
            switch (newState) {
                case RUNNING:
                    startTime = System.currentTimeMillis();
                    break;
                case FAILED:
                    finishTime = System.currentTimeMillis();
                    break;
                default:
                    throw new DdlException(errorMsg + taskState + " to " + newState);
            }
        } else if (taskState == TaskState.RUNNING) { // RUNNING -> FINISHED/FAILED
            switch (newState) {
                case FINISHED:
                case FAILED:
                    finishTime = System.currentTimeMillis();
                    break;
                default:
                    throw new DdlException(errorMsg + taskState + " to " + newState);
            }
        } else { // unsupported state transition
            throw new DdlException(errorMsg + taskState + " to " + newState);
        }

        LOG.info("Statistics task(id={}) state changed from {} to {}", id, taskState, newState);
        taskState = newState;
    }

    protected void checkStatisticsDesc() throws DdlException {
        for (StatisticsDesc statsDesc : statsDescs) {
            if (statsDesc == null) {
                throw new DdlException("StatisticsDesc is null.");
            }

            if (statsDesc.getStatsCategory() == null) {
                throw new DdlException("Category is null.");
            }

            if (statsDesc.getStatsGranularity() == null) {
                throw new DdlException("Granularity is null.");
            }

            Preconditions.checkState(statsDesc.getStatsCategory().getDbId() > 0L);
            Preconditions.checkState(statsDesc.getStatsCategory().getTableId() > 0L);
        }
    }

    protected TaskResult createNewTaskResult(StatsCategory category, StatsGranularity granularity) {
        TaskResult result = new TaskResult();
        result.setDbId(category.getDbId());
        result.setTableId(category.getTableId());
        result.setPartitionName(category.getPartitionName());
        result.setColumnName(category.getColumnName());
        result.setCategory(category.getCategory());
        result.setGranularity(granularity.getGranularity());
        result.setStatsTypeToValue(Maps.newHashMap());
        return result;
    }
}
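updateTaskState above is a small state machine: PENDING may go to RUNNING or FAILED, RUNNING may go to FINISHED or FAILED, and everything else is rejected. A minimal sketch of the same transition rule extracted as a pure function, so the legality check is easy to see (the class and method names here are illustrative, not part of the original API):

import java.util.EnumSet;
import java.util.Set;

final class TaskStateRule {
    enum TaskState { PENDING, RUNNING, FINISHED, FAILED }

    // Legal transitions mirroring StatisticsTask.updateTaskState above.
    static Set<TaskState> legalNextStates(TaskState current) {
        switch (current) {
            case PENDING:
                return EnumSet.of(TaskState.RUNNING, TaskState.FAILED);
            case RUNNING:
                return EnumSet.of(TaskState.FINISHED, TaskState.FAILED);
            default:
                // FINISHED and FAILED are terminal states.
                return EnumSet.noneOf(TaskState.class);
        }
    }

    static boolean canTransition(TaskState from, TaskState to) {
        return legalNextStates(from).contains(to);
    }
}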
@ -1,132 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.statistics;

import org.apache.doris.statistics.StatsCategory.Category;
import org.apache.doris.statistics.StatsGranularity.Granularity;

import java.util.List;
import java.util.Map;
import java.util.Objects;

public class StatisticsTaskResult {
    private List<TaskResult> taskResults;

    public StatisticsTaskResult(List<TaskResult> taskResults) {
        this.taskResults = taskResults;
    }

    public List<TaskResult> getTaskResults() {
        return taskResults;
    }

    public void setTaskResults(List<TaskResult> taskResults) {
        this.taskResults = taskResults;
    }

    public static class TaskResult {
        private long dbId = -1L;
        private long tableId = -1L;
        private String partitionName = "";
        private String columnName = "";

        private Category category;
        private Granularity granularity;
        private Map<StatsType, String> statsTypeToValue;

        public long getDbId() {
            return dbId;
        }

        public void setDbId(long dbId) {
            this.dbId = dbId;
        }

        public long getTableId() {
            return tableId;
        }

        public void setTableId(long tableId) {
            this.tableId = tableId;
        }

        public String getPartitionName() {
            return partitionName;
        }

        public void setPartitionName(String partitionName) {
            this.partitionName = partitionName;
        }

        public String getColumnName() {
            return columnName;
        }

        public void setColumnName(String columnName) {
            this.columnName = columnName;
        }

        public Category getCategory() {
            return category;
        }

        public void setCategory(Category category) {
            this.category = category;
        }

        public Granularity getGranularity() {
            return granularity;
        }

        public void setGranularity(Granularity granularity) {
            this.granularity = granularity;
        }

        public Map<StatsType, String> getStatsTypeToValue() {
            return statsTypeToValue;
        }

        public void setStatsTypeToValue(Map<StatsType, String> statsTypeToValue) {
            this.statsTypeToValue = statsTypeToValue;
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (o == null || getClass() != o.getClass()) {
                return false;
            }
            TaskResult that = (TaskResult) o;
            return dbId == that.dbId
                    && tableId == that.tableId
                    && partitionName.equals(that.partitionName)
                    && columnName.equals(that.columnName)
                    && category == that.category
                    && granularity == that.granularity;
        }

        @Override
        public int hashCode() {
            return Objects.hash(dbId, tableId, partitionName,
                    columnName, category, granularity);
        }
    }
}
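Note that TaskResult overrides equals/hashCode on its identity fields only (db, table, partition, column, category, granularity) and deliberately leaves statsTypeToValue out. That is what lets StatisticsManager.updateStatistics group several per-tablet results under one map key even though each carries different values. A small illustrative sketch under that assumption (the reduced key type and the sample field values are hypothetical; a Java 16+ record is used for brevity):

import java.util.HashMap;
import java.util.Map;

class GroupingSketch {
    // Reduced stand-in for TaskResult: only the fields that participate in
    // equals/hashCode, so two results for the same column collide on one key.
    record ResultKey(long dbId, long tableId, String partitionName, String columnName) { }

    public static void main(String[] args) {
        Map<ResultKey, Integer> perKeyCount = new HashMap<>();
        ResultKey a = new ResultKey(1L, 10L, "p1", "c1");
        ResultKey b = new ResultKey(1L, 10L, "p1", "c1"); // equal to a
        perKeyCount.merge(a, 1, Integer::sum);
        perKeyCount.merge(b, 1, Integer::sum);
        System.out.println(perKeyCount.size()); // 1: both land in the same bucket
    }
}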
@ -1,198 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.statistics;

import org.apache.doris.analysis.AnalyzeStmt;
import org.apache.doris.catalog.Env;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.Config;
import org.apache.doris.common.DdlException;
import org.apache.doris.common.ThreadPoolManager;
import org.apache.doris.common.util.MasterDaemon;
import org.apache.doris.statistics.StatisticsJob.JobState;
import org.apache.doris.statistics.StatisticsTask.TaskState;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Queues;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.concurrent.CancellationException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

/**
 * Schedules statistics tasks.
 */
public class StatisticsTaskScheduler extends MasterDaemon {
    private static final Logger LOG = LogManager.getLogger(StatisticsTaskScheduler.class);

    private final Queue<StatisticsTask> queue = Queues.newLinkedBlockingQueue();

    public StatisticsTaskScheduler() {
        super("Statistics task scheduler",
                Config.statistic_task_scheduler_execution_interval_ms);
    }

    @Override
    protected void runAfterCatalogReady() {
        // step 1: take n concurrent tasks from the queue
        List<StatisticsTask> tasks = peek();

        if (!tasks.isEmpty()) {
            ThreadPoolExecutor executor = ThreadPoolManager.newDaemonCacheThreadPool(tasks.size(),
                    "statistic-pool", false);
            StatisticsJobManager jobManager = Env.getCurrentEnv().getStatisticsJobManager();
            Map<Long, StatisticsJob> statisticsJobs = jobManager.getIdToStatisticsJob();
            Map<Long, List<Map<Long, Future<StatisticsTaskResult>>>> resultMap = Maps.newLinkedHashMap();

            for (StatisticsTask task : tasks) {
                long jobId = task.getJobId();

                if (checkJobIsValid(jobId)) {
                    // step 2: execute the task and save its result
                    Future<StatisticsTaskResult> future = executor.submit(task);
                    StatisticsJob statisticsJob = statisticsJobs.get(jobId);

                    if (updateTaskAndJobState(task, statisticsJob)) {
                        Map<Long, Future<StatisticsTaskResult>> taskInfo = Maps.newHashMap();
                        taskInfo.put(task.getId(), future);
                        List<Map<Long, Future<StatisticsTaskResult>>> jobInfo = resultMap
                                .getOrDefault(jobId, Lists.newArrayList());
                        jobInfo.add(taskInfo);
                        resultMap.put(jobId, jobInfo);
                    }
                }
            }

            // step 3: handle task results
            handleTaskResult(resultMap);
        }
    }

    public void addTasks(List<StatisticsTask> statisticsTaskList) throws IllegalStateException {
        queue.addAll(statisticsTaskList);
    }

    private List<StatisticsTask> peek() {
        List<StatisticsTask> tasks = Lists.newArrayList();
        int i = Config.cbo_concurrency_statistics_task_num;
        while (i > 0) {
            StatisticsTask task = queue.poll();
            if (task == null) {
                break;
            }
            tasks.add(task);
            i--;
        }
        return tasks;
    }

    /**
     * Update task and job state.
     *
     * @param task statistics task
     * @param job statistics job
     * @return true if the task and job states were updated successfully
     */
    private boolean updateTaskAndJobState(StatisticsTask task, StatisticsJob job) {
        try {
            // update task state
            task.updateTaskState(TaskState.RUNNING);
        } catch (DdlException e) {
            LOG.info("Update statistics task state failed, taskId: " + task.getId(), e);
        }

        try {
            // update job state
            if (task.getTaskState() != TaskState.RUNNING) {
                job.updateJobState(JobState.FAILED);
            } else {
                if (job.getJobState() == JobState.SCHEDULING) {
                    job.updateJobState(JobState.RUNNING);
                }
            }
        } catch (DdlException e) {
            LOG.info("Update statistics job state failed, jobId: " + job.getId(), e);
            return false;
        }
        return true;
    }

    private void handleTaskResult(Map<Long, List<Map<Long, Future<StatisticsTaskResult>>>> resultMap) {
        StatisticsManager statsManager = Env.getCurrentEnv().getStatisticsManager();
        StatisticsJobManager jobManager = Env.getCurrentEnv().getStatisticsJobManager();

        resultMap.forEach((jobId, taskMapList) -> {
            if (checkJobIsValid(jobId)) {
                StatisticsJob statisticsJob = jobManager.getIdToStatisticsJob().get(jobId);
                Map<String, String> properties = statisticsJob.getProperties();
                long timeout = Long.parseLong(properties.get(AnalyzeStmt.CBO_STATISTICS_TASK_TIMEOUT_SEC));

                // For tasks with tablet granularity, their results must be aggregated
                // to produce the final statistics, so all the tasks are collected
                // and their results handled together.
                List<StatisticsTaskResult> taskResults = Lists.newArrayList();

                for (Map<Long, Future<StatisticsTaskResult>> taskInfos : taskMapList) {
                    taskInfos.forEach((taskId, future) -> {
                        String errorMsg = "";

                        try {
                            StatisticsTaskResult taskResult = future.get(timeout, TimeUnit.SECONDS);
                            taskResults.add(taskResult);
                        } catch (TimeoutException | ExecutionException | InterruptedException
                                | CancellationException e) {
                            errorMsg = e.getMessage();
                            LOG.error("Failed to get statistics. jobId: {}, taskId: {}, e: {}", jobId, taskId, e);
                        }

                        try {
                            statisticsJob.updateJobInfoByTaskId(taskId, errorMsg);
                        } catch (DdlException e) {
                            LOG.info("Failed to update statistics job info. jobId: {}, e: {}", jobId, e);
                        }
                    });
                }

                try {
                    statsManager.updateStatistics(taskResults);
                } catch (AnalysisException e) {
                    LOG.info("Failed to update statistics. jobId: {}, e: {}", jobId, e);
                }
            }
        });
    }

    public boolean checkJobIsValid(Long jobId) {
        StatisticsJobManager jobManager = Env.getCurrentEnv().getStatisticsJobManager();
        StatisticsJob statisticsJob = jobManager.getIdToStatisticsJob().get(jobId);
        if (statisticsJob == null) {
            return false;
        }
        JobState jobState = statisticsJob.getJobState();
        return jobState != JobState.CANCELLED && jobState != JobState.FAILED;
    }
}
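The peek() helper above is a bounded drain: each scheduler round polls at most Config.cbo_concurrency_statistics_task_num tasks and stops early when the queue runs dry. A standalone sketch of the same pattern (the queue contents and the limit of 3 are made-up example values):

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.List;
import java.util.Queue;

class BoundedDrainSketch {
    // Polls up to 'limit' elements, stopping early when the queue is empty,
    // mirroring StatisticsTaskScheduler.peek() above.
    static <T> List<T> drain(Queue<T> queue, int limit) {
        List<T> batch = new ArrayList<>();
        for (int i = 0; i < limit; i++) {
            T item = queue.poll();
            if (item == null) {
                break;
            }
            batch.add(item);
        }
        return batch;
    }

    public static void main(String[] args) {
        Queue<String> queue = new ArrayDeque<>(List.of("t1", "t2", "t3", "t4"));
        System.out.println(drain(queue, 3)); // [t1, t2, t3]; t4 waits for the next round
    }
}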
@ -1,309 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.statistics;

import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.util.Util;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Predicate;

/**
 * These are the statistics of a table.
 * The table stats are mainly used to provide input for the optimizer's cost model.
 * The table stats are described as follows:
 * - @rowCount: The row count of the table.
 * - @dataSize: The data size of the table.
 * - @nameToColumnStats: <@String columnName, @ColumnStats columnStats>
 * <p>
 * Each column in the table has a corresponding @ColumnStats.
 * Those @ColumnStats are recorded in the @nameToColumnStats map,
 * which allows the optimizer to quickly find the @ColumnStats for a column by name.
 * <p>
 * The granularity of the statistics is the whole table.
 * For example, "@rowCount = 1000" means that the whole table has 1000 rows.
 * <p>
 * TableStats is updated only after a statistics task completes successfully;
 * it should not be updated in any other way.
 */
public class TableStats {
    public static final StatsType DATA_SIZE = StatsType.DATA_SIZE;
    public static final StatsType ROW_COUNT = StatsType.ROW_COUNT;

    private static final Predicate<Double> DESIRED_ROW_COUNT_PRED = (v) -> v >= -1L;
    private static final Predicate<Long> DESIRED_DATA_SIZE_PRED = (v) -> v >= -1L;

    private double rowCount = -1;
    private long dataSize = -1;
    private final Map<String, PartitionStats> nameToPartitionStats = Maps.newConcurrentMap();
    private final Map<String, ColumnStat> nameToColumnStats = Maps.newConcurrentMap();

    /**
     * Return a default table statistic.
     */
    public static TableStats getDefaultTableStats() {
        return new TableStats();
    }

    public TableStats() {
    }

    public TableStats(double rowCount, long dataSize) {
        this.rowCount = rowCount;
        this.dataSize = dataSize;
    }

    public double getRowCount() {
        // '!isEmpty()' is added mainly because the result returns 0
        // instead of the expected -1 when nameToPartitionStats is empty.
        if (rowCount == -1 && !nameToPartitionStats.isEmpty()) {
            return nameToPartitionStats.values().stream()
                    .filter(partitionStats -> partitionStats.getRowCount() != -1)
                    .mapToLong(PartitionStats::getRowCount).sum();
        }
        return rowCount;
    }

    public long getDataSize() {
        if (dataSize == -1 && !nameToPartitionStats.isEmpty()) {
            return nameToPartitionStats.values().stream()
                    .filter(partitionStats -> partitionStats.getDataSize() != -1)
                    .mapToLong(PartitionStats::getDataSize).sum();
        }
        return dataSize;
    }

    public Map<String, PartitionStats> getNameToPartitionStats() {
        return nameToPartitionStats;
    }

    public Map<String, ColumnStat> getNameToColumnStats() {
        if (nameToColumnStats.isEmpty()) {
            return getAggPartitionColStats();
        }
        return nameToColumnStats;
    }

    public PartitionStats getPartitionStats(String partitionName) {
        return nameToPartitionStats.get(partitionName);
    }

    /**
     * If the partition statistics do not exist, the default statistics will be returned.
     */
    public PartitionStats getPartitionStatsOrDefault(String partitionName) {
        return nameToPartitionStats.getOrDefault(partitionName,
                PartitionStats.getDefaultPartitionStats());
    }

    /**
     * If the column statistics do not exist, the default statistics will be returned.
     */
    public ColumnStat getColumnStatsOrDefault(String columnName) {
        return nameToColumnStats.getOrDefault(columnName,
                ColumnStat.getDefaultColumnStats());
    }

    /**
     * After the statistics task is successfully completed, update the statistics of the table;
     * statistics should not be updated in any other way.
     */
    public void updateTableStats(Map<StatsType, String> statsTypeToValue) throws AnalysisException {
        for (Map.Entry<StatsType, String> entry : statsTypeToValue.entrySet()) {
            if (entry.getKey() == ROW_COUNT) {
                rowCount = Util.getDoublePropertyOrDefault(entry.getValue(), rowCount,
                        DESIRED_ROW_COUNT_PRED, ROW_COUNT + " should >= -1");
            } else if (entry.getKey() == DATA_SIZE) {
                dataSize = Util.getLongPropertyOrDefault(entry.getValue(), dataSize,
                        DESIRED_DATA_SIZE_PRED, DATA_SIZE + " should >= -1");
            }
        }
    }

    /**
     * After the statistics task is successfully completed, update the statistics of the partition;
     * statistics should not be updated in any other way.
     */
    public void updatePartitionStats(String partitionName, Map<StatsType, String> statsTypeToValue)
            throws AnalysisException {
        PartitionStats partitionStats = getNotNullPartitionStats(partitionName);
        partitionStats.updatePartitionStats(statsTypeToValue);
    }

    /**
     * After the statistics task is successfully completed, update the statistics of the column;
     * statistics should not be updated in any other way.
     */
    public void updateColumnStats(String columnName, Type columnType, Map<StatsType, String> statsTypeToValue)
            throws AnalysisException {
        ColumnStat columnStat = getColumnStats(columnName);
        columnStat.updateStats(columnType, statsTypeToValue);
    }

    /**
     * If the partition stats do not exist, create a new one.
     *
     * @param partitionName partition name
     * @return @PartitionStats
     */
    private PartitionStats getNotNullPartitionStats(String partitionName) {
        PartitionStats partitionStat = nameToPartitionStats.get(partitionName);
        if (partitionStat == null) {
            partitionStat = new PartitionStats();
            nameToPartitionStats.put(partitionName, partitionStat);
        }
        return partitionStat;
    }

    /**
     * If the column stats do not exist, create a new one.
     *
     * @param columnName column name
     * @return @ColumnStats
     */
    private ColumnStat getNotNullColumnStats(String columnName) {
        ColumnStat columnStat = nameToColumnStats.get(columnName);
        if (columnStat == null) {
            columnStat = new ColumnStat();
            nameToColumnStats.put(columnName, columnStat);
        }
        return columnStat;
    }

    public ColumnStat getColumnStats(String columnName) {
        ColumnStat columnStat = nameToColumnStats.get(columnName);
        if (columnStat == null) {
            columnStat = new ColumnStat();
            nameToColumnStats.put(columnName, columnStat);
        }
        return columnStat;
    }

    public ColumnStat getColumnStatCopy(String columnName) {
        ColumnStat columnStat = getColumnStats(columnName);
        return columnStat.copy();
    }

    public List<String> getShowInfo() {
        List<String> result = Lists.newArrayList();
        result.add(Double.toString(getRowCount()));
        result.add(Long.toString(getDataSize()));
        return result;
    }

    public List<String> getShowInfo(String partitionName) {
        PartitionStats partitionStats = nameToPartitionStats.get(partitionName);
        return partitionStats.getShowInfo();
    }

    private Map<String, ColumnStat> getAggPartitionColStats() {
        Map<String, ColumnStat> aggColumnStats = new HashMap<>();
        for (PartitionStats partitionStats : nameToPartitionStats.values()) {
            partitionStats.getNameToColumnStats().forEach((colName, columnStats) -> {
                if (!aggColumnStats.containsKey(colName)) {
                    aggColumnStats.put(colName, columnStats.copy());
                } else {
                    ColumnStat tblColStats = aggColumnStats.get(colName);
                    mergePartitionColumnStats(tblColStats, columnStats);
                }
            });
        }

        return aggColumnStats;
    }

    private void mergePartitionColumnStats(ColumnStat leftStats, ColumnStat rightStats) {
        if (leftStats.getNdv() == -1) {
            if (rightStats.getNdv() != -1) {
                leftStats.setNdv(rightStats.getNdv());
            }
        } else {
            if (rightStats.getNdv() != -1) {
                double ndv = leftStats.getNdv() + rightStats.getNdv();
                leftStats.setNdv(ndv);
            }
        }

        if (leftStats.getAvgSizeByte() == -1) {
            if (rightStats.getAvgSizeByte() != -1) {
                leftStats.setAvgSizeByte(rightStats.getAvgSizeByte());
            }
        } else {
            if (rightStats.getAvgSizeByte() != -1) {
                double avgSize = (leftStats.getAvgSizeByte() + rightStats.getAvgSizeByte()) / 2;
                leftStats.setAvgSizeByte(avgSize);
            }
        }

        if (leftStats.getMaxSizeByte() == -1) {
            if (rightStats.getMaxSizeByte() != -1) {
                leftStats.setMaxSizeByte(rightStats.getMaxSizeByte());
            }
        } else {
            if (rightStats.getMaxSizeByte() != -1) {
                double maxSize = Math.max(leftStats.getMaxSizeByte(), rightStats.getMaxSizeByte());
                leftStats.setMaxSizeByte(maxSize);
            }
        }

        if (leftStats.getNumNulls() == -1) {
            if (rightStats.getNumNulls() != -1) {
                leftStats.setNumNulls(rightStats.getNumNulls());
            }
        } else {
            if (rightStats.getNumNulls() != -1) {
                double numNulls = leftStats.getNumNulls() + rightStats.getNumNulls();
                leftStats.setNumNulls(numNulls);
            }
        }

        if (Double.isNaN(leftStats.getMinValue())) {
            if (!Double.isNaN(rightStats.getMinValue())) {
                leftStats.setMinValue(rightStats.getMinValue());
            }
        } else if (!Double.isNaN(rightStats.getMinValue())) {
            double minValue = Math.max(leftStats.getMinValue(), rightStats.getMinValue());
            leftStats.setMinValue(minValue);
        }

        if (Double.isNaN(leftStats.getMaxValue())) {
            if (!Double.isNaN(rightStats.getMaxValue())) {
                leftStats.setMaxValue(rightStats.getMaxValue());
            }
        } else if (!Double.isNaN(rightStats.getMaxValue())) {
            double maxValue = Math.min(leftStats.getMaxValue(), rightStats.getMaxValue());
            leftStats.setMaxValue(maxValue);
        }
    }

    /**
     * This method is for unit tests.
     */
    public void putColumnStats(String name, ColumnStat columnStat) {
        nameToColumnStats.put(name, columnStat);
    }
}
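getRowCount() above treats -1 as "unknown" and falls back to summing the known partition row counts. A self-contained sketch of that fallback rule, with the partition values invented for illustration:

import java.util.List;

class RowCountFallbackSketch {
    // Mirrors TableStats.getRowCount(): a table-level value of -1 means
    // "unknown"; in that case, sum the partition counts, skipping unknown ones.
    static long effectiveRowCount(long tableRowCount, List<Long> partitionRowCounts) {
        if (tableRowCount == -1 && !partitionRowCounts.isEmpty()) {
            return partitionRowCounts.stream()
                    .filter(c -> c != -1)
                    .mapToLong(Long::longValue)
                    .sum();
        }
        return tableRowCount;
    }

    public static void main(String[] args) {
        // Table count unknown, two of three partitions known: 120 + 80 = 200.
        System.out.println(effectiveRowCount(-1, List.of(120L, -1L, 80L)));
    }
}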
@ -36,15 +36,10 @@ import org.apache.doris.nereids.types.IntegerType;
import org.apache.doris.nereids.util.MemoTestUtils;
import org.apache.doris.nereids.util.PlanConstructor;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.statistics.ColumnStat;
import org.apache.doris.statistics.StatisticsManager;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.TableStats;

import com.google.common.collect.ImmutableList;
import mockit.Expectations;
import mockit.Mock;
import mockit.MockUp;
import mockit.Mocked;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
@ -60,8 +55,6 @@ public class DeriveStatsJobTest {
    ConnectContext context;
    @Mocked
    Env env;
    @Mocked
    StatisticsManager statisticsManager;

    SlotReference slot1;

@ -81,14 +74,6 @@ public class DeriveStatsJobTest {
    }

    private LogicalOlapScan constructOlapSCan() throws AnalysisException {
        ColumnStat columnStats1 = new ColumnStat(10, 0, 0, 5,
                Double.NaN, Double.NaN);
        new MockUp<TableStats>(TableStats.class) {
            @Mock
            public ColumnStat getColumnStats(String columnName) {
                return columnStats1;
            }
        };

        long tableId1 = 0;


@ -36,11 +36,9 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalTopN;
import org.apache.doris.nereids.types.IntegerType;
import org.apache.doris.nereids.util.PlanConstructor;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.statistics.ColumnStat;
import org.apache.doris.statistics.ColumnStatistic;
import org.apache.doris.statistics.ColumnStatisticBuilder;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.TableStats;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
@ -238,13 +236,7 @@ public class StatsCalculatorTest {

    @Test
    public void testOlapScan(@Mocked ConnectContext context) {
        ColumnStat columnStat1 = new ColumnStat();
        columnStat1.setNdv(10);
        columnStat1.setNumNulls(5);
        long tableId1 = 0;
        TableStats tableStats1 = new TableStats();
        tableStats1.putColumnStats("c1", columnStat1);

        List<String> qualifier = ImmutableList.of("test", "t");
        SlotReference slot1 = new SlotReference("c1", IntegerType.INSTANCE, true, qualifier);

@ -1,154 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.statistics;

import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;

import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class ColumnStatsTest {
    private ColumnStat columnStatsUnderTest;

    @Before
    public void setUp() throws Exception {
        columnStatsUnderTest = new ColumnStat();
    }

    @Test
    public void testUpdateStats() throws Exception {
        // Setup
        Type columnType = Type.fromPrimitiveType(PrimitiveType.BIGINT);
        Map<StatsType, String> statsTypeToValue = new HashMap<>();
        statsTypeToValue.put(StatsType.MAX_SIZE, "8");
        statsTypeToValue.put(StatsType.MIN_VALUE, "0");
        statsTypeToValue.put(StatsType.MAX_VALUE, "100");

        // Run the test
        columnStatsUnderTest.updateStats(columnType, statsTypeToValue);

        // Verify the results
        double maxSize = columnStatsUnderTest.getMaxSizeByte();
        Assert.assertEquals(8, maxSize, 0.1);

        double minValue = columnStatsUnderTest.getMinValue();
        Assert.assertEquals(0, minValue, 0.1);

        double maxValue = columnStatsUnderTest.getMaxValue();
        Assert.assertEquals(100, maxValue, 0.1);
    }

    @Test
    public void testUpdateStats_ThrowsAnalysisException() {
        // Setup
        Type columnType = Type.fromPrimitiveType(PrimitiveType.BIGINT);
        Map<StatsType, String> statsTypeToValue = new HashMap<>();
        statsTypeToValue.put(StatsType.AVG_SIZE, "abc");

        // Run the test
        Assert.assertThrows(AnalysisException.class,
                () -> columnStatsUnderTest.updateStats(columnType, statsTypeToValue));
    }

    @Test
    public void testGetShowInfo() throws AnalysisException {
        // Setup
        Type columnType = Type.fromPrimitiveType(PrimitiveType.BIGINT);
        Map<StatsType, String> statsTypeToValue = new HashMap<>();
        statsTypeToValue.put(StatsType.NDV, "1");
        statsTypeToValue.put(StatsType.AVG_SIZE, "8");
        statsTypeToValue.put(StatsType.MAX_SIZE, "8");
        statsTypeToValue.put(StatsType.NUM_NULLS, "2");
        statsTypeToValue.put(StatsType.MIN_VALUE, "0");
        statsTypeToValue.put(StatsType.MAX_VALUE, "1000");

        columnStatsUnderTest.updateStats(columnType, statsTypeToValue);
        String[] expectedInfo = {"1.0", "8.0", "8.0", "2.0", "0.0", "1000.0"};

        // Run the test
        List<String> showInfo = columnStatsUnderTest.getShowInfo();
        String[] result = showInfo.toArray(new String[0]);

        // Verify the results
        Assert.assertArrayEquals(expectedInfo, result);
    }

    @Test
    public void testGetDefaultColumnStats() {
        // Run the test
        ColumnStat defaultColumnStats = ColumnStat.getDefaultColumnStats();

        // Verify the results
        double ndv = defaultColumnStats.getNdv();
        Assert.assertEquals(-1L, ndv, 0.1);

        double avgSize = defaultColumnStats.getAvgSizeByte();
        Assert.assertEquals(-1.0f, avgSize, 0.0001);

        double maxSize = defaultColumnStats.getMaxSizeByte();
        Assert.assertEquals(-1L, maxSize, 0.1);

        double maxValue = defaultColumnStats.getMaxValue();
        Assert.assertEquals(Double.NaN, maxValue, 0.1);

        double minValue = defaultColumnStats.getMinValue();
        Assert.assertEquals(Double.NaN, minValue, 0.1);
    }

    @Test
    public void testAggColumnStats() throws Exception {
        // Setup
        ColumnStat columnStats = ColumnStat.getDefaultColumnStats();
        ColumnStat other = new ColumnStat(1L, 4.0f, 5L, 10L,
                Double.NaN,
                Double.NaN);

        // Run the test
        ColumnStat aggColumnStats = ColumnStat.mergeColumnStats(columnStats, other);

        // Verify the results
        double ndv = aggColumnStats.getNdv();
        // unknown (default) merged with 1
        Assert.assertEquals(1L, ndv, 0.1);

        double avgSize = aggColumnStats.getAvgSizeByte();
        // unknown (default) yields to 4.0f
        Assert.assertEquals(4.0f, avgSize, 0.0001);

        double maxSize = aggColumnStats.getMaxSizeByte();
        Assert.assertEquals(5L, maxSize, 0.1);

        double numNulls = aggColumnStats.getNumNulls();
        Assert.assertEquals(10L, numNulls, 0.1);

        double minValue = aggColumnStats.getMinValue();
        // NaN (default) vs. the other side's min value
        Assert.assertEquals(Double.NaN, minValue, 0.1);

        double maxValue = aggColumnStats.getMaxValue();
        // NaN (default) vs. the other side's max value
        Assert.assertEquals(Double.NaN, maxValue, 0.1);
    }
}
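testAggColumnStats above pins down the merge convention: a default of -1 (or NaN for min/max) means "unknown" and yields to the other side's value, while two known NDVs are summed (see mergePartitionColumnStats earlier). A tiny sketch of that rule for a single field; the mergeNdv helper is hypothetical, not the ColumnStat API:

class MergeRuleSketch {
    // -1 encodes "unknown": an unknown side yields to the known side,
    // mirroring the expectations asserted in testAggColumnStats above.
    static double mergeNdv(double left, double right) {
        if (left == -1) {
            return right; // may still be -1 if both sides are unknown
        }
        if (right == -1) {
            return left;
        }
        return left + right; // both known: partition NDVs are summed
    }

    public static void main(String[] args) {
        System.out.println(mergeNdv(-1, 1)); // 1.0, as the test expects
    }
}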
@ -1,136 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.statistics;

import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;

import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class PartitionStatsTest {
    private PartitionStats partitionStatsUnderTest;

    @Before
    public void setUp() throws Exception {
        partitionStatsUnderTest = new PartitionStats();
    }

    @Test
    public void testUpdatePartitionStats() throws Exception {
        // Setup
        Map<StatsType, String> statsTypeToValue = new HashMap<>();
        statsTypeToValue.put(StatsType.ROW_COUNT, "1000");
        statsTypeToValue.put(StatsType.DATA_SIZE, "10240");

        // Run the test
        partitionStatsUnderTest.updatePartitionStats(statsTypeToValue);

        // Verify the results
        long rowCount = partitionStatsUnderTest.getRowCount();
        Assert.assertEquals(1000, rowCount);

        long dataSize = partitionStatsUnderTest.getDataSize();
        Assert.assertEquals(10240, dataSize);
    }

    @Test
    public void testUpdatePartitionStats_ThrowsAnalysisException() {
        // Setup
        Map<StatsType, String> statsTypeToValue = new HashMap<>();
        statsTypeToValue.put(StatsType.AVG_SIZE, "8");
        statsTypeToValue.put(StatsType.ROW_COUNT, "abc");

        // Run the test
        Assert.assertThrows(AnalysisException.class,
                () -> partitionStatsUnderTest.updatePartitionStats(statsTypeToValue));
    }

    @Test
    public void testUpdateColumnStats() throws Exception {
        // Setup
        Type columnType = Type.fromPrimitiveType(PrimitiveType.BIGINT);
        Map<StatsType, String> statsTypeToValue = new HashMap<>();
        statsTypeToValue.put(StatsType.NDV, "1");
        statsTypeToValue.put(StatsType.AVG_SIZE, "8");
        statsTypeToValue.put(StatsType.MAX_SIZE, "8");
        statsTypeToValue.put(StatsType.NUM_NULLS, "2");
        statsTypeToValue.put(StatsType.MIN_VALUE, "0");
        statsTypeToValue.put(StatsType.MAX_VALUE, "1000");

        // Run the test
        partitionStatsUnderTest.updateColumnStats("columnName", columnType, statsTypeToValue);
        ColumnStat columnStats = partitionStatsUnderTest.getColumnStats("columnName");

        // Verify the results
        double ndv = columnStats.getNdv();
        Assert.assertEquals(1, ndv, 0.1);

        double avgSize = columnStats.getAvgSizeByte();
        Assert.assertEquals(8.0f, avgSize, 0.0001);

        double maxSize = columnStats.getMaxSizeByte();
        Assert.assertEquals(8, maxSize, 0.1);

        double maxValue = columnStats.getMaxValue();
        Assert.assertEquals(1000, maxValue, 0.1);

        double minValue = columnStats.getMinValue();
        Assert.assertEquals(0, minValue, 0.1);

        double numNulls = columnStats.getNumNulls();
        Assert.assertEquals(2, numNulls, 0.1);
    }

    @Test
    public void testUpdateColumnStats_ThrowsAnalysisException() {
        // Setup
        Type columnType = Type.fromPrimitiveType(PrimitiveType.BIGINT);
        Map<StatsType, String> statsTypeToValue = new HashMap<>();
        statsTypeToValue.put(StatsType.AVG_SIZE, "abc");

        // Run the test
        Assert.assertThrows(
                AnalysisException.class, () -> partitionStatsUnderTest
                        .updateColumnStats("columnName", columnType, statsTypeToValue));
    }
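    // getShowInfo() renders partition-level stats as the raw strings
    // ["1000", "10240"], i.e. row count and data size without decoration.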
    @Test
    public void testGetShowInfo() throws AnalysisException {
        // Setup
        Map<StatsType, String> statsTypeToValue = new HashMap<>();
        statsTypeToValue.put(StatsType.ROW_COUNT, "1000");
        statsTypeToValue.put(StatsType.DATA_SIZE, "10240");

        partitionStatsUnderTest.updatePartitionStats(statsTypeToValue);
        String[] expectedInfo = {"1000", "10240"};

        // Run the test
        List<String> showInfo = partitionStatsUnderTest.getShowInfo();
        String[] result = showInfo.toArray(new String[0]);

        // Verify the results
        Assert.assertArrayEquals(expectedInfo, result);
    }
}
@ -1,207 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.statistics;

import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.HashDistributionInfo;
import org.apache.doris.catalog.KeysType;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.PartitionInfo;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.common.DdlException;
import org.apache.doris.common.jmockit.Deencapsulation;
import org.apache.doris.datasource.InternalCatalog;
import org.apache.doris.statistics.util.InternalQuery;
import org.apache.doris.statistics.util.InternalQueryResult;

import mockit.Mock;
import mockit.MockUp;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;

public class SQLStatisticsTaskTest {
    private SQLStatisticsTask sqlStatisticsTaskUnderTest;

    @Before
    public void setUp() throws Exception {
        StatsCategory statsCategory = new StatsCategory();
        StatsGranularity statsGranularity = new StatsGranularity();
        List<StatsType> statsTypes = Collections.singletonList(StatsType.ROW_COUNT);
        sqlStatisticsTaskUnderTest = new SQLStatisticsTask(0L,
                Collections.singletonList(new StatisticsDesc(statsCategory, statsGranularity, statsTypes)));

        InternalCatalog catalog = Env.getCurrentInternalCatalog();
        Column column = new Column("columnName", PrimitiveType.STRING);
        OlapTable tableName = new OlapTable(0L, "tableName",
                Collections.singletonList(column), KeysType.AGG_KEYS,
                new PartitionInfo(), new HashDistributionInfo());
        Database database = new Database(0L, "db");
        database.createTable(tableName);

        ConcurrentHashMap<String, Database> fullNameToDb = new ConcurrentHashMap<>();
        fullNameToDb.put("cluster:db", database);
        Deencapsulation.setField(catalog, "fullNameToDb", fullNameToDb);

        ConcurrentHashMap<Long, Database> idToDb = new ConcurrentHashMap<>();
        idToDb.put(0L, database);
        Deencapsulation.setField(catalog, "idToDb", idToDb);

        List<String> columns = Collections.singletonList("row_count");
        List<PrimitiveType> types = Arrays.asList(PrimitiveType.STRING,
                PrimitiveType.INT, PrimitiveType.FLOAT,
                PrimitiveType.DOUBLE, PrimitiveType.BIGINT);
        InternalQueryResult queryResult = new InternalQueryResult();
        InternalQueryResult.ResultRow resultRow =
                new InternalQueryResult.ResultRow(columns, types, Collections.singletonList("1000"));
        queryResult.getResultRows().add(resultRow);

        new MockUp<InternalQuery>(InternalQuery.class) {
            @Mock
            public InternalQueryResult query() {
                return queryResult;
            }
        };
    }
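    // constructQuery() maps a ROW_COUNT descriptor at TABLE granularity to a plain
    // "SELECT COUNT(1) AS row_count FROM <table>;" statement; the partition and
    // column fields set on the category play no role at this granularity.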
    @Test
    public void testConstructQuery() throws Exception {
        // Setup
        String expectedSQL = "SELECT COUNT(1) AS row_count FROM tableName;";

        StatsCategory statsCategory = new StatsCategory();
        statsCategory.setCategory(StatsCategory.Category.TABLE);
        statsCategory.setDbId(0L);
        statsCategory.setTableId(0L);
        statsCategory.setPartitionName("partitionName");
        statsCategory.setColumnName("columnName");
        statsCategory.setStatsValue("statsValue");

        StatsGranularity statsGranularity = new StatsGranularity();
        statsGranularity.setGranularity(StatsGranularity.Granularity.TABLE);
        statsGranularity.setTableId(0L);
        statsGranularity.setPartitionId(0L);
        statsGranularity.setTabletId(0L);

        StatisticsDesc statsDesc = new StatisticsDesc(statsCategory, statsGranularity,
                Collections.singletonList(StatsType.ROW_COUNT));

        // Run the test
        String result = sqlStatisticsTaskUnderTest.constructQuery(statsDesc);

        // Verify the results
        Assert.assertEquals(expectedSQL, result);
    }

    @Test
    public void testConstructQuery_ThrowsDdlException() {
        // Setup
        StatsCategory statsCategory = new StatsCategory();
        statsCategory.setCategory(StatsCategory.Category.TABLE);
        statsCategory.setDbId(0L);
        statsCategory.setTableId(0L);
        statsCategory.setPartitionName("partitionName");
        statsCategory.setColumnName("columnName");
        statsCategory.setStatsValue("statsValue");

        StatsGranularity statsGranularity = new StatsGranularity();
        statsGranularity.setGranularity(StatsGranularity.Granularity.TABLE);
        statsGranularity.setTableId(0L);
        statsGranularity.setPartitionId(0L);
        statsGranularity.setTabletId(0L);

        StatisticsDesc statsDesc = new StatisticsDesc(statsCategory, statsGranularity,
                Collections.singletonList(StatsType.UNKNOWN));

        // Run the test
        Assert.assertThrows(DdlException.class,
                () -> sqlStatisticsTaskUnderTest.constructQuery(statsDesc));
    }
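    // executeQuery() runs the (mocked) InternalQuery and folds the single "1000"
    // result row into a TaskResult keyed by StatsType.ROW_COUNT.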
    @Test
    public void testExecuteQuery() throws Exception {
        // Setup
        StatsCategory statsCategory = new StatsCategory();
        statsCategory.setCategory(StatsCategory.Category.TABLE);
        statsCategory.setDbId(0L);
        statsCategory.setTableId(0L);
        statsCategory.setPartitionName("partitionName");
        statsCategory.setColumnName("columnName");
        statsCategory.setStatsValue("statsValue");

        StatsGranularity statsGranularity = new StatsGranularity();
        statsGranularity.setGranularity(StatsGranularity.Granularity.TABLE);
        statsGranularity.setTableId(0L);
        statsGranularity.setPartitionId(0L);
        statsGranularity.setTabletId(0L);

        StatisticsTaskResult.TaskResult expectedResult = new StatisticsTaskResult.TaskResult();
        expectedResult.setDbId(0L);
        expectedResult.setTableId(0L);
        expectedResult.setPartitionName("partitionName");
        expectedResult.setColumnName("columnName");
        expectedResult.setCategory(StatsCategory.Category.TABLE);
        expectedResult.setGranularity(StatsGranularity.Granularity.TABLE);
        HashMap<StatsType, String> hashMap = new HashMap<>();
        hashMap.put(StatsType.ROW_COUNT, "1000");
        expectedResult.setStatsTypeToValue(hashMap);

        StatisticsDesc statsDesc = new StatisticsDesc(statsCategory, statsGranularity,
                Collections.singletonList(StatsType.ROW_COUNT));

        // Run the test
        StatisticsTaskResult.TaskResult result = sqlStatisticsTaskUnderTest.executeQuery(statsDesc);

        // Verify the results
        Assert.assertEquals(expectedResult, result);
    }

    @Test
    public void testExecuteQuery_ThrowsException() {
        // Setup
        StatsCategory statsCategory = new StatsCategory();
        statsCategory.setCategory(StatsCategory.Category.TABLE);
        statsCategory.setDbId(0L);
        statsCategory.setTableId(0L);
        statsCategory.setPartitionName("partitionName");
        statsCategory.setColumnName("columnName");
        statsCategory.setStatsValue("statsValue");

        StatsGranularity statsGranularity = new StatsGranularity();
        statsGranularity.setGranularity(StatsGranularity.Granularity.TABLE);
        statsGranularity.setTableId(0L);
        statsGranularity.setPartitionId(0L);
        statsGranularity.setTabletId(0L);

        StatisticsDesc statsDesc = new StatisticsDesc(statsCategory, statsGranularity,
                Arrays.asList(StatsType.NDV, StatsType.MAX_VALUE, StatsType.MIN_VALUE));

        // Run the test
        Assert.assertThrows(Exception.class,
                () -> sqlStatisticsTaskUnderTest.executeQuery(statsDesc));
    }
}
@ -1,205 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.statistics;

import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.HashDistributionInfo;
import org.apache.doris.catalog.KeysType;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.PartitionInfo;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.common.DdlException;
import org.apache.doris.common.jmockit.Deencapsulation;
import org.apache.doris.datasource.InternalCatalog;
import org.apache.doris.statistics.StatsCategory.Category;
import org.apache.doris.statistics.StatsGranularity.Granularity;
import org.apache.doris.statistics.util.InternalQuery;
import org.apache.doris.statistics.util.InternalQueryResult;

import mockit.Mock;
import mockit.MockUp;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

public class SampleSQLStatisticsTaskTest {

    private SampleSQLStatisticsTask sampleSQLStatisticsTaskUnderTest;

    @Before
    public void setUp() throws Exception {
        InternalCatalog catalog = Env.getCurrentInternalCatalog();
        Column column = new Column("columnName", PrimitiveType.STRING);
        OlapTable tableName = new OlapTable(0L, "tableName",
                Collections.singletonList(column), KeysType.AGG_KEYS,
                new PartitionInfo(), new HashDistributionInfo());
        Database database = new Database(0L, "db");
        database.createTable(tableName);

        ConcurrentHashMap<String, Database> fullNameToDb = new ConcurrentHashMap<>();
        fullNameToDb.put("cluster:db", database);
        Deencapsulation.setField(catalog, "fullNameToDb", fullNameToDb);

        ConcurrentHashMap<Long, Database> idToDb = new ConcurrentHashMap<>();
        idToDb.put(0L, database);
        Deencapsulation.setField(catalog, "idToDb", idToDb);

        List<String> columns = Collections.singletonList("row_count");
        List<PrimitiveType> types = Arrays.asList(PrimitiveType.STRING,
                PrimitiveType.INT, PrimitiveType.FLOAT,
                PrimitiveType.DOUBLE, PrimitiveType.BIGINT);
        InternalQueryResult queryResult = new InternalQueryResult();
        InternalQueryResult.ResultRow resultRow =
                new InternalQueryResult.ResultRow(columns, types, Collections.singletonList("1000"));
        queryResult.getResultRows().add(resultRow);

        StatsCategory statsCategory = new StatsCategory();
        StatsGranularity statsGranularity = new StatsGranularity();
        List<StatsType> statsTypes = Collections.singletonList(StatsType.ROW_COUNT);
        sampleSQLStatisticsTaskUnderTest = new SampleSQLStatisticsTask(0L,
                Collections.singletonList(new StatisticsDesc(statsCategory, statsGranularity, statsTypes)));

        new MockUp<InternalQuery>(InternalQuery.class) {
            @Mock
            public InternalQueryResult query() {
                return queryResult;
            }
        };
    }
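    // getQueryParams() resolves the descriptor against the catalog and returns the
    // placeholder map used to build the sampled query; "percent" defaults to 10 here.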
    @Test
    public void testGetQueryParams() throws Exception {
        // Setup
        Map<String, String> expectedResult = new HashMap<>();
        expectedResult.put("table", "tableName");
        expectedResult.put("partition", "partitionName");
        expectedResult.put("column", "columnName");
        expectedResult.put("percent", "10");

        StatsCategory category = new StatsCategory();
        category.setCategory(Category.TABLE);
        category.setDbId(0L);
        category.setTableId(0L);
        category.setPartitionName("partitionName");
        category.setColumnName("columnName");
        category.setStatsValue("statsValue");

        StatsGranularity statsGranularity = new StatsGranularity();
        statsGranularity.setGranularity(Granularity.TABLE);
        statsGranularity.setTableId(0L);
        statsGranularity.setPartitionId(0L);
        statsGranularity.setTabletId(0L);

        StatisticsDesc statsDesc = new StatisticsDesc(category, statsGranularity,
                Collections.singletonList(StatsType.ROW_COUNT));

        // Run the test
        Map<String, String> result = sampleSQLStatisticsTaskUnderTest.getQueryParams(statsDesc);

        // Verify the results
        Assert.assertEquals(expectedResult, result);
    }

    @Test
    public void testGetQueryParams_ThrowsDdlException() {
        // Setup
        StatsCategory category = new StatsCategory();
        category.setCategory(Category.TABLE);
        category.setDbId(-1L);
        category.setTableId(0L);
        category.setPartitionName("partitionName");
        category.setColumnName("columnName");
        category.setStatsValue("statsValue");

        StatsGranularity statsGranularity = new StatsGranularity();
        statsGranularity.setGranularity(Granularity.PARTITION);
        statsGranularity.setTableId(0L);
        statsGranularity.setPartitionId(0L);
        statsGranularity.setTabletId(0L);

        StatisticsDesc statsDesc = new StatisticsDesc(category, statsGranularity,
                Collections.singletonList(StatsType.ROW_COUNT));

        // Run the test
        Assert.assertThrows(DdlException.class,
                () -> sampleSQLStatisticsTaskUnderTest.getQueryParams(statsDesc));
    }
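    // Unlike the full-table task, the sampled variant appends a
    // TABLESAMPLE(<percent> PERCENT) clause to the generated statement.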
    @Test
    public void testConstructQuery() throws Exception {
        // Setup
        String expectedSQL = "SELECT COUNT(1) AS row_count FROM tableName TABLESAMPLE(10 PERCENT);";

        StatsCategory statsCategory = new StatsCategory();
        statsCategory.setCategory(StatsCategory.Category.TABLE);
        statsCategory.setDbId(0L);
        statsCategory.setTableId(0L);
        statsCategory.setPartitionName("partitionName");
        statsCategory.setColumnName("columnName");
        statsCategory.setStatsValue("statsValue");

        StatsGranularity statsGranularity = new StatsGranularity();
        statsGranularity.setGranularity(StatsGranularity.Granularity.TABLE);
        statsGranularity.setTableId(0L);
        statsGranularity.setPartitionId(0L);
        statsGranularity.setTabletId(0L);

        StatisticsDesc statsDesc = new StatisticsDesc(statsCategory, statsGranularity,
                Collections.singletonList(StatsType.ROW_COUNT));

        // Run the test
        String result = sampleSQLStatisticsTaskUnderTest.constructQuery(statsDesc);

        // Verify the results
        Assert.assertEquals(expectedSQL, result);
    }

    @Test
    public void testExecuteQuery_ThrowsException() {
        // Setup
        StatsGranularity statsGranularity = new StatsGranularity();
        statsGranularity.setGranularity(StatsGranularity.Granularity.TABLE);
        statsGranularity.setTableId(0L);
        statsGranularity.setPartitionId(0L);
        statsGranularity.setTabletId(0L);

        StatsCategory statsCategory = new StatsCategory();
        statsCategory.setCategory(StatsCategory.Category.TABLE);
        statsCategory.setDbId(0L);
        statsCategory.setTableId(0L);
        statsCategory.setPartitionName("partitionName");
        statsCategory.setColumnName("columnName");
        statsCategory.setStatsValue("statsValue");

        StatisticsDesc statsDesc = new StatisticsDesc(statsCategory, statsGranularity,
                Arrays.asList(StatsType.NDV, StatsType.MAX_VALUE, StatsType.MIN_VALUE));

        // Run the test
        Assert.assertThrows(Exception.class,
                () -> sampleSQLStatisticsTaskUnderTest.executeQuery(statsDesc));
    }
}
@ -1,182 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.statistics;

import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.HashDistributionInfo;
import org.apache.doris.catalog.KeysType;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.PartitionInfo;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.common.jmockit.Deencapsulation;
import org.apache.doris.datasource.InternalCatalog;
import org.apache.doris.system.SystemInfoService;

import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import mockit.Mock;
import mockit.MockUp;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

public class StatisticsJobSchedulerTest {
    private StatisticsJob statisticsJob;

    private StatisticsJobScheduler statisticsJobSchedulerUnderTest;

    @Before
    public void setUp() throws Exception {
        HashSet<Long> tblIds = Sets.newHashSet();
        tblIds.add(0L);
        tblIds.add(1L);

        Map<Long, List<String>> tableIdToColumnName = Maps.newHashMap();
        tableIdToColumnName.put(0L, Arrays.asList("c1", "c2"));
        tableIdToColumnName.put(1L, Arrays.asList("c1", "c2"));
        Map<Long, List<String>> tblIdToPartitionName = Maps.newHashMap();

        statisticsJob = new StatisticsJob(0L, tblIds, tblIdToPartitionName,
                tableIdToColumnName, null);
        statisticsJobSchedulerUnderTest = new StatisticsJobScheduler();
        statisticsJobSchedulerUnderTest.addPendingJob(statisticsJob);
    }
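    // The scheduler splits a pending job into per-table meta tasks (stats readable
    // from table metadata) and SQL tasks (stats that need a query); the expected
    // breakdown for the two mocked tables is spelled out in the comment below.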
    @Test
    public void testRunAfterCatalogReady() {
        // Setup
        Column col1 = new Column("c1", PrimitiveType.STRING);
        Column col2 = new Column("c2", PrimitiveType.INT);

        OlapTable tbl1 = new OlapTable(0L, "tbl1", Arrays.asList(col1, col2),
                KeysType.AGG_KEYS, new PartitionInfo(), new HashDistributionInfo());
        OlapTable tbl2 = new OlapTable(1L, "tbl2", Arrays.asList(col1, col2),
                KeysType.DUP_KEYS, new PartitionInfo(), new HashDistributionInfo());

        Database database = new Database(0L, "db");
        database.createTable(tbl1);
        database.createTable(tbl2);

        InternalCatalog catalog = Env.getCurrentInternalCatalog();
        ConcurrentHashMap<String, Database> fullNameToDb = new ConcurrentHashMap<>();
        fullNameToDb.put("cluster:db", database);
        Deencapsulation.setField(catalog, "fullNameToDb", fullNameToDb);

        ConcurrentHashMap<Long, Database> idToDb = new ConcurrentHashMap<>();
        idToDb.put(0L, database);
        Deencapsulation.setField(catalog, "idToDb", idToDb);

        new MockUp<SystemInfoService>(SystemInfoService.class) {
            @Mock
            public List<Long> getBackendIds(boolean needAlive) {
                return Collections.singletonList(1L);
            }
        };

        new MockUp<OlapTable>(OlapTable.class) {
            @Mock
            public long getDataSize() {
                return 1L;
            }
        };

        // Run the test
        statisticsJobSchedulerUnderTest.runAfterCatalogReady();

        /*
         * expected results:
         * metaTask(2):
         * - tbl1:
         *     - task:
         *         - data_size
         *         - max_size(c2)
         *         - avg_size(c2)
         * - tbl2:
         *     - task:
         *         - row_count
         *         - data_size
         *         - max_size(c2)
         *         - avg_size(c2)
         *
         * sqlTask(11):
         * - tbl1:
         *     - task:
         *         - ndv(c1)
         *         - min_value(c1)
         *         - max_value(c1)
         *     - task:
         *         - ndv(c2)
         *         - min_value(c2)
         *         - max_value(c2)
         *     - task:
         *         - max_size(c1)
         *         - avg_size(c1)
         *     - task:
         *         - num_nulls(c1)
         *     - task:
         *         - num_nulls(c2)
         *     - task:
         *         - row_count
         * - tbl2:
         *     - task:
         *         - ndv(c1)
         *         - min_value(c1)
         *         - max_value(c1)
         *     - task:
         *         - ndv(c2)
         *         - min_value(c2)
         *         - max_value(c2)
         *     - task:
         *         - max_size(c1)
         *         - avg_size(c1)
         *     - task:
         *         - num_nulls(c1)
         *     - task:
         *         - num_nulls(c2)
         */

        // Verify the results
        List<StatisticsTask> tasks = statisticsJob.getTasks();
        Assert.assertEquals(13, tasks.size());

        int sqlTaskCount = 0;
        int metaTaskCount = 0;

        for (StatisticsTask task : tasks) {
            if (task instanceof SQLStatisticsTask) {
                sqlTaskCount++;
            } else if (task instanceof MetaStatisticsTask) {
                metaTaskCount++;
            } else {
                Assert.fail("Unknown task type.");
            }
        }

        Assert.assertEquals(2, metaTaskCount);
        Assert.assertEquals(11, sqlTaskCount);
    }
}
@ -1,120 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.statistics;

import org.apache.doris.common.DdlException;
import org.apache.doris.statistics.StatisticsJob.JobState;
import org.apache.doris.statistics.StatisticsTask.TaskState;

import com.google.common.collect.Maps;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;

public class StatisticsJobTest {
    private StatisticsJob statisticsJobUnderTest;

    private StatisticsTask statisticsTaskUnderTest;

    @Before
    public void setUp() throws Exception {
        HashSet<Long> tblIds = new HashSet<>(Collections.singletonList(0L));
        Map<Long, List<String>> tblIdToPartitionName = Maps.newHashMap();
        Map<Long, List<String>> tableIdToColumnName = Maps.newHashMap();
        statisticsJobUnderTest = new StatisticsJob(0L, tblIds, tblIdToPartitionName,
                tableIdToColumnName, new HashMap<>());

        StatsCategory statsCategory = new StatsCategory();
        StatsGranularity statsGranularity = new StatsGranularity();
        List<StatsType> statsTypes = Collections.singletonList(StatsType.ROW_COUNT);
        statisticsTaskUnderTest = new SQLStatisticsTask(0L,
                Collections.singletonList(new StatisticsDesc(statsCategory, statsGranularity, statsTypes)));

        List<StatisticsTask> tasks = statisticsJobUnderTest.getTasks();
        tasks.add(statisticsTaskUnderTest);
    }

    @Test
    public void testUpdateJobState() throws Exception {
        // Run the test
        statisticsJobUnderTest.updateJobState(JobState.SCHEDULING);

        // Verify the results
        JobState jobState = statisticsJobUnderTest.getJobState();
        Assert.assertEquals(JobState.SCHEDULING, jobState);
    }

    @Test
    public void testUpdateJobState_ThrowsDdlException() {
        // Run the test
        Assert.assertThrows(DdlException.class,
                () -> statisticsJobUnderTest.updateJobState(JobState.RUNNING));
    }
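    // updateJobInfoByTaskId() treats an empty error message as success: the task is
    // marked FINISHED, and once every task has finished the job itself flips to
    // FINISHED. A non-empty message fails both task and job (next two tests).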
    @Test
    public void testUpdateJobInfoByTaskId() throws Exception {
        // Setup
        statisticsJobUnderTest.updateJobState(JobState.SCHEDULING);
        statisticsJobUnderTest.updateJobState(JobState.RUNNING);
        statisticsTaskUnderTest.updateTaskState(TaskState.RUNNING);

        // Run the test
        long taskId = statisticsTaskUnderTest.getId();
        statisticsJobUnderTest.updateJobInfoByTaskId(taskId, "");

        // Verify the results
        JobState jobState = statisticsJobUnderTest.getJobState();
        Assert.assertEquals(JobState.FINISHED, jobState);

        TaskState taskState = statisticsTaskUnderTest.getTaskState();
        Assert.assertEquals(TaskState.FINISHED, taskState);
    }

    @Test
    public void testUpdateJobInfoByTaskIdFailed() throws Exception {
        // Setup
        statisticsJobUnderTest.updateJobState(JobState.SCHEDULING);
        statisticsJobUnderTest.updateJobState(JobState.RUNNING);
        statisticsTaskUnderTest.updateTaskState(TaskState.RUNNING);

        // Run the test
        long taskId = statisticsTaskUnderTest.getId();
        statisticsJobUnderTest.updateJobInfoByTaskId(taskId, "errorMsg");

        // Verify the results
        JobState jobState = statisticsJobUnderTest.getJobState();
        Assert.assertEquals(JobState.FAILED, jobState);

        TaskState taskState = statisticsTaskUnderTest.getTaskState();
        Assert.assertEquals(TaskState.FAILED, taskState);
    }

    @Test
    public void testUpdateJobInfoByTaskId_ThrowsDdlException() {
        // Run the test
        long taskId = statisticsTaskUnderTest.getId();
        Assert.assertThrows(DdlException.class,
                () -> statisticsJobUnderTest.updateJobInfoByTaskId(taskId, ""));
    }
}
@ -1,167 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.statistics;

import org.apache.doris.analysis.DropTableStatsStmt;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.HashDistributionInfo;
import org.apache.doris.catalog.KeysType;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.PartitionInfo;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.jmockit.Deencapsulation;
import org.apache.doris.datasource.InternalCatalog;
import org.apache.doris.statistics.StatisticsTaskResult.TaskResult;
import org.apache.doris.statistics.StatsCategory.Category;
import org.apache.doris.statistics.StatsGranularity.Granularity;

import com.google.common.collect.Maps;
import mockit.Expectations;
import mockit.Mocked;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

public class StatisticsManagerTest {
    private StatisticsManager statisticsManagerUnderTest;

    @Before
    public void setUp() throws Exception {
        Column col1 = new Column("c1", PrimitiveType.STRING);
        Column col2 = new Column("c2", PrimitiveType.INT);
        OlapTable tbl1 = new OlapTable(0L, "tbl1", Arrays.asList(col1, col2), KeysType.AGG_KEYS,
                new PartitionInfo(), new HashDistributionInfo());
        OlapTable tbl2 = new OlapTable(1L, "tbl2", Arrays.asList(col1, col2), KeysType.DUP_KEYS,
                new PartitionInfo(), new HashDistributionInfo());
        Database database = new Database(0L, "db");
        database.createTable(tbl1);
        database.createTable(tbl2);

        InternalCatalog catalog = Env.getCurrentInternalCatalog();
        ConcurrentHashMap<String, Database> fullNameToDb = new ConcurrentHashMap<>();
        fullNameToDb.put("cluster:db", database);
        Deencapsulation.setField(catalog, "fullNameToDb", fullNameToDb);

        ConcurrentHashMap<Long, Database> idToDb = new ConcurrentHashMap<>();
        idToDb.put(0L, database);
        Deencapsulation.setField(catalog, "idToDb", idToDb);

        statisticsManagerUnderTest = new StatisticsManager();
    }

    @Test
    public void testUpdateStatistics() throws Exception {
        // Setup
        TaskResult taskResult = new TaskResult();
        taskResult.setDbId(0L);
        taskResult.setTableId(0L);
        taskResult.setCategory(Category.TABLE);
        taskResult.setGranularity(Granularity.TABLE);
        Map<StatsType, String> statsTypeToValue = new HashMap<>();
        statsTypeToValue.put(StatsType.ROW_COUNT, "1000");
        statsTypeToValue.put(StatsType.DATA_SIZE, "10240");
        taskResult.setStatsTypeToValue(statsTypeToValue);

        List<StatisticsTaskResult> statsTaskResults = Collections.singletonList(
                new StatisticsTaskResult(Collections.singletonList(taskResult)));

        // Run the test
        statisticsManagerUnderTest.updateStatistics(statsTaskResults);
        Statistics statistics = statisticsManagerUnderTest.getStatistics();
        TableStats tableStats = statistics.getTableStats(0L);

        // Verify the results
        double rowCount = tableStats.getRowCount();
        Assert.assertEquals(1000L, rowCount, 0.1);

        long dataSize = tableStats.getDataSize();
        Assert.assertEquals(10240L, dataSize);
    }

    @Test
    public void testUpdateStatistics_ThrowsAnalysisException() {
        // Setup
        TaskResult taskResult = new TaskResult();
        taskResult.setDbId(0L);
        taskResult.setTableId(1L);
        taskResult.setPartitionName("partitionName");
        taskResult.setColumnName("columnName");
        taskResult.setCategory(Category.TABLE);
        taskResult.setGranularity(Granularity.TABLE);
        taskResult.setStatsTypeToValue(new HashMap<>());
        List<StatisticsTaskResult> statsTaskResults = Collections.singletonList(
                new StatisticsTaskResult(Collections.singletonList(taskResult)));

        // Run the test
        Assert.assertThrows(AnalysisException.class,
                () -> statisticsManagerUnderTest.updateStatistics(statsTaskResults));
    }
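    // dropStats() removes the stored stats for the given tables; a null partition
    // set apparently means the whole table, so subsequent reads fall back to the
    // -1 defaults asserted below.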
    @Test
    public void testDropStats(@Mocked DropTableStatsStmt stmt) throws AnalysisException {
        TaskResult taskResult = new TaskResult();
        taskResult.setDbId(0L);
        taskResult.setTableId(0L);
        taskResult.setCategory(Category.TABLE);
        taskResult.setGranularity(Granularity.TABLE);
        Map<StatsType, String> statsTypeToValue = new HashMap<>();
        statsTypeToValue.put(StatsType.ROW_COUNT, "1000");
        statsTypeToValue.put(StatsType.DATA_SIZE, "10240");
        taskResult.setStatsTypeToValue(statsTypeToValue);

        List<StatisticsTaskResult> statsTaskResults = Collections.singletonList(
                new StatisticsTaskResult(Collections.singletonList(taskResult)));
        statisticsManagerUnderTest.updateStatistics(statsTaskResults);

        Map<Long, Set<String>> tblIdToPartition = Maps.newHashMap();
        tblIdToPartition.put(0L, null);

        new Expectations() {
            {
                stmt.getTblIdToPartition();
                this.minTimes = 0;
                this.result = tblIdToPartition;
            }
        };

        // Run the test
        statisticsManagerUnderTest.dropStats(stmt);

        // Verify the results
        Statistics statistics = statisticsManagerUnderTest.getStatistics();
        TableStats statsOrDefault = statistics.getTableStatsOrDefault(0L);

        double rowCount = statsOrDefault.getRowCount();
        Assert.assertEquals(-1.0f, rowCount, 0.0001);

        double dataSize = statsOrDefault.getDataSize();
        Assert.assertEquals(-1.0f, dataSize, 0.0001);
    }
}
@ -1,267 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.statistics;

import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;

import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import java.util.HashMap;
import java.util.Map;

public class StatisticsTest {
    private Statistics statisticsUnderTest;

    @Before
    public void setUp() throws Exception {
        statisticsUnderTest = new Statistics();
    }

    @Test
    public void testUpdateTableStats() throws Exception {
        // Setup
        Map<StatsType, String> statsTypeToValue = new HashMap<>();
        statsTypeToValue.put(StatsType.ROW_COUNT, "1000");

        // Run the test
        statisticsUnderTest.updateTableStats(0L, statsTypeToValue);
        long rowCount = (long) statisticsUnderTest.getTableStats(0L).getRowCount();

        // Verify the results
        Assert.assertEquals(1000L, rowCount);
    }

    @Test
    public void testUpdateTableStats_ThrowsAnalysisException() {
        // Setup
        Map<StatsType, String> statsTypeToValue = new HashMap<>();
        statsTypeToValue.put(StatsType.ROW_COUNT, "-100");

        // Run the test
        Assert.assertThrows(AnalysisException.class,
                () -> statisticsUnderTest.updateTableStats(0L, statsTypeToValue));
    }

    @Test
    public void testUpdatePartitionStats() throws Exception {
        // Setup
        Map<StatsType, String> statsTypeToValue = new HashMap<>();
        statsTypeToValue.put(StatsType.ROW_COUNT, "1000");

        // Run the test
        statisticsUnderTest.updatePartitionStats(0L, "partitionName", statsTypeToValue);
        Map<String, PartitionStats> partitionStats = statisticsUnderTest
                .getPartitionStats(0L, "partitionName");
        long rowCount = partitionStats.get("partitionName").getRowCount();

        // Verify the results
        Assert.assertEquals(1000L, rowCount);
    }

    @Test
    public void testUpdatePartitionStats_ThrowsAnalysisException() {
        // Setup
        Map<StatsType, String> statsTypeToValue = new HashMap<>();
        statsTypeToValue.put(StatsType.ROW_COUNT, "-100");

        // Run the test
        Assert.assertThrows(AnalysisException.class, () -> statisticsUnderTest
                .updatePartitionStats(0L, "partitionName", statsTypeToValue));
    }
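    // Column stats can be attached either at table scope (tableId + column) or at
    // partition scope (tableId + partition + column); the overloads exercised below
    // differ only in that extra partitionName argument.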
    @Test
    public void testUpdateTableColumnStats() throws Exception {
        // Setup
        Type columnType = Type.fromPrimitiveType(PrimitiveType.STRING);
        Map<StatsType, String> statsTypeToValue = new HashMap<>();
        statsTypeToValue.put(StatsType.NUM_NULLS, "1000");

        // Run the test
        statisticsUnderTest.updateColumnStats(0L, "columnName", columnType, statsTypeToValue);
        Map<String, ColumnStat> columnStats = statisticsUnderTest.getColumnStats(0L);
        long numNulls = (long) columnStats.get("columnName").getNumNulls();

        // Verify the results
        Assert.assertEquals(1000L, numNulls);
    }

    @Test
    public void testUpdateTableColumnStats_ThrowsAnalysisException() {
        // Setup
        Type columnType = Type.fromPrimitiveType(PrimitiveType.BIGINT);
        Map<StatsType, String> statsTypeToValue = new HashMap<>();
        statsTypeToValue.put(StatsType.MAX_VALUE, "ABC");

        // Run the test
        Assert.assertThrows(AnalysisException.class, () -> statisticsUnderTest
                .updateColumnStats(0L, "columnName", columnType, statsTypeToValue));
    }

    @Test
    public void testUpdatePartitionColumnStats() throws Exception {
        // Setup
        Type columnType = Type.fromPrimitiveType(PrimitiveType.STRING);
        Map<StatsType, String> statsTypeToValue = new HashMap<>();
        statsTypeToValue.put(StatsType.NUM_NULLS, "1000");

        // Run the test
        statisticsUnderTest.updateColumnStats(0L, "partitionName",
                "columnName", columnType, statsTypeToValue);
        Map<String, ColumnStat> columnStats = statisticsUnderTest
                .getColumnStats(0L, "partitionName");
        long numNulls = (long) columnStats.get("columnName").getNumNulls();

        // Verify the results
        Assert.assertEquals(1000L, numNulls);
    }

    @Test
    public void testUpdatePartitionColumnStats_ThrowsAnalysisException() {
        // Setup
        Type columnType = Type.fromPrimitiveType(PrimitiveType.BIGINT);
        Map<StatsType, String> statsTypeToValue = new HashMap<>();
        statsTypeToValue.put(StatsType.ROW_COUNT, "ABC");

        // Run the test
        Assert.assertThrows(AnalysisException.class, () -> statisticsUnderTest.updateColumnStats(
                0L, "partitionName", "columnName", columnType, statsTypeToValue));
    }
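    // The getters below throw AnalysisException for ids that were never populated
    // instead of returning empty results, which is what the *_ThrowsAnalysisException
    // cases pin down.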
    @Test
    public void testGetTableStats() throws Exception {
        // Setup
        Map<StatsType, String> statsTypeToValue = new HashMap<>();
        statsTypeToValue.put(StatsType.ROW_COUNT, "1000");
        statisticsUnderTest.updateTableStats(0L, statsTypeToValue);

        // Run the test
        TableStats result = statisticsUnderTest.getTableStats(0L);

        // Verify the results
        double rowCount = result.getRowCount();
        Assert.assertEquals(1000, rowCount, 0.1);
    }

    @Test
    public void testGetTableStats_ThrowsAnalysisException() {
        // Verify the results
        Assert.assertThrows(AnalysisException.class,
                () -> statisticsUnderTest.getTableStats(0L));
    }

    @Test
    public void testGetPartitionStats() throws Exception {
        // Setup
        Map<StatsType, String> statsTypeToValue = new HashMap<>();
        statsTypeToValue.put(StatsType.ROW_COUNT, "1000");
        statisticsUnderTest.updatePartitionStats(0L, "partitionName", statsTypeToValue);

        // Run the test
        Map<String, PartitionStats> result = statisticsUnderTest.getPartitionStats(0L);

        // Verify the results
        PartitionStats partitionStats = result.get("partitionName");
        long rowCount = partitionStats.getRowCount();
        Assert.assertEquals(1000, rowCount);
    }

    @Test
    public void testGetPartitionStats1_ThrowsAnalysisException() {
        // Verify the results
        Assert.assertThrows(AnalysisException.class,
                () -> statisticsUnderTest.getPartitionStats(0L));
    }

    @Test
    public void testGetPartitionStatsWithName() throws Exception {
        // Setup
        Map<StatsType, String> statsTypeToValue = new HashMap<>();
        statsTypeToValue.put(StatsType.ROW_COUNT, "1000");
        statisticsUnderTest.updatePartitionStats(0L, "partitionName", statsTypeToValue);

        // Run the test
        Map<String, PartitionStats> result = statisticsUnderTest
                .getPartitionStats(0L, "partitionName");

        // Verify the results
        PartitionStats partitionStats = result.get("partitionName");
        long rowCount = partitionStats.getRowCount();
        Assert.assertEquals(1000, rowCount);
    }

    @Test
    public void testGetPartitionStatsWithName_ThrowsAnalysisException() {
        // Run the test
        Assert.assertThrows(AnalysisException.class, () -> statisticsUnderTest
                .getPartitionStats(0L, "partitionName"));
    }

    @Test
    public void testGetTableColumnStats() throws Exception {
        // Setup
        Type columnType = Type.fromPrimitiveType(PrimitiveType.STRING);
        Map<StatsType, String> statsTypeToValue = new HashMap<>();
        statsTypeToValue.put(StatsType.NUM_NULLS, "1000");
        statisticsUnderTest.updateColumnStats(0L, "columnName", columnType, statsTypeToValue);

        // Run the test
        Map<String, ColumnStat> result = statisticsUnderTest.getColumnStats(0L);

        // Verify the results
        ColumnStat columnStats = result.get("columnName");
        double numNulls = columnStats.getNumNulls();
        Assert.assertEquals(1000, numNulls, 0.1);
    }

    @Test
    public void testGetTableColumnStats_ThrowsAnalysisException() {
        // Verify the results
        Assert.assertThrows(AnalysisException.class,
                () -> statisticsUnderTest.getColumnStats(0L));
    }

    @Test
    public void testGetPartitionColumnStats() throws Exception {
        // Setup
        Type columnType = Type.fromPrimitiveType(PrimitiveType.STRING);
        Map<StatsType, String> statsTypeToValue = new HashMap<>();
        statsTypeToValue.put(StatsType.NUM_NULLS, "1000");
        statisticsUnderTest.updateColumnStats(0L, "partitionName",
                "columnName", columnType, statsTypeToValue);

        // Run the test
        Map<String, ColumnStat> result = statisticsUnderTest
                .getColumnStats(0L, "partitionName");

        // Verify the results
        ColumnStat columnStats = result.get("columnName");
        double numNulls = columnStats.getNumNulls();
        Assert.assertEquals(1000, numNulls, 0.1);
    }

    @Test
    public void testGetPartitionColumnStats_ThrowsAnalysisException() {
        // Verify the results
        Assert.assertThrows(AnalysisException.class, () -> statisticsUnderTest
                .getColumnStats(0L, "partitionName"));
    }
}
@ -1,182 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.statistics;

import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;

import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class TableStatsTest {
    private TableStats tableStatsUnderTest;

    @Before
    public void setUp() throws Exception {
        tableStatsUnderTest = new TableStats();
    }

    @Test
    public void testUpdateTableStats() throws Exception {
        // Setup
        Map<StatsType, String> statsTypeToValue = new HashMap<>();
        statsTypeToValue.put(StatsType.ROW_COUNT, "1000");
        statsTypeToValue.put(StatsType.DATA_SIZE, "10240");

        // Run the test
        tableStatsUnderTest.updateTableStats(statsTypeToValue);

        // Verify the results
        double rowCount = tableStatsUnderTest.getRowCount();
        Assert.assertEquals(1000, rowCount, 0.01);

        long dataSize = tableStatsUnderTest.getDataSize();
        Assert.assertEquals(10240, dataSize);
    }

    @Test
    public void testUpdateTableStats_ThrowsAnalysisException() {
        // Setup
        Map<StatsType, String> statsTypeToValue = new HashMap<>();
        statsTypeToValue.put(StatsType.AVG_SIZE, "8");
        statsTypeToValue.put(StatsType.ROW_COUNT, "abc");

        // Run the test
        Assert.assertThrows(AnalysisException.class,
                () -> tableStatsUnderTest.updateTableStats(statsTypeToValue));
    }

    @Test
    public void testUpdatePartitionStats() throws Exception {
        // Setup
        Map<StatsType, String> statsTypeToValue = new HashMap<>();
        statsTypeToValue.put(StatsType.ROW_COUNT, "1000");
        statsTypeToValue.put(StatsType.DATA_SIZE, "10240");

        // Run the test
        tableStatsUnderTest.updatePartitionStats("partitionName", statsTypeToValue);
        PartitionStats partitionStats = tableStatsUnderTest.getNameToPartitionStats().get("partitionName");

        // Verify the results
        long rowCount = partitionStats.getRowCount();
        Assert.assertEquals(1000, rowCount);

        long dataSize = partitionStats.getDataSize();
        Assert.assertEquals(10240, dataSize);
    }

    @Test
    public void testUpdatePartitionStats_ThrowsAnalysisException() {
        // Setup
        Map<StatsType, String> statsTypeToValue = new HashMap<>();
        statsTypeToValue.put(StatsType.ROW_COUNT, "abc");

        // Run the test
        Assert.assertThrows(AnalysisException.class, () -> tableStatsUnderTest
                .updatePartitionStats("partitionName", statsTypeToValue));
    }

    @Test
    public void testUpdateColumnStats() throws Exception {
        // Setup
        Type columnType = Type.fromPrimitiveType(PrimitiveType.BIGINT);
        Map<StatsType, String> statsTypeToValue = new HashMap<>();
        statsTypeToValue.put(StatsType.NDV, "1");
        statsTypeToValue.put(StatsType.AVG_SIZE, "8");
        statsTypeToValue.put(StatsType.MAX_SIZE, "8");
        statsTypeToValue.put(StatsType.NUM_NULLS, "2");
        statsTypeToValue.put(StatsType.MIN_VALUE, "0");
        statsTypeToValue.put(StatsType.MAX_VALUE, "1000");

        // Run the test
        tableStatsUnderTest.updateColumnStats("columnName", columnType, statsTypeToValue);
        ColumnStat columnStats = tableStatsUnderTest.getColumnStats("columnName");

        // Verify the results
        double ndv = columnStats.getNdv();
        Assert.assertEquals(1L, ndv, 0.01);

        double avgSize = columnStats.getAvgSizeByte();
        Assert.assertEquals(8.0f, avgSize, 0.0001);

        double maxSize = columnStats.getMaxSizeByte();
        Assert.assertEquals(8L, maxSize, 0.01);

        double maxValue = columnStats.getMaxValue();
        Assert.assertEquals(1000, maxValue, 0.01);

        double minValue = columnStats.getMinValue();
        Assert.assertEquals(0L, minValue, 0.01);

        double numNulls = columnStats.getNumNulls();
        Assert.assertEquals(2, numNulls, 0.01);
    }

    @Test
    public void testUpdateColumnStats_ThrowsAnalysisException() {
        // Setup
        Type columnType = Type.fromPrimitiveType(PrimitiveType.INVALID_TYPE);
        Map<StatsType, String> statsTypeToValue = new HashMap<>();
        statsTypeToValue.put(StatsType.AVG_SIZE, "abc");

        // Run the test
        Assert.assertThrows(AnalysisException.class, () -> tableStatsUnderTest
                .updateColumnStats("columnName", columnType, statsTypeToValue));
    }
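    // Table-level show info renders the row count as a double ("1000.0") while the
    // partition-level variant below returns the raw long ("1000"); the two tests
    // capture that difference in formatting.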
    @Test
    public void testGetShowInfo() throws AnalysisException {
        // Setup
        Map<StatsType, String> statsTypeToValue = new HashMap<>();
        statsTypeToValue.put(StatsType.ROW_COUNT, "1000");
        statsTypeToValue.put(StatsType.DATA_SIZE, "10240");

        tableStatsUnderTest.updateTableStats(statsTypeToValue);
        String[] expectedInfo = {"1000.0", "10240"};

        // Run the test
        List<String> showInfo = tableStatsUnderTest.getShowInfo();
        String[] result = showInfo.toArray(new String[0]);

        // Verify the results
        Assert.assertArrayEquals(expectedInfo, result);
    }

    @Test
    public void testGetShowInfoWithPartitionName() throws AnalysisException {
        // Setup
        Map<StatsType, String> statsTypeToValue = new HashMap<>();
        statsTypeToValue.put(StatsType.ROW_COUNT, "1000");
        statsTypeToValue.put(StatsType.DATA_SIZE, "10240");

        tableStatsUnderTest.updatePartitionStats("partitionName", statsTypeToValue);
        String[] expectedInfo = {"1000", "10240"};

        // Run the test
        List<String> showInfo = tableStatsUnderTest.getShowInfo("partitionName");
        String[] result = showInfo.toArray(new String[0]);

        // Verify the results
        Assert.assertArrayEquals(expectedInfo, result);
    }
}
Block a user