[refactor](statistics) Remove deprecated statistics related codes (#14797)

This commit is contained in:
Kikyou1997
2022-12-07 20:41:00 +08:00
committed by GitHub
parent a3095e29d5
commit dfb02a7104
37 changed files with 24 additions and 5498 deletions

View File

@ -1268,10 +1268,6 @@ alter_stmt ::=
{:
RESULT = new AlterSqlBlockRuleStmt(ruleName, properties);
:}
| KW_ALTER KW_TABLE table_name:tbl KW_SET KW_STATS LPAREN key_value_map:map RPAREN opt_partition_names:partitionNames
{:
RESULT = new AlterTableStatsStmt(tbl, map, partitionNames);
:}
| KW_ALTER KW_TABLE table_name:tbl KW_MODIFY KW_COLUMN ident:columnName
KW_SET KW_STATS LPAREN key_value_map:map RPAREN opt_partition_names:partitionNames
{:
@ -3672,11 +3668,6 @@ show_param ::=
{:
RESULT = new ShowSyncJobStmt(dbName);
:}
/* show table stats */
| KW_TABLE KW_STATS opt_table_name:tbl opt_partition_names:partitionNames
{:
RESULT = new ShowTableStatsStmt(tbl, partitionNames);
:}
/* show column stats */
| KW_COLUMN KW_STATS table_name:tbl opt_partition_names:partitionNames
{:

View File

@ -31,7 +31,7 @@ import org.apache.doris.common.util.PrintableMap;
import org.apache.doris.common.util.Util;
import org.apache.doris.mysql.privilege.PrivPredicate;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.statistics.ColumnStat;
import org.apache.doris.statistics.ColumnStatistic;
import org.apache.doris.statistics.StatsType;
import com.google.common.collect.ImmutableSet;
@ -56,12 +56,12 @@ public class AlterColumnStatsStmt extends DdlStmt {
private static final ImmutableSet<StatsType> CONFIGURABLE_PROPERTIES_SET = new ImmutableSet.Builder<StatsType>()
.add(StatsType.ROW_COUNT)
.add(ColumnStat.NDV)
.add(ColumnStat.AVG_SIZE)
.add(ColumnStat.MAX_SIZE)
.add(ColumnStat.NUM_NULLS)
.add(ColumnStat.MIN_VALUE)
.add(ColumnStat.MAX_VALUE)
.add(ColumnStatistic.NDV)
.add(ColumnStatistic.AVG_SIZE)
.add(ColumnStatistic.MAX_SIZE)
.add(ColumnStatistic.NUM_NULLS)
.add(ColumnStatistic.MIN_VALUE)
.add(ColumnStatistic.MAX_VALUE)
.add(StatsType.DATA_SIZE)
.build();

View File

@ -1,165 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.analysis;
import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.Table;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.ErrorCode;
import org.apache.doris.common.ErrorReport;
import org.apache.doris.common.UserException;
import org.apache.doris.common.util.PrintableMap;
import org.apache.doris.common.util.Util;
import org.apache.doris.mysql.privilege.PrivPredicate;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.statistics.StatsType;
import org.apache.doris.statistics.TableStats;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
/**
 * Manually inject statistics for tables or partitions.
 * Only OLAP table statistics are supported.
 *
 * syntax:
 * ALTER TABLE table_name
 * SET STATS ('k1' = 'v1', ...) [ PARTITIONS(p_name1, p_name2...) ]
 */
public class AlterTableStatsStmt extends DdlStmt {

    // Only table-level DATA_SIZE and ROW_COUNT may be set manually.
    private static final ImmutableSet<StatsType> CONFIGURABLE_PROPERTIES_SET =
            new ImmutableSet.Builder<StatsType>()
                    .add(TableStats.DATA_SIZE)
                    .add(TableStats.ROW_COUNT)
                    .build();

    private final TableName tableName;
    // Optional PARTITIONS(...) clause; null means the stats apply to the whole table.
    private final PartitionNames optPartitionNames;
    // Raw 'key' = 'value' pairs from the SET STATS clause; never null after construction.
    private final Map<String, String> properties;

    // Validated partition names; filled by analyze() when a PARTITIONS clause is present.
    private final List<String> partitionNames = Lists.newArrayList();
    // properties re-keyed by StatsType; filled by analyze().
    private final Map<StatsType, String> statsTypeToValue = Maps.newHashMap();

    public AlterTableStatsStmt(TableName tableName, Map<String, String> properties,
            PartitionNames optPartitionNames) {
        this.tableName = tableName;
        // Normalize null to an empty map so analyze() can iterate unconditionally.
        this.properties = properties == null ? Maps.newHashMap() : properties;
        this.optPartitionNames = optPartitionNames;
    }

    public TableName getTableName() {
        return tableName;
    }

    /** Partition names validated by analyze(); empty when no PARTITIONS clause was given. */
    public List<String> getPartitionNames() {
        return partitionNames;
    }

    /** Stats values keyed by StatsType; only meaningful after analyze() has run. */
    public Map<StatsType, String> getStatsTypeToValue() {
        return statsTypeToValue;
    }

    /**
     * Validates the statement — table name, catalog, partition names, property
     * keys, and ALTER privilege on the target table — then fills statsTypeToValue.
     * Checks run in a fixed order, so the first failing check determines the
     * exception the caller sees.
     *
     * @throws UserException if any validation step fails
     */
    @Override
    public void analyze(Analyzer analyzer) throws UserException {
        super.analyze(analyzer);
        // check table name
        tableName.analyze(analyzer);
        // disallow external catalog
        Util.prohibitExternalCatalog(tableName.getCtl(), this.getClass().getSimpleName());
        // check partition
        checkPartitionNames();
        // check properties: every key must map to a configurable StatsType
        Optional<StatsType> optional = properties.keySet().stream().map(StatsType::fromString)
                .filter(statsType -> !CONFIGURABLE_PROPERTIES_SET.contains(statsType))
                .findFirst();
        if (optional.isPresent()) {
            throw new AnalysisException(optional.get() + " is invalid statistics");
        }
        // check auth: manual stats injection requires ALTER privilege on the table
        if (!Env.getCurrentEnv().getAuth()
                .checkTblPriv(ConnectContext.get(), tableName.getDb(), tableName.getTbl(), PrivPredicate.ALTER)) {
            ErrorReport.reportAnalysisException(ErrorCode.ERR_TABLEACCESS_DENIED_ERROR, "ALTER TABLE STATS",
                    ConnectContext.get().getQualifiedUser(), ConnectContext.get().getRemoteIP(),
                    tableName.getDb() + ": " + tableName.getTbl());
        }
        // get statsTypeToValue
        properties.forEach((key, value) -> {
            StatsType statsType = StatsType.fromString(key);
            statsTypeToValue.put(statsType, value);
        });
    }

    /**
     * Verifies the target is an OLAP table and, when a PARTITIONS clause is
     * present, that the table is partitioned and every named partition exists.
     * On success the validated names are copied into partitionNames.
     * Uses the inherited {@code analyzer} field, so it must run after
     * {@code super.analyze()}.
     *
     * @throws AnalysisException if the table is not OLAP or a partition is missing
     */
    private void checkPartitionNames() throws AnalysisException {
        Database db = analyzer.getEnv().getInternalCatalog().getDbOrAnalysisException(tableName.getDb());
        Table table = db.getTableOrAnalysisException(tableName.getTbl());
        if (table.getType() != Table.TableType.OLAP) {
            throw new AnalysisException("Only OLAP table statistics are supported");
        }
        if (optPartitionNames != null) {
            OlapTable olapTable = (OlapTable) table;
            if (!olapTable.isPartitioned()) {
                throw new AnalysisException("Not a partitioned table: " + olapTable.getName());
            }
            optPartitionNames.analyze(analyzer);
            List<String> names = optPartitionNames.getPartitionNames();
            Set<String> olapPartitionNames = olapTable.getPartitionNames();
            // reject the first requested name that is not a partition of the table
            Optional<String> optional = names.stream()
                    .filter(name -> !olapPartitionNames.contains(name))
                    .findFirst();
            if (optional.isPresent()) {
                throw new AnalysisException("Partition does not exist: " + optional.get());
            }
            partitionNames.addAll(names);
        }
    }

    @Override
    public String toSql() {
        StringBuilder sb = new StringBuilder();
        sb.append("ALTER TABLE ");
        sb.append(tableName.toSql());
        sb.append(" SET STATS ");
        sb.append("(");
        sb.append(new PrintableMap<>(properties,
                " = ", true, false));
        sb.append(") ");
        if (optPartitionNames != null) {
            sb.append(optPartitionNames.toSql());
        }
        return sb.toString();
    }
}

View File

@ -31,7 +31,6 @@ import org.apache.doris.mysql.privilege.PaloAuth;
import org.apache.doris.mysql.privilege.PrivPredicate;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.qe.ShowResultSetMetaData;
import org.apache.doris.statistics.StatisticsJob;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
@ -271,7 +270,7 @@ public class ShowAnalyzeStmt extends ShowStmt {
stateValue = value.toUpperCase();
try {
StatisticsJob.JobState.valueOf(stateValue);
// support it later
} catch (Exception e) {
valid = false;
}

View File

@ -1,110 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.analysis;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.ScalarType;
import org.apache.doris.common.ErrorCode;
import org.apache.doris.common.ErrorReport;
import org.apache.doris.common.UserException;
import org.apache.doris.common.util.Util;
import org.apache.doris.qe.ShowResultSetMetaData;
import org.apache.doris.statistics.TableStats;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;
import java.util.Collections;
import java.util.List;
public class ShowTableStatsStmt extends ShowStmt {

    // Result-set header: table name plus the two table-level statistics.
    private static final ImmutableList<String> TITLE_NAMES = ImmutableList.of(
            "table_name",
            TableStats.ROW_COUNT.getValue(),
            TableStats.DATA_SIZE.getValue());

    private final TableName tableName;
    private final PartitionNames partitionNames;

    // Resolved during analyze() only when no explicit table was given;
    // exactly one of tableName / dbName is meaningful at any time.
    private String dbName;

    public ShowTableStatsStmt(TableName tableName, PartitionNames partitionNames) {
        this.tableName = tableName;
        this.partitionNames = partitionNames;
    }

    /** Target table name, or null when the statement covers a whole database. */
    public String getTableName() {
        Preconditions.checkArgument(isAnalyzed(), "The db name must be obtained after the parsing is complete");
        return tableName == null ? null : tableName.getTbl();
    }

    /** Database name, taken from the table reference when one was supplied. */
    public String getDbName() {
        Preconditions.checkArgument(isAnalyzed(), "The db name must be obtained after the parsing is complete");
        return tableName == null ? dbName : tableName.getDb();
    }

    /** Requested partition names; empty when no PARTITIONS clause was given. */
    public List<String> getPartitionNames() {
        return partitionNames == null ? Collections.emptyList() : partitionNames.getPartitionNames();
    }

    @Override
    public void analyze(Analyzer analyzer) throws UserException {
        super.analyze(analyzer);
        if (tableName == null) {
            // No table given: fall back to the session's current database.
            dbName = analyzer.getDefaultDb();
            if (Strings.isNullOrEmpty(dbName)) {
                ErrorReport.reportAnalysisException(ErrorCode.ERR_NO_DB_ERROR);
            }
            return;
        }
        tableName.analyze(analyzer);
        if (partitionNames != null) {
            partitionNames.analyze(analyzer);
        }
        // disallow external catalog
        Util.prohibitExternalCatalog(tableName.getCtl(), this.getClass().getSimpleName());
    }

    @Override
    public ShowResultSetMetaData getMetaData() {
        ShowResultSetMetaData.Builder metaBuilder = ShowResultSetMetaData.builder();
        TITLE_NAMES.forEach(title -> metaBuilder.addColumn(new Column(title, ScalarType.createVarchar(30))));
        return metaBuilder.build();
    }
}

View File

@ -211,10 +211,6 @@ import org.apache.doris.service.FrontendOptions;
import org.apache.doris.statistics.AnalysisManager;
import org.apache.doris.statistics.AnalysisTaskScheduler;
import org.apache.doris.statistics.StatisticsCache;
import org.apache.doris.statistics.StatisticsJobManager;
import org.apache.doris.statistics.StatisticsJobScheduler;
import org.apache.doris.statistics.StatisticsManager;
import org.apache.doris.statistics.StatisticsTaskScheduler;
import org.apache.doris.system.Backend;
import org.apache.doris.system.FQDNManager;
import org.apache.doris.system.Frontend;
@ -396,11 +392,6 @@ public class Env {
private DeployManager deployManager;
private TabletStatMgr tabletStatMgr;
// statistics
private StatisticsManager statisticsManager;
private StatisticsJobManager statisticsJobManager;
private StatisticsJobScheduler statisticsJobScheduler;
private StatisticsTaskScheduler statisticsTaskScheduler;
private PaloAuth auth;
@ -594,11 +585,6 @@ public class Env {
this.globalTransactionMgr = new GlobalTransactionMgr(this);
this.tabletStatMgr = new TabletStatMgr();
// statistics
this.statisticsManager = new StatisticsManager();
this.statisticsJobManager = new StatisticsJobManager();
this.statisticsJobScheduler = new StatisticsJobScheduler();
this.statisticsTaskScheduler = new StatisticsTaskScheduler();
this.auth = new PaloAuth();
this.domainResolver = new DomainResolver(auth);
@ -756,23 +742,6 @@ public class Env {
return checkpointer;
}
// statistics
public StatisticsManager getStatisticsManager() {
return statisticsManager;
}
public StatisticsJobManager getStatisticsJobManager() {
return statisticsJobManager;
}
public StatisticsJobScheduler getStatisticsJobScheduler() {
return statisticsJobScheduler;
}
public StatisticsTaskScheduler getStatisticsTaskScheduler() {
return statisticsTaskScheduler;
}
// Use tryLock to avoid potential dead lock
private boolean tryLock(boolean mustLock) {
while (true) {
@ -1429,8 +1398,6 @@ public class Env {
partitionInMemoryInfoCollector.start();
streamLoadRecordMgr.start();
getInternalCatalog().getIcebergTableCreationRecordMgr().start();
this.statisticsJobScheduler.start();
this.statisticsTaskScheduler.start();
new InternalSchemaInitializer().start();
if (Config.enable_fqdn_mode) {
fqdnManager.start();

View File

@ -484,15 +484,12 @@ public class OlapScanNode extends ScanNode {
* Remove the method after statistics collection is working properly
*/
public void mockRowCountInStatistic() {
long tableId = desc.getTable().getId();
cardinality = 0;
for (long selectedPartitionId : selectedPartitionIds) {
final Partition partition = olapTable.getPartition(selectedPartitionId);
final MaterializedIndex baseIndex = partition.getBaseIndex();
cardinality += baseIndex.getRowCount();
}
Env.getCurrentEnv().getStatisticsManager()
.getStatistics().mockTableStatsWithRowCount(tableId, cardinality);
}
@Override

View File

@ -39,7 +39,6 @@ import org.apache.doris.analysis.AlterResourceStmt;
import org.apache.doris.analysis.AlterRoutineLoadStmt;
import org.apache.doris.analysis.AlterSqlBlockRuleStmt;
import org.apache.doris.analysis.AlterSystemStmt;
import org.apache.doris.analysis.AlterTableStatsStmt;
import org.apache.doris.analysis.AlterTableStmt;
import org.apache.doris.analysis.AlterUserStmt;
import org.apache.doris.analysis.AlterViewStmt;
@ -165,8 +164,6 @@ public class DdlExecutor {
env.createMaterializedView((CreateMaterializedViewStmt) ddlStmt);
} else if (ddlStmt instanceof AlterTableStmt) {
env.alterTable((AlterTableStmt) ddlStmt);
} else if (ddlStmt instanceof AlterTableStatsStmt) {
env.getStatisticsManager().alterTableStatistics((AlterTableStatsStmt) ddlStmt);
} else if (ddlStmt instanceof AlterColumnStatsStmt) {
StatisticsRepository.alterColumnStatistics((AlterColumnStatsStmt) ddlStmt);
} else if (ddlStmt instanceof AlterViewStmt) {
@ -342,7 +339,7 @@ public class DdlExecutor {
} else if (ddlStmt instanceof AlterUserStmt) {
env.getAuth().alterUser((AlterUserStmt) ddlStmt);
} else if (ddlStmt instanceof DropTableStatsStmt) {
env.getStatisticsManager().dropStats((DropTableStatsStmt) ddlStmt);
// TODO: support later
} else {
throw new DdlException("Unknown statement.");
}

View File

@ -85,7 +85,6 @@ import org.apache.doris.analysis.ShowStreamLoadStmt;
import org.apache.doris.analysis.ShowSyncJobStmt;
import org.apache.doris.analysis.ShowTableCreationStmt;
import org.apache.doris.analysis.ShowTableIdStmt;
import org.apache.doris.analysis.ShowTableStatsStmt;
import org.apache.doris.analysis.ShowTableStatusStmt;
import org.apache.doris.analysis.ShowTableStmt;
import org.apache.doris.analysis.ShowTabletStmt;
@ -179,7 +178,6 @@ import org.apache.doris.mtmv.metadata.MTMVJob;
import org.apache.doris.mtmv.metadata.MTMVTask;
import org.apache.doris.mysql.privilege.PrivPredicate;
import org.apache.doris.statistics.ColumnStatistic;
import org.apache.doris.statistics.StatisticsJobManager;
import org.apache.doris.statistics.StatisticsRepository;
import org.apache.doris.system.Backend;
import org.apache.doris.system.Diagnoser;
@ -360,8 +358,6 @@ public class ShowExecutor {
handleShowSyncJobs();
} else if (stmt instanceof ShowSqlBlockRuleStmt) {
handleShowSqlBlockRule();
} else if (stmt instanceof ShowTableStatsStmt) {
handleShowTableStats();
} else if (stmt instanceof ShowColumnStatsStmt) {
handleShowColumnStats();
} else if (stmt instanceof ShowTableCreationStmt) {
@ -2141,12 +2137,6 @@ public class ShowExecutor {
}
private void handleShowTableStats() throws AnalysisException {
ShowTableStatsStmt showTableStatsStmt = (ShowTableStatsStmt) stmt;
List<List<String>> results = Env.getCurrentEnv().getStatisticsManager().showTableStatsList(showTableStatsStmt);
resultSet = new ShowResultSet(showTableStatsStmt.getMetaData(), results);
}
private void handleShowColumnStats() throws AnalysisException {
ShowColumnStatsStmt showColumnStatsStmt = (ShowColumnStatsStmt) stmt;
TableName tableName = showColumnStatsStmt.getTableName();
@ -2313,10 +2303,7 @@ public class ShowExecutor {
}
private void handleShowAnalyze() throws AnalysisException {
ShowAnalyzeStmt showStmt = (ShowAnalyzeStmt) stmt;
StatisticsJobManager jobManager = Env.getCurrentEnv().getStatisticsJobManager();
List<List<String>> results = jobManager.getAnalyzeJobInfos(showStmt);
resultSet = new ShowResultSet(showStmt.getMetaData(), results);
// TODO: Support later
}
private void handleCopyTablet() throws AnalysisException {

View File

@ -102,9 +102,9 @@ public class AnalysisTaskExecutor extends Thread {
private void doFetchAndExecute() {
BaseAnalysisTask task = taskScheduler.getPendingTasks();
AnalysisTaskWrapper jobWrapper = new AnalysisTaskWrapper(this, task);
AnalysisTaskWrapper taskWrapper = new AnalysisTaskWrapper(this, task);
incr();
executors.submit(jobWrapper);
executors.submit(taskWrapper);
Env.getCurrentEnv().getAnalysisManager()
.updateTaskStatus(task.info,
AnalysisState.RUNNING, "", System.currentTimeMillis());

View File

@ -1,332 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import org.apache.doris.analysis.LiteralExpr;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.util.Util;
import org.apache.doris.statistics.util.StatisticsUtil;
import com.google.common.collect.Lists;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Predicate;
/**
 * There are the statistics of column.
 * The column stats are mainly used to provide input for the Optimizer's cost model.
 * <p>
 * The description of column stats are following:
 * 1. @ndv: The number distinct values of column.
 * 2. @avgSize: The average size of column. The unit is bytes.
 * 3. @maxSize: The max size of column. The unit is bytes.
 * 4. @numNulls: The number of nulls.
 * 5. @minValue: The min value of column.
 * 6. @maxValue: The max value of column.
 * <p>
 * The granularity of the statistics is whole table.
 * For example:
 * "@ndv = 10" means that the number distinct values is 10 in the whole table.
 */
public class ColumnStat {

    // Aliases so callers can reference stats types through this class.
    public static final StatsType NDV = StatsType.NDV;
    public static final StatsType AVG_SIZE = StatsType.AVG_SIZE;
    public static final StatsType MAX_SIZE = StatsType.MAX_SIZE;
    public static final StatsType NUM_NULLS = StatsType.NUM_NULLS;
    public static final StatsType MIN_VALUE = StatsType.MIN_VALUE;
    public static final StatsType MAX_VALUE = StatsType.MAX_VALUE;

    // Sentinel for "no statistics known"; compared by identity in isUnKnown().
    public static final ColumnStat UNKNOWN = new ColumnStat();

    // -1 is the "unset" marker for each numeric stat; predicates accept it.
    private static final Predicate<Double> DESIRED_NDV_PRED = (v) -> v >= -1L;
    private static final Predicate<Double> DESIRED_AVG_SIZE_PRED = (v) -> (v == -1) || (v >= 0);
    private static final Predicate<Double> DESIRED_MAX_SIZE_PRED = (v) -> v >= -1L;
    private static final Predicate<Double> DESIRED_NUM_NULLS_PRED = (v) -> v >= -1L;

    // Column types whose values cannot be ordered, so min/max are meaningless.
    public static final Set<Type> MAX_MIN_UNSUPPORTED_TYPE = new HashSet<>();

    static {
        MAX_MIN_UNSUPPORTED_TYPE.add(Type.HLL);
        MAX_MIN_UNSUPPORTED_TYPE.add(Type.BITMAP);
        MAX_MIN_UNSUPPORTED_TYPE.add(Type.ARRAY);
        MAX_MIN_UNSUPPORTED_TYPE.add(Type.STRUCT);
        MAX_MIN_UNSUPPORTED_TYPE.add(Type.MAP);
    }

    private double ndv = -1;
    private double avgSizeByte = -1;
    private double maxSizeByte = -1;
    private double numNulls = -1;
    private double minValue = Double.NaN;
    private double maxValue = Double.NaN;

    // For display only.
    private LiteralExpr minExpr;
    private LiteralExpr maxExpr;

    private double selectivity = 1.0;

    /** Returns stats for a trivial column: 1 byte, 1 distinct value, no nulls. */
    public static ColumnStat createDefaultColumnStats() {
        ColumnStat columnStat = new ColumnStat();
        columnStat.setAvgSizeByte(1);
        columnStat.setMaxSizeByte(1);
        columnStat.setNdv(1);
        columnStat.setNumNulls(0);
        return columnStat;
    }

    /** True only for the shared UNKNOWN sentinel (identity comparison is intentional). */
    public static boolean isUnKnown(ColumnStat stats) {
        return stats == UNKNOWN;
    }

    public ColumnStat() {
    }

    /** Copy constructor. */
    public ColumnStat(ColumnStat other) {
        this.ndv = other.ndv;
        this.avgSizeByte = other.avgSizeByte;
        this.maxSizeByte = other.maxSizeByte;
        this.numNulls = other.numNulls;
        this.minValue = other.minValue;
        this.maxValue = other.maxValue;
        this.selectivity = other.selectivity;
        // Fix: the original copy constructor dropped the display-only exprs,
        // so copy() lost the human-readable min/max.
        this.minExpr = other.minExpr;
        this.maxExpr = other.maxExpr;
    }

    public ColumnStat(double ndv, double avgSizeByte,
            double maxSizeByte, double numNulls, double minValue, double maxValue) {
        this.ndv = ndv;
        this.avgSizeByte = avgSizeByte;
        this.maxSizeByte = maxSizeByte;
        this.numNulls = numNulls;
        this.minValue = minValue;
        this.maxValue = maxValue;
    }

    public double getNdv() {
        return ndv;
    }

    public double getAvgSizeByte() {
        return avgSizeByte;
    }

    public double getMaxSizeByte() {
        return maxSizeByte;
    }

    public double getNumNulls() {
        return numNulls;
    }

    public double getMinValue() {
        return minValue;
    }

    public double getMaxValue() {
        return maxValue;
    }

    public void setNdv(double ndv) {
        this.ndv = ndv;
    }

    public void setAvgSizeByte(double avgSizeByte) {
        this.avgSizeByte = avgSizeByte;
    }

    public void setMaxSizeByte(double maxSizeByte) {
        this.maxSizeByte = maxSizeByte;
    }

    public void setNumNulls(double numNulls) {
        this.numNulls = numNulls;
    }

    public void setMinValue(double minValue) {
        this.minValue = minValue;
    }

    public void setMaxValue(double maxValue) {
        this.maxValue = maxValue;
    }

    /**
     * Overwrites individual stats from user-supplied string values.
     *
     * @param columnType type of the column the stats describe; used to parse
     *        min/max and to skip them for unorderable types
     * @param statsTypeToValue stats to apply, keyed by type
     * @throws AnalysisException on an unknown stats type or an unparsable value
     */
    public void updateStats(Type columnType, Map<StatsType, String> statsTypeToValue) throws AnalysisException {
        for (Map.Entry<StatsType, String> entry : statsTypeToValue.entrySet()) {
            StatsType statsType = entry.getKey();
            switch (statsType) {
                case NDV:
                    ndv = Util.getDoublePropertyOrDefault(entry.getValue(), ndv,
                            DESIRED_NDV_PRED, NDV + " should >= -1");
                    break;
                case AVG_SIZE:
                    avgSizeByte = Util.getDoublePropertyOrDefault(entry.getValue(), avgSizeByte,
                            DESIRED_AVG_SIZE_PRED, AVG_SIZE + " should (>=0) or (=-1)");
                    break;
                case MAX_SIZE:
                    maxSizeByte = Util.getDoublePropertyOrDefault(entry.getValue(), maxSizeByte,
                            DESIRED_MAX_SIZE_PRED, MAX_SIZE + " should >=-1");
                    break;
                case MIN_VALUE:
                    // Fix: guard on the COLUMN type, not the stats type. The original
                    // tested MAX_MIN_UNSUPPORTED_TYPE.contains(statsType) against a
                    // Set<Type>, which can never match a StatsType, so the guard was
                    // dead and parsing was attempted for HLL/BITMAP/etc.
                    if (MAX_MIN_UNSUPPORTED_TYPE.contains(columnType)) {
                        minValue = Double.NEGATIVE_INFINITY;
                    } else {
                        minExpr = StatisticsUtil.readableValue(columnType, entry.getValue());
                        minValue = StatisticsUtil.convertToDouble(columnType, entry.getValue());
                    }
                    break;
                case MAX_VALUE:
                    if (MAX_MIN_UNSUPPORTED_TYPE.contains(columnType)) {
                        // NOTE(review): NEGATIVE_INFINITY here mirrors the original
                        // code; POSITIVE_INFINITY may have been intended — confirm.
                        maxValue = Double.NEGATIVE_INFINITY;
                    } else {
                        maxExpr = StatisticsUtil.readableValue(columnType, entry.getValue());
                        maxValue = StatisticsUtil.convertToDouble(columnType, entry.getValue());
                    }
                    break;
                default:
                    throw new AnalysisException("Unknown stats type: " + statsType);
            }
        }
    }

    /** Row of stringified stats for SHOW output, in fixed column order. */
    public List<String> getShowInfo() {
        List<String> result = Lists.newArrayList();
        result.add(Double.toString(ndv));
        result.add(Double.toString(avgSizeByte));
        result.add(Double.toString(maxSizeByte));
        result.add(Double.toString(numNulls));
        result.add(Double.toString(minValue));
        result.add(Double.toString(maxValue));
        return result;
    }

    public ColumnStat copy() {
        return new ColumnStat(this);
    }

    /** True when the [min, max] ranges of the two columns overlap. */
    public boolean hasIntersect(ColumnStat another) {
        double leftMin = this.getMinValue();
        double rightMin = another.getMinValue();
        double leftMax = this.getMaxValue();
        double rightMax = another.getMaxValue();
        return Math.max(leftMin, rightMin) <= Math.min(leftMax, rightMax);
    }

    /**
     * Return default column statistic.
     */
    public static ColumnStat getDefaultColumnStats() {
        return new ColumnStat();
    }

    /**
     * Merge column statistics(the original statistics should not be modified)
     *
     * @param left statistics to be merged
     * @param right statistics to be merged
     */
    public static ColumnStat mergeColumnStats(ColumnStat left, ColumnStat right) {
        // merge ndv: -1 means unset; sum when both sides are known
        double leftNdv = left.getNdv();
        double rightNdv = right.getNdv();
        if (leftNdv == -1) {
            leftNdv = rightNdv;
        } else {
            leftNdv = rightNdv != -1 ? (leftNdv + rightNdv) : leftNdv;
        }
        // merge avg_size: average of the two known values
        double leftAvgSize = left.getAvgSizeByte();
        double rightAvgSize = right.getAvgSizeByte();
        if (leftAvgSize == -1) {
            leftAvgSize = rightAvgSize;
        } else {
            leftAvgSize = rightAvgSize != -1 ? ((leftAvgSize + rightAvgSize) / 2) : leftAvgSize;
        }
        // merge max_size
        double leftMaxSize = left.getMaxSizeByte();
        double rightMaxSize = right.getMaxSizeByte();
        if (leftMaxSize == -1) {
            leftMaxSize = rightMaxSize;
        } else {
            leftMaxSize = Math.max(leftMaxSize, rightMaxSize);
        }
        // merge num_nulls
        double leftNumNulls = left.getNumNulls();
        double rightNumNulls = right.getNumNulls();
        if (leftNumNulls == -1) {
            leftNumNulls = rightNumNulls;
        } else {
            leftNumNulls = rightNumNulls != -1 ? (leftNumNulls + rightNumNulls) : leftNumNulls;
        }
        // merge min_value; NaN (unset) propagates through Math.min/max by design
        double leftMinValue = left.getMinValue();
        double rightMinValue = right.getMinValue();
        leftMinValue = Math.min(leftMinValue, rightMinValue);
        // merge max_value
        double leftMaxValue = left.getMaxValue();
        double rightMaxValue = right.getMaxValue();
        leftMaxValue = Math.max(rightMaxValue, leftMaxValue);
        // generate the new merged-statistics
        return new ColumnStat(leftNdv, leftAvgSize, leftMaxSize, leftNumNulls, leftMinValue, leftMaxValue);
    }

    /** True when ndv is within 10% of the row count, i.e. the column is near-unique. */
    public static boolean isAlmostUnique(double ndv, double rowCount) {
        return rowCount * 0.9 < ndv && ndv < rowCount * 1.1;
    }

    public double getSelectivity() {
        return selectivity;
    }

    public void setSelectivity(double selectivity) {
        this.selectivity = selectivity;
    }

    /**
     * Estimates how many of this column's distinct values fall inside the
     * overlap of this range and {@code other}'s, assuming a uniform value
     * distribution.
     */
    public double ndvIntersection(ColumnStat other) {
        if (maxValue == minValue) {
            // Degenerate range: a single value either lies in the other range or not.
            if (minValue <= other.maxValue && minValue >= other.minValue) {
                return 1;
            } else {
                return 0;
            }
        }
        double min = Math.max(minValue, other.minValue);
        double max = Math.min(maxValue, other.maxValue);
        if (min < max) {
            return Math.ceil(ndv * (max - min) / (maxValue - minValue));
        } else if (min > max) {
            return 0;
        } else {
            return 1;
        }
    }
}

View File

@ -36,6 +36,13 @@ import java.util.Set;
public class ColumnStatistic {
public static final StatsType NDV = StatsType.NDV;
public static final StatsType AVG_SIZE = StatsType.AVG_SIZE;
public static final StatsType MAX_SIZE = StatsType.MAX_SIZE;
public static final StatsType NUM_NULLS = StatsType.NUM_NULLS;
public static final StatsType MIN_VALUE = StatsType.MIN_VALUE;
public static final StatsType MAX_VALUE = StatsType.MAX_VALUE;
private static final Logger LOG = LogManager.getLogger(StmtExecutor.class);
public static ColumnStatistic DEFAULT = new ColumnStatisticBuilder().setAvgSizeByte(1).setNdv(1)
@ -137,6 +144,10 @@ public class ColumnStatistic {
}
}
public static boolean isAlmostUnique(double ndv, double rowCount) {
return rowCount * 0.9 < ndv && ndv < rowCount * 1.1;
}
public ColumnStatistic copy() {
return new ColumnStatisticBuilder().setCount(count).setNdv(ndv).setAvgSizeByte(avgSizeByte)
.setNumNulls(numNulls).setDataSize(dataSize).setMinValue(minValue)
@ -186,7 +197,7 @@ public class ColumnStatistic {
}
ColumnStatisticBuilder builder = new ColumnStatisticBuilder(this);
Double rowsAfterFilter = rowCount * selectivity;
if (ColumnStat.isAlmostUnique(ndv, rowCount)) {
if (isAlmostUnique(ndv, rowCount)) {
builder.setSelectivity(this.selectivity * selectivity);
builder.setNdv(ndv * selectivity);
} else {

View File

@ -1,147 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.Partition;
import org.apache.doris.catalog.Table;
import org.apache.doris.common.DdlException;
import org.apache.doris.statistics.StatisticsTaskResult.TaskResult;
import com.google.common.collect.Lists;
import java.util.List;
/**
 * A statistics task that collects statistics directly from FE metadata,
 * without issuing any query to the backends.
 * e.g. for fixed-length types such as Int type and Long type we get their size from metadata.
 * Supported combinations:
 * 1. row count: table or partition granularity;
 * 2. data size: table or partition granularity;
 * 3. max/avg column size: column granularity, read from the column's fixed-length type.
 */
public class MetaStatisticsTask extends StatisticsTask {

    public MetaStatisticsTask(long jobId, List<StatisticsDesc> statsDescs) {
        super(jobId, statsDescs);
    }

    /**
     * Produces one TaskResult per statistics descriptor by reading FE meta.
     */
    @Override
    public StatisticsTaskResult call() throws Exception {
        checkStatisticsDesc();
        List<TaskResult> results = Lists.newArrayList();
        for (StatisticsDesc desc : statsDescs) {
            StatsCategory category = desc.getStatsCategory();
            StatsGranularity granularity = desc.getStatsGranularity();
            TaskResult taskResult = createNewTaskResult(category, granularity);
            for (StatsType type : desc.getStatsTypes()) {
                collectStat(type, category, granularity, taskResult);
            }
            results.add(taskResult);
        }
        return new StatisticsTaskResult(results);
    }

    // Dispatches a single stats type to the matching metadata reader.
    private void collectStat(StatsType statsType, StatsCategory category,
            StatsGranularity granularity, TaskResult result) throws DdlException {
        switch (statsType) {
            case MAX_SIZE:
            case AVG_SIZE:
                getColSize(category, statsType, result);
                break;
            case ROW_COUNT:
                getRowCount(category.getDbId(), category.getTableId(), granularity, result);
                break;
            case DATA_SIZE:
                getDataSize(category.getDbId(), category.getTableId(), granularity, result);
                break;
            default:
                throw new DdlException("Unsupported statistics type(" + statsType + ").");
        }
    }

    // For fixed-length types max size and avg size are both the type's slot size.
    private void getColSize(StatsCategory category, StatsType statsType,
            TaskResult result) throws DdlException {
        OlapTable olapTable = getNotNullOlapTable(category.getDbId(), category.getTableId());
        Column col = getNotNullColumn(olapTable, category.getColumnName());
        int slotSize = col.getDataType().getSlotSize();
        result.getStatsTypeToValue().put(statsType, String.valueOf(slotSize));
    }

    // Reads the row count at table or partition granularity from meta.
    private void getRowCount(long dbId, long tableId, StatsGranularity granularity,
            TaskResult result) throws DdlException {
        OlapTable olapTable = getNotNullOlapTable(dbId, tableId);
        long rowCount;
        switch (granularity.getGranularity()) {
            case TABLE:
                rowCount = olapTable.getRowCount();
                break;
            case PARTITION:
                rowCount = getNotNullPartition(granularity, olapTable).getBaseIndex().getRowCount();
                break;
            case TABLET:
            default:
                throw new DdlException("Unsupported granularity(" + granularity + ").");
        }
        result.getStatsTypeToValue().put(StatsType.ROW_COUNT, String.valueOf(rowCount));
    }

    // Reads the data size at table or partition granularity from meta.
    private void getDataSize(long dbId, long tableId, StatsGranularity granularity,
            TaskResult result) throws DdlException {
        OlapTable olapTable = getNotNullOlapTable(dbId, tableId);
        long dataSize;
        switch (granularity.getGranularity()) {
            case TABLE:
                dataSize = olapTable.getDataSize();
                break;
            case PARTITION:
                dataSize = getNotNullPartition(granularity, olapTable).getBaseIndex().getDataSize();
                break;
            case TABLET:
            default:
                throw new DdlException("Unsupported granularity(" + granularity + ").");
        }
        result.getStatsTypeToValue().put(StatsType.DATA_SIZE, String.valueOf(dataSize));
    }

    // Resolves the OLAP table or throws if db/table is missing.
    private OlapTable getNotNullOlapTable(long dbId, long tableId) throws DdlException {
        Database database = Env.getCurrentInternalCatalog().getDbOrDdlException(dbId);
        return (OlapTable) database.getTableOrDdlException(tableId);
    }

    // Resolves the partition referenced by the granularity or throws.
    private Partition getNotNullPartition(StatsGranularity granularity, OlapTable olapTable) throws DdlException {
        long partitionId = granularity.getPartitionId();
        Partition partition = olapTable.getPartition(partitionId);
        if (partition == null) {
            throw new DdlException("Partition(" + partitionId + ") not found.");
        }
        return partition;
    }

    // Resolves the named column or throws.
    private Column getNotNullColumn(Table table, String colName) throws DdlException {
        Column column = table.getColumn(colName);
        if (column == null) {
            throw new DdlException("Column(" + colName + ") not found.");
        }
        return column;
    }
}

View File

@ -1,163 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.util.Util;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import java.util.List;
import java.util.Map;
import java.util.function.Predicate;
/**
* There are the statistics of partition.
* The partition stats are mainly used to provide input for the Optimizer's cost model.
* The description of partition stats are following:
* - @rowCount: The row count of partition.
* - @dataSize: The data size of partition.
* - @nameToColumnStats: <@String columnName, @ColumnStats columnStats>
* <p>
* Each column in the Table will have corresponding @ColumnStats.
* Those @ColumnStats are recorded in @nameToColumnStats form of MAP.
* This facilitates the optimizer to quickly find the corresponding:
* - @ColumnStats: based on the column name.
* - @rowCount: The row count of partition.
* - @dataSize: The data size of partition.
* <p>
* The granularity of the statistics is whole partition.
* For example: "@rowCount = 1000" means that the row count is 1000 in the whole partition.
* <p>
* After the statistics task is successfully completed, update the PartitionStats,
* PartitionStats should not be updated in any other way.
*/
public class PartitionStats {
    public static final StatsType DATA_SIZE = StatsType.DATA_SIZE;
    public static final StatsType ROW_COUNT = StatsType.ROW_COUNT;

    // A statistic value is valid if it is >= -1; -1 means "not collected yet".
    private static final Predicate<Long> DESIRED_ROW_COUNT_PRED = (v) -> v >= -1L;
    private static final Predicate<Long> DESIRED_DATA_SIZE_PRED = (v) -> v >= -1L;

    private long rowCount = -1;
    private long dataSize = -1;
    // columnName -> ColumnStat; concurrent map, see getNotNullColumnStats.
    private final Map<String, ColumnStat> nameToColumnStats = Maps.newConcurrentMap();

    /**
     * Return a default partition statistic.
     */
    public static PartitionStats getDefaultPartitionStats() {
        return new PartitionStats();
    }

    public PartitionStats() {
    }

    public PartitionStats(long rowCount, long dataSize) {
        this.rowCount = rowCount;
        this.dataSize = dataSize;
    }

    public long getRowCount() {
        return rowCount;
    }

    public void setRowCount(long rowCount) {
        this.rowCount = rowCount;
    }

    public long getDataSize() {
        return dataSize;
    }

    public void setDataSize(long dataSize) {
        this.dataSize = dataSize;
    }

    public Map<String, ColumnStat> getNameToColumnStats() {
        return nameToColumnStats;
    }

    public ColumnStat getColumnStats(String columnName) {
        return nameToColumnStats.get(columnName);
    }

    /**
     * If the column statistics do not exist, the default statistics will be returned.
     */
    public ColumnStat getColumnStatsOrDefault(String columnName) {
        return nameToColumnStats.getOrDefault(columnName,
                ColumnStat.getDefaultColumnStats());
    }

    /**
     * Show the partition row count and data size.
     */
    public List<String> getShowInfo() {
        List<String> result = Lists.newArrayList();
        result.add(Long.toString(rowCount));
        result.add(Long.toString(dataSize));
        return result;
    }

    /**
     * After the statistics task is successfully completed, update the statistics of the partition,
     * statistics should not be updated in any other way.
     * Unknown stats types in the map are silently ignored.
     */
    public void updatePartitionStats(Map<StatsType, String> statsTypeToValue) throws AnalysisException {
        for (Map.Entry<StatsType, String> entry : statsTypeToValue.entrySet()) {
            StatsType statsType = entry.getKey();
            String value = entry.getValue();
            if (statsType == ROW_COUNT) {
                rowCount = Util.getLongPropertyOrDefault(value, rowCount,
                        DESIRED_ROW_COUNT_PRED, ROW_COUNT + " should >= -1");
            } else if (statsType == DATA_SIZE) {
                dataSize = Util.getLongPropertyOrDefault(value, dataSize,
                        DESIRED_DATA_SIZE_PRED, DATA_SIZE + " should >= -1");
            }
        }
    }

    /**
     * After the statistics task is successfully completed, update the statistics of the column,
     * statistics should not be updated in any other way.
     */
    public void updateColumnStats(String columnName,
                                  Type columnType,
                                  Map<StatsType, String> statsTypeToValue) throws AnalysisException {
        ColumnStat columnStat = getNotNullColumnStats(columnName);
        columnStat.updateStats(columnType, statsTypeToValue);
    }

    /**
     * If column stats is not exist, create a new one.
     *
     * @param columnName column name
     * @return @ColumnStats
     */
    public ColumnStat getNotNullColumnStats(String columnName) {
        // computeIfAbsent is atomic on the concurrent map; the former
        // get-then-put sequence was a check-then-act race that could create
        // two ColumnStat instances for the same column under concurrent
        // callers and silently drop the updates applied to one of them.
        return nameToColumnStats.computeIfAbsent(columnName, name -> new ColumnStat());
    }
}

View File

@ -1,142 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.Table;
import org.apache.doris.common.DdlException;
import org.apache.doris.common.InvalidFormatException;
import org.apache.doris.statistics.StatisticsTaskResult.TaskResult;
import org.apache.doris.statistics.StatsGranularity.Granularity;
import org.apache.doris.statistics.util.InternalQuery;
import org.apache.doris.statistics.util.InternalQueryResult;
import org.apache.doris.statistics.util.InternalQueryResult.ResultRow;
import org.apache.doris.statistics.util.InternalSqlTemplate;
import org.apache.doris.statistics.util.InternalSqlTemplate.QueryType;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import java.util.List;
import java.util.Map;
/**
 * A statistics task that collects statistics by executing query.
 * The results of the query will be returned as @StatisticsTaskResult.
 * Not thread-safe: the generated SQL is kept in the mutable {@code statement}
 * field shared between {@code call()} and {@code executeQuery()}.
 */
public class SQLStatisticsTask extends StatisticsTask {
    // FULL by default; subclasses may switch to SAMPLE (see @SampleSQLStatisticsTask).
    protected QueryType queryType = QueryType.FULL;
    // SQL text produced by constructQuery(); assigned in call() before executeQuery() runs.
    protected String statement;
    public SQLStatisticsTask(long jobId, List<StatisticsDesc> statsDescs) {
        super(jobId, statsDescs);
    }
    /**
     * Builds and executes one statistics query per descriptor and collects the
     * per-descriptor results into a single @StatisticsTaskResult.
     */
    @Override
    public StatisticsTaskResult call() throws Exception {
        checkStatisticsDesc();
        List<TaskResult> taskResults = Lists.newArrayList();
        for (StatisticsDesc statsDesc : statsDescs) {
            statement = constructQuery(statsDesc);
            TaskResult taskResult = executeQuery(statsDesc);
            taskResults.add(taskResult);
            LOG.info("Collected statistics successfully by SQL: {}", statement);
        }
        return new StatisticsTaskResult(taskResults);
    }
    /**
     * Chooses the SQL template matching the descriptor's FIRST stats type and
     * the granularity (partition vs. non-partition), then renders it with the
     * query parameters. The chosen template is expected to produce result
     * columns for every stats type grouped with the first one (e.g. MAX_SIZE
     * and AVG_SIZE share one template; NDV/MIN/MAX share another).
     *
     * @throws DdlException for DATA_SIZE or any other type with no SQL template
     */
    protected String constructQuery(StatisticsDesc statsDesc) throws DdlException,
            InvalidFormatException {
        Map<String, String> params = getQueryParams(statsDesc);
        List<StatsType> statsTypes = statsDesc.getStatsTypes();
        StatsType type = statsTypes.get(0);
        StatsGranularity statsGranularity = statsDesc.getStatsGranularity();
        Granularity granularity = statsGranularity.getGranularity();
        boolean nonPartitioned = granularity != Granularity.PARTITION;
        switch (type) {
            case ROW_COUNT:
                return nonPartitioned ? InternalSqlTemplate.buildStatsRowCountSql(params, queryType)
                        : InternalSqlTemplate.buildStatsPartitionRowCountSql(params, queryType);
            case NUM_NULLS:
                return nonPartitioned ? InternalSqlTemplate.buildStatsNumNullsSql(params, queryType)
                        : InternalSqlTemplate.buildStatsPartitionNumNullsSql(params, queryType);
            case MAX_SIZE:
            case AVG_SIZE:
                return nonPartitioned ? InternalSqlTemplate.buildStatsMaxAvgSizeSql(params, queryType)
                        : InternalSqlTemplate.buildStatsPartitionMaxAvgSizeSql(params, queryType);
            case NDV:
            case MAX_VALUE:
            case MIN_VALUE:
                return nonPartitioned ? InternalSqlTemplate.buildStatsMinMaxNdvValueSql(params, queryType)
                        : InternalSqlTemplate.buildStatsPartitionMinMaxNdvValueSql(params, queryType);
            case DATA_SIZE:
            default:
                throw new DdlException("Unsupported statistics type: " + type);
        }
    }
    /**
     * Runs the prepared {@code statement} through the internal query engine and
     * maps the single expected result row into a TaskResult: each result-column
     * label names a StatsType, each cell is that stat's value.
     *
     * @throws DdlException if the query does not return exactly one row whose
     *         column count matches the descriptor's stats types
     */
    protected TaskResult executeQuery(StatisticsDesc statsDesc) throws Exception {
        StatsGranularity granularity = statsDesc.getStatsGranularity();
        List<StatsType> statsTypes = statsDesc.getStatsTypes();
        StatsCategory category = statsDesc.getStatsCategory();
        String dbName = Env.getCurrentInternalCatalog()
                .getDbOrDdlException(category.getDbId()).getFullName();
        InternalQuery query = new InternalQuery(dbName, statement);
        InternalQueryResult queryResult = query.query();
        List<ResultRow> resultRows = queryResult.getResultRows();
        if (resultRows != null && resultRows.size() == 1) {
            ResultRow resultRow = resultRows.get(0);
            List<String> columns = resultRow.getColumns();
            TaskResult result = createNewTaskResult(category, granularity);
            if (columns.size() == statsTypes.size()) {
                for (int i = 0; i < columns.size(); i++) {
                    StatsType statsType = StatsType.fromString(columns.get(i));
                    result.getStatsTypeToValue().put(statsType, resultRow.getString(i));
                }
                return result;
            }
        }
        // Statistics statements are executed singly and return only one row data
        throw new DdlException("Statistics query result is incorrect, statement: "
                + statement + " queryResult: " + queryResult);
    }
    /**
     * Base template parameters resolved from the descriptor's category:
     * table, partition and column names. Subclasses may add more
     * (e.g. the sampling percent in @SampleSQLStatisticsTask).
     */
    protected Map<String, String> getQueryParams(StatisticsDesc statsDesc) throws DdlException {
        StatsCategory category = statsDesc.getStatsCategory();
        Database db = Env.getCurrentInternalCatalog().getDbOrDdlException(category.getDbId());
        Table table = db.getTableOrDdlException(category.getTableId());
        Map<String, String> params = Maps.newHashMap();
        params.put(InternalSqlTemplate.TABLE, table.getName());
        params.put(InternalSqlTemplate.PARTITION, category.getPartitionName());
        params.put(InternalSqlTemplate.COLUMN, category.getColumnName());
        return params;
    }
}

View File

@ -1,49 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import org.apache.doris.common.Config;
import org.apache.doris.common.DdlException;
import org.apache.doris.statistics.util.InternalSqlTemplate;
import org.apache.doris.statistics.util.InternalSqlTemplate.QueryType;
import java.util.List;
import java.util.Map;
/**
 * A sampling variant of @SQLStatisticsTask: the generated statistics query
 * scans only a percentage of the data instead of the whole table.
 * Apart from the extra sampling parameter, behavior is identical to the parent.
 */
public class SampleSQLStatisticsTask extends SQLStatisticsTask {
    // TODO(wzt): If the job configuration has percentage value, obtain from the job,
    // if not, use the default value.
    private int samplePercentage = Config.cbo_default_sample_percentage;

    public SampleSQLStatisticsTask(long jobId, List<StatisticsDesc> statsDescs) {
        super(jobId, statsDescs);
        queryType = QueryType.SAMPLE;
    }

    /** Adds the sampling percentage on top of the parent's query parameters. */
    @Override
    protected Map<String, String> getQueryParams(StatisticsDesc statsDesc) throws DdlException {
        Map<String, String> queryParams = super.getQueryParams(statsDesc);
        queryParams.put(InternalSqlTemplate.PERCENT, Integer.toString(samplePercentage));
        return queryParams;
    }
}

View File

@ -1,228 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;
import com.google.common.base.Strings;
import com.google.common.collect.Maps;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.Map;
/**
* There are the statistics of all tables.
* The @Statistics are mainly used to provide input for the Optimizer's cost model.
*
* @idToTableStats: <@Long tableId, @TableStats tableStats>
* - Each table will have corresponding @TableStats
* - Those @TableStats are recorded in @idToTableStats form of MAP.
* - This facilitates the optimizer to quickly find the corresponding
* @TableStats based on the table id.
*/
public class Statistics {
    private static final Logger LOG = LogManager.getLogger(Statistics.class);

    // tableId -> TableStats. The map itself is concurrent; compound
    // read-modify-write update sequences are additionally guarded by
    // synchronized(this) in the update/drop methods below.
    private final Map<Long, TableStats> idToTableStats = Maps.newConcurrentMap();

    /**
     * Get the table stats for the given table id.
     *
     * @param tableId table id
     * @return @TableStats
     * @throws AnalysisException if table stats not exists
     */
    public TableStats getTableStats(long tableId) throws AnalysisException {
        TableStats tableStats = idToTableStats.get(tableId);
        if (tableStats == null) {
            throw new AnalysisException("Table " + tableId + " has no statistics");
        }
        return tableStats;
    }

    /**
     * If the table statistics do not exist, the default statistics will be returned.
     */
    public TableStats getTableStatsOrDefault(long tableId) throws AnalysisException {
        return idToTableStats.getOrDefault(tableId, TableStats.getDefaultTableStats());
    }

    /**
     * Get the partitions stats for the given table id.
     *
     * @param tableId table id
     * @return partition name and @PartitionStats
     * @throws AnalysisException if partitions stats not exists
     */
    public Map<String, PartitionStats> getPartitionStats(long tableId) throws AnalysisException {
        TableStats tableStats = getTableStats(tableId);
        Map<String, PartitionStats> nameToPartitionStats = tableStats.getNameToPartitionStats();
        if (nameToPartitionStats == null) {
            throw new AnalysisException("Table " + tableId + " has no partition statistics");
        }
        return nameToPartitionStats;
    }

    /**
     * Get the partition stats for the given table id and partition name.
     * Returns a single-entry map keyed by the partition name.
     *
     * @param tableId table id
     * @param partitionName partition name
     * @return partition name and @PartitionStats
     * @throws AnalysisException if partition stats not exists
     */
    public Map<String, PartitionStats> getPartitionStats(long tableId, String partitionName)
            throws AnalysisException {
        Map<String, PartitionStats> partitionStats = getPartitionStats(tableId);
        PartitionStats partitionStat = partitionStats.get(partitionName);
        if (partitionStat == null) {
            throw new AnalysisException("Partition " + partitionName + " of table " + tableId + " has no statistics");
        }
        Map<String, PartitionStats> statsMap = Maps.newHashMap();
        statsMap.put(partitionName, partitionStat);
        return statsMap;
    }

    /**
     * Get the columns stats for the given table id.
     *
     * @param tableId table id
     * @return column name and @ColumnStats
     * @throws AnalysisException if columns stats not exists
     */
    public Map<String, ColumnStat> getColumnStats(long tableId) throws AnalysisException {
        TableStats tableStats = getTableStats(tableId);
        Map<String, ColumnStat> nameToColumnStats = tableStats.getNameToColumnStats();
        if (nameToColumnStats == null) {
            throw new AnalysisException("Table " + tableId + " has no column statistics");
        }
        return nameToColumnStats;
    }

    /**
     * Get the columns stats for the given table id and partition name.
     *
     * @param tableId table id
     * @param partitionName partition name
     * @return column name and @ColumnStats
     * @throws AnalysisException if column stats not exists
     */
    public Map<String, ColumnStat> getColumnStats(long tableId, String partitionName) throws AnalysisException {
        Map<String, PartitionStats> partitionStats = getPartitionStats(tableId, partitionName);
        PartitionStats partitionStat = partitionStats.get(partitionName);
        if (partitionStat == null) {
            throw new AnalysisException("Partition " + partitionName + " of table " + tableId + " has no statistics");
        }
        return partitionStat.getNameToColumnStats();
    }

    /** Creates the table stats entry if absent, then applies the update atomically w.r.t. other updaters. */
    public void updateTableStats(long tableId, Map<StatsType, String> statsTypeToValue) throws AnalysisException {
        synchronized (this) {
            TableStats tableStats = getNotNullTableStats(tableId);
            tableStats.updateTableStats(statsTypeToValue);
        }
    }

    /** Creates the table stats entry if absent, then updates the named partition's stats. */
    public void updatePartitionStats(long tableId, String partitionName, Map<StatsType, String> statsTypeToValue)
            throws AnalysisException {
        synchronized (this) {
            TableStats tableStats = getNotNullTableStats(tableId);
            tableStats.updatePartitionStats(partitionName, statsTypeToValue);
        }
    }

    /** Updates table-level column stats, creating missing entries on the way. */
    public void updateColumnStats(long tableId, String columnName, Type columnType,
            Map<StatsType, String> statsTypeToValue) throws AnalysisException {
        synchronized (this) {
            TableStats tableStats = getNotNullTableStats(tableId);
            tableStats.updateColumnStats(columnName, columnType, statsTypeToValue);
        }
    }

    /** Updates partition-level column stats, creating missing entries on the way. */
    public void updateColumnStats(long tableId, String partitionName, String columnName, Type columnType,
            Map<StatsType, String> statsTypeToValue) throws AnalysisException {
        synchronized (this) {
            PartitionStats partitionStats = getNotNullPartitionStats(tableId, partitionName);
            partitionStats.updateColumnStats(columnName, columnType, statsTypeToValue);
        }
    }

    /** Drops all statistics of the table (implemented as a partition drop with null name). */
    public void dropTableStats(long tableId) {
        dropPartitionStats(tableId, null);
    }

    /**
     * Drops the named partition's statistics; with a null/empty partition name
     * the whole table entry is removed. A no-op for unknown tables.
     */
    public void dropPartitionStats(long tableId, String partitionName) {
        synchronized (this) {
            if (idToTableStats.containsKey(tableId)) {
                if (Strings.isNullOrEmpty(partitionName)) {
                    idToTableStats.remove(tableId);
                    LOG.info("Deleted table(id={}) statistics.", tableId);
                } else {
                    TableStats tableStats = idToTableStats.get(tableId);
                    tableStats.getNameToPartitionStats().remove(partitionName);
                    LOG.info("Deleted statistics for partition {} of table(id={}).",
                            partitionName, tableId);
                }
            }
        }
    }

    // TODO: mock statistics need to be removed in the future
    public void mockTableStatsWithRowCount(long tableId, double rowCount) {
        // computeIfAbsent is atomic; the former unsynchronized get-then-put
        // was a check-then-act race that could lose an insert when two
        // threads mocked the same table concurrently.
        idToTableStats.computeIfAbsent(tableId, id -> new TableStats(rowCount, 1));
    }

    /**
     * if the table stats is not exist, create a new one (atomically).
     *
     * @param tableId table id
     * @return @TableStats
     */
    private TableStats getNotNullTableStats(long tableId) {
        return idToTableStats.computeIfAbsent(tableId, id -> new TableStats());
    }

    /**
     * if the partition stats is not exist, create a new one (atomically).
     *
     * @param tableId table id
     * @param partitionName partition name
     * @return @PartitionStats
     */
    private PartitionStats getNotNullPartitionStats(long tableId, String partitionName) {
        TableStats tableStats = getNotNullTableStats(tableId);
        return tableStats.getNameToPartitionStats()
                .computeIfAbsent(partitionName, name -> new PartitionStats());
    }
}

View File

@ -1,61 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import java.util.List;
/**
 * Describes one unit of statistics collection work: the object the stats
 * belong to (category), the granularity they are gathered at, and the list
 * of stats types to collect.
 */
public class StatisticsDesc {
    private StatsCategory category;
    private StatsGranularity granularity;
    private List<StatsType> types;

    public StatisticsDesc(StatsCategory statsCategory,
            StatsGranularity statsGranularity,
            List<StatsType> statsTypes) {
        this.category = statsCategory;
        this.granularity = statsGranularity;
        this.types = statsTypes;
    }

    public StatsCategory getStatsCategory() {
        return category;
    }

    public void setStatsCategory(StatsCategory statsCategory) {
        this.category = statsCategory;
    }

    public StatsGranularity getStatsGranularity() {
        return granularity;
    }

    public void setStatsGranularity(StatsGranularity statsGranularity) {
        this.granularity = statsGranularity;
    }

    public List<StatsType> getStatsTypes() {
        return types;
    }

    public void setStatsTypes(List<StatsType> statsTypes) {
        this.types = statsTypes;
    }
}

View File

@ -1,342 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.Table;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.DdlException;
import org.apache.doris.common.util.TimeUtils;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.text.SimpleDateFormat;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import javax.annotation.Nullable;
/***
* Used to store statistics job info,
* including job status, progress, etc.
*/
public class StatisticsJob {
    private static final Logger LOG = LogManager.getLogger(StatisticsJob.class);
    /**
     * Job lifecycle. Allowed transitions (enforced by unprotectedUpdateJobState):
     * PENDING -> PENDING/SCHEDULING/FAILED/CANCELLED,
     * SCHEDULING -> RUNNING/FAILED/CANCELLED,
     * RUNNING -> FINISHED/FAILED/CANCELLED.
     */
    public enum JobState {
        PENDING,
        SCHEDULING,
        RUNNING,
        FINISHED,
        FAILED,
        CANCELLED
    }
    // Fair read/write lock guarding the mutable job fields (state, progress, times, errors).
    protected final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(true);
    private final long id = Env.getCurrentEnv().getNextId();
    /**
     * to be collected database stats.
     */
    private final long dbId;
    /**
     * to be collected table stats.
     */
    private final Set<Long> tblIds;
    /**
     * to be collected partition stats.
     */
    private final Map<Long, List<String>> tableIdToPartitionName;
    /**
     * to be collected column stats.
     */
    private final Map<Long, List<String>> tableIdToColumnName;
    private final Map<String, String> properties;
    /**
     * to be executed tasks.
     */
    private final List<StatisticsTask> tasks = Lists.newArrayList();
    private JobState jobState = JobState.PENDING;
    // Accumulated task failure messages; rendered into the SHOW output.
    private final List<String> errorMsgs = Lists.newArrayList();
    private final long createTime = System.currentTimeMillis();
    // -1 means not yet started/finished.
    private long startTime = -1L;
    private long finishTime = -1L;
    // Count of finished tasks; job is FINISHED when progress == tasks.size().
    private int progress = 0;
    public StatisticsJob(Long dbId,
                         Set<Long> tblIds,
                         Map<Long, List<String>> tblIdToPartitionName,
                         Map<Long, List<String>> tableIdToColumnName,
                         Map<String, String> properties) {
        this.dbId = dbId;
        this.tblIds = tblIds;
        this.tableIdToPartitionName = tblIdToPartitionName;
        this.tableIdToColumnName = tableIdToColumnName;
        // normalize null properties to an empty map so callers never see null
        this.properties = properties == null ? Maps.newHashMap() : properties;
    }
    public void readLock() {
        lock.readLock().lock();
    }
    public void readUnlock() {
        lock.readLock().unlock();
    }
    private void writeLock() {
        lock.writeLock().lock();
    }
    private void writeUnlock() {
        lock.writeLock().unlock();
    }
    public long getId() {
        return id;
    }
    public long getDbId() {
        return dbId;
    }
    public Set<Long> getTblIds() {
        return tblIds;
    }
    public Map<Long, List<String>> getTableIdToPartitionName() {
        return tableIdToPartitionName;
    }
    public Map<Long, List<String>> getTableIdToColumnName() {
        return tableIdToColumnName;
    }
    public Map<String, String> getProperties() {
        return properties;
    }
    public List<StatisticsTask> getTasks() {
        return tasks;
    }
    public List<String> getErrorMsgs() {
        return errorMsgs;
    }
    public JobState getJobState() {
        return jobState;
    }
    public long getCreateTime() {
        return createTime;
    }
    public long getStartTime() {
        return startTime;
    }
    public long getFinishTime() {
        return finishTime;
    }
    public int getProgress() {
        return progress;
    }
    /**
     * Transitions the job to newState under the write lock.
     *
     * @throws DdlException if the transition is not allowed from the current state
     */
    public void updateJobState(JobState newState) throws DdlException {
        LOG.info("To change statistics job(id={}) state from {} to {}", id, jobState, newState);
        writeLock();
        JobState fromState = jobState;
        try {
            unprotectedUpdateJobState(newState);
        } catch (DdlException e) {
            LOG.warn(e.getMessage(), e);
            throw e;
        } finally {
            writeUnlock();
        }
        LOG.info("Statistics job(id={}) state changed from {} to {}", id, fromState, jobState);
    }
    /**
     * State-machine core; caller must hold the write lock. Also stamps
     * startTime on entering RUNNING and finishTime on any terminal state.
     */
    private void unprotectedUpdateJobState(JobState newState) throws DdlException {
        // PENDING -> PENDING/SCHEDULING/FAILED/CANCELLED
        if (jobState == JobState.PENDING) {
            switch (newState) {
                case PENDING:
                case SCHEDULING:
                    break;
                case FAILED:
                case CANCELLED:
                    finishTime = System.currentTimeMillis();
                    break;
                default:
                    throw new DdlException("Invalid job state transition from " + jobState + " to " + newState);
            }
        } else if (jobState == JobState.SCHEDULING) { // SCHEDULING -> RUNNING/FAILED/CANCELLED
            switch (newState) {
                case RUNNING:
                    startTime = System.currentTimeMillis();
                    break;
                case FAILED:
                case CANCELLED:
                    finishTime = System.currentTimeMillis();
                    break;
                default:
                    throw new DdlException("Invalid job state transition from " + jobState + " to " + newState);
            }
        } else if (jobState == JobState.RUNNING) { // RUNNING -> FINISHED/FAILED/CANCELLED
            switch (newState) {
                case FINISHED:
                case FAILED:
                case CANCELLED:
                    // set finish time
                    finishTime = System.currentTimeMillis();
                    break;
                default:
                    throw new DdlException("Invalid job state transition from " + jobState + " to " + newState);
            }
        } else {
            // TODO
            throw new DdlException("Invalid job state transition from " + jobState + " to " + newState);
        }
        jobState = newState;
    }
    /**
     * Marks the task identified by taskId as FINISHED (errorMsg empty) or
     * FAILED (errorMsg non-empty), updating job progress/state accordingly.
     * Unknown task ids are silently ignored.
     * NOTE(review): `taskId == task.getId()` relies on auto-unboxing of the
     * boxed taskId; if StatisticsTask.getId() ever returns Long this becomes
     * a reference comparison, and a null taskId would NPE here — confirm.
     */
    public void updateJobInfoByTaskId(Long taskId, String errorMsg) throws DdlException {
        writeLock();
        try {
            for (StatisticsTask task : tasks) {
                if (taskId == task.getId()) {
                    if (Strings.isNullOrEmpty(errorMsg)) {
                        progress += 1;
                        if (progress == tasks.size()) {
                            unprotectedUpdateJobState(StatisticsJob.JobState.FINISHED);
                        }
                        task.updateTaskState(StatisticsTask.TaskState.FINISHED);
                    } else {
                        errorMsgs.add(errorMsg);
                        task.updateTaskState(StatisticsTask.TaskState.FAILED);
                        unprotectedUpdateJobState(StatisticsJob.JobState.FAILED);
                    }
                    return;
                }
            }
        } finally {
            writeUnlock();
        }
    }
    /**
     * Builds one row of SHOW output for this job, optionally restricted to a
     * single tableId (null means all tables): id, create/start/finish times,
     * accumulated error messages, covered table(column) scope, finished/total
     * task count, and the effective state. Returns an empty list when no
     * valid table scope remains (e.g. all involved tables were dropped).
     */
    public List<Comparable> getShowInfo(@Nullable Long tableId) throws AnalysisException {
        List<Comparable> result = Lists.newArrayList();
        result.add(Long.toString(id));
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");
        result.add(TimeUtils.longToTimeString(createTime, dateFormat));
        result.add(startTime != -1L ? TimeUtils.longToTimeString(startTime, dateFormat) : "N/A");
        result.add(finishTime != -1L ? TimeUtils.longToTimeString(finishTime, dateFormat) : "N/A");
        StringBuilder sb = new StringBuilder();
        for (String errorMsg : errorMsgs) {
            sb.append(errorMsg).append("\n");
        }
        result.add(sb.toString());
        int totalTaskNum = 0;
        int finishedTaskNum = 0;
        Map<Long, Set<String>> tblIdToCols = Maps.newHashMap();
        for (StatisticsTask task : tasks) {
            List<StatisticsDesc> statsDescs = task.getStatsDescs();
            if (!statsDescs.isEmpty()) {
                // The same task has the same stats properties
                StatsCategory statsCategory = statsDescs.get(0).getStatsCategory();
                long tblId = statsCategory.getTableId();
                if (tableId == null || tableId == tblId) {
                    totalTaskNum++;
                    if (task.getTaskState() == StatisticsTask.TaskState.FINISHED) {
                        finishedTaskNum++;
                    }
                    String col = statsCategory.getColumnName();
                    if (Strings.isNullOrEmpty(col)) {
                        continue;
                    }
                    tblIdToCols.computeIfAbsent(tblId,
                            (key) -> Sets.newHashSet()).add(col);
                }
            }
        }
        List<String> scope = Lists.newArrayList();
        Database db = Env.getCurrentEnv().getInternalCatalog().getDbOrAnalysisException(dbId);
        for (Long tblId : tblIds) {
            try {
                Table table = db.getTableOrAnalysisException(tblId);
                List<Column> baseSchema = table.getBaseSchema();
                Set<String> cols = tblIdToCols.get(tblId);
                if (cols != null) {
                    // "(*)" when every column of the table is covered
                    if (baseSchema.size() == cols.size()) {
                        scope.add(table.getName() + "(*)");
                    } else {
                        scope.add(table.getName() + "(" + StringUtils.join(cols.toArray(), ", ") + ")");
                    }
                }
            } catch (AnalysisException e) {
                // catch this exception when table is dropped
                LOG.info("get table failed, tableId: " + tblId, e);
            }
        }
        // exclude invalid info
        if (scope.isEmpty()) {
            return Collections.emptyList();
        }
        result.add(StringUtils.join(scope.toArray(), ","));
        result.add(finishedTaskNum + "/" + totalTaskNum);
        // report FINISHED when every task in scope is done, even if the
        // overall jobState field lags behind
        if (totalTaskNum > 0 && totalTaskNum == finishedTaskNum) {
            result.add("FINISHED");
        } else {
            result.add(jobState.toString());
        }
        return result;
    }
}

View File

@ -1,192 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import org.apache.doris.analysis.ShowAnalyzeStmt;
import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.Table;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.Config;
import org.apache.doris.common.ErrorCode;
import org.apache.doris.common.ErrorReport;
import org.apache.doris.common.util.ListComparator;
import org.apache.doris.common.util.OrderByPair;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
/**
* For unified management of statistics job,
* including job addition, cancellation, scheduling, etc.
*/
public class StatisticsJobManager {
    private static final Logger LOG = LogManager.getLogger(StatisticsJobManager.class);

    /**
     * save statistics job status information
     * (concurrent map: jobs are registered and queried from different threads)
     */
    private final Map<Long, StatisticsJob> idToStatisticsJob = Maps.newConcurrentMap();

    public Map<Long, StatisticsJob> getIdToStatisticsJob() {
        return idToStatisticsJob;
    }

    /**
     * The statistical job has the following restrict:
     * - Rule1: The same table cannot have two unfinished statistics jobs
     * - Rule2: The unfinished statistics job could not more than Config.cbo_max_statistics_job_num
     * - Rule3: The job for external table is not supported
     *
     * @param dbId     database containing the target tables
     * @param tableIds tables the new job wants to analyze
     * @throws AnalysisException if any rule is violated
     */
    private void checkRestrict(long dbId, Set<Long> tableIds) throws AnalysisException {
        Database db = Env.getCurrentInternalCatalog().getDbOrAnalysisException(dbId);
        db.readLock();
        try {
            // Rule3: statistics collection is only supported for OLAP tables
            for (Long tableId : tableIds) {
                Table table = db.getTableOrAnalysisException(tableId);
                if (table.getType() != Table.TableType.OLAP) {
                    ErrorReport.reportAnalysisException(ErrorCode.ERR_NOT_OLAP_TABLE, db.getFullName(),
                            table.getName(), "ANALYZE");
                }
            }
        } finally {
            db.readUnlock();
        }
        int unfinishedJobs = 0;
        // Rule1: reject tables that already have an unfinished job
        for (StatisticsJob statisticsJob : idToStatisticsJob.values()) {
            StatisticsJob.JobState jobState = statisticsJob.getJobState();
            Set<Long> tblIds = statisticsJob.getTblIds();
            if (jobState == StatisticsJob.JobState.PENDING
                    || jobState == StatisticsJob.JobState.SCHEDULING
                    || jobState == StatisticsJob.JobState.RUNNING) {
                for (Long tableId : tableIds) {
                    if (tblIds.contains(tableId)) {
                        throw new AnalysisException("The table(id=" + tableId + ") have unfinished statistics jobs");
                    }
                }
                unfinishedJobs++;
            }
        }
        // Rule2: the job being admitted is not yet counted in unfinishedJobs, so
        // '>=' (not '>') is required to keep the total at or below the limit.
        if (unfinishedJobs >= Config.cbo_max_statistics_job_num) {
            throw new AnalysisException("The unfinished statistics job could not more than cbo_max_statistics_job_num: "
                    + Config.cbo_max_statistics_job_num);
        }
    }

    /**
     * Builds the result rows for SHOW ANALYZE: collects matching job infos,
     * sorts them, applies LIMIT/OFFSET, then stringifies every cell.
     *
     * @param showStmt parsed SHOW ANALYZE statement (filters, ordering, limit)
     * @return display rows, possibly empty
     * @throws AnalysisException on unknown job id, unknown state value,
     *                           or failure to build a job's info row
     */
    public List<List<String>> getAnalyzeJobInfos(ShowAnalyzeStmt showStmt) throws AnalysisException {
        List<List<Comparable>> results = Lists.newArrayList();
        String stateValue = showStmt.getStateValue();
        StatisticsJob.JobState jobState = null;
        if (!Strings.isNullOrEmpty(stateValue)) {
            try {
                jobState = StatisticsJob.JobState.valueOf(stateValue);
            } catch (IllegalArgumentException e) {
                // surface a user-facing error instead of leaking an unchecked exception
                throw new AnalysisException("Unknown job state: " + stateValue);
            }
        }
        // step 1: get job infos
        List<Long> jobIds = showStmt.getJobIds();
        if (jobIds != null && !jobIds.isEmpty()) {
            // explicit job ids requested: each must exist
            for (Long jobId : jobIds) {
                StatisticsJob statisticsJob = idToStatisticsJob.get(jobId);
                if (statisticsJob == null) {
                    throw new AnalysisException("No such job id: " + jobId);
                }
                if (jobState == null || jobState == statisticsJob.getJobState()) {
                    List<Comparable> showInfo = statisticsJob.getShowInfo(null);
                    if (showInfo == null || showInfo.isEmpty()) {
                        continue;
                    }
                    results.add(showInfo);
                }
            }
        } else {
            // no explicit ids: scan all jobs of the statement's database
            long dbId = showStmt.getDbId();
            Set<Long> tblIds = showStmt.getTblIds();
            for (StatisticsJob statisticsJob : idToStatisticsJob.values()) {
                if (statisticsJob.getDbId() != dbId) {
                    continue;
                }
                if (jobState != null && jobState != statisticsJob.getJobState()) {
                    continue;
                }
                // intersection of the job's tables and the requested tables
                Set<Long> set = Sets.newHashSet(statisticsJob.getTblIds());
                set.retainAll(tblIds);
                for (long tblId : set) {
                    List<Comparable> showInfo = statisticsJob.getShowInfo(tblId);
                    if (showInfo == null || showInfo.isEmpty()) {
                        continue;
                    }
                    results.add(showInfo);
                }
            }
        }
        // step2: order the result
        ListComparator<List<Comparable>> comparator;
        List<OrderByPair> orderByPairs = showStmt.getOrderByPairs();
        if (orderByPairs == null) {
            // sort by id asc
            comparator = new ListComparator<>(0);
        } else {
            OrderByPair[] orderByPairArr = new OrderByPair[orderByPairs.size()];
            comparator = new ListComparator<>(orderByPairs.toArray(orderByPairArr));
        }
        results.sort(comparator);
        // step3: filter by limit/offset (-1 means "unset")
        long limit = showStmt.getLimit();
        long offset = showStmt.getOffset() == -1L ? 0 : showStmt.getOffset();
        if (offset >= results.size()) {
            results = Collections.emptyList();
        } else if (limit != -1L) {
            if ((limit + offset) >= results.size()) {
                results = results.subList((int) offset, results.size());
            } else {
                results = results.subList((int) offset, (int) (limit + offset));
            }
        }
        // step4: convert to result and return it
        List<List<String>> rows = Lists.newArrayList();
        for (List<Comparable> result : results) {
            rows.add(result.stream().map(Object::toString).collect(Collectors.toList()));
        }
        return rows;
    }
}

View File

@ -1,530 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.KeysType;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.Partition;
import org.apache.doris.catalog.Table;
import org.apache.doris.catalog.Tablet;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.Config;
import org.apache.doris.common.DdlException;
import org.apache.doris.common.util.MasterDaemon;
import org.apache.doris.statistics.StatsCategory.Category;
import org.apache.doris.statistics.StatsGranularity.Granularity;
import com.google.common.collect.Lists;
import com.google.common.collect.Queues;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Queue;
import java.util.Set;
/**
* Schedule statistics job.
* 1. divide job to multi task
* 2. submit all task to StatisticsTaskScheduler
* Switch job state from pending to scheduling.
*/
public class StatisticsJobScheduler extends MasterDaemon {
    private static final Logger LOG = LogManager.getLogger(StatisticsJobScheduler.class);

    /**
     * If the table row-count is greater than the maximum number of Be scans for a single BE,
     * we'll divide subtasks by partition. relevant values(3700000000L&600000000L) are derived from test.
     * COUNT_MAX_SCAN_PER_TASK is for count(expr), NDV_MAX_SCAN_PER_TASK is for min(c1)/max(c1)/ndv(c1).
     */
    private static final long COUNT_MAX_SCAN_PER_TASK = 3700000000L;
    private static final long NDV_MAX_SCAN_PER_TASK = 600000000L;

    /**
     * if the table row count is greater than the value, use sampleSqlTask instead of SqlTask.
     */
    private static final int MIN_SAMPLE_ROWS = 200000;

    /**
     * Different statistics need to be collected for the jobs submitted by users.
     * if all statistics be collected at the same time, the cluster may be overburdened
     * and normal query services may be affected. Therefore, we put the jobs into the queue
     * and schedule them one by one, and finally divide each job to several subtasks and execute them.
     */
    public final Queue<StatisticsJob> pendingJobQueue
            = Queues.newLinkedBlockingQueue(Config.cbo_max_statistics_job_num);

    public StatisticsJobScheduler() {
        super("Statistics job scheduler",
                Config.statistic_job_scheduler_execution_interval_ms);
    }

    @Override
    protected void runAfterCatalogReady() {
        StatisticsJob pendingJob = pendingJobQueue.peek();
        if (pendingJob == null) {
            return;
        }
        try {
            // divide lazily: tasks may already exist from a previous failed submit
            if (pendingJob.getTasks().size() == 0) {
                divide(pendingJob);
            }
            List<StatisticsTask> tasks = pendingJob.getTasks();
            Env.getCurrentEnv().getStatisticsTaskScheduler().addTasks(tasks);
            pendingJob.updateJobState(StatisticsJob.JobState.SCHEDULING);
            pendingJobQueue.remove();
        } catch (IllegalStateException e) {
            // throw IllegalStateException if the queue is full, re-add the tasks next time
            LOG.info("The statistics task queue is full, schedule the job(id={}) later", pendingJob.getId());
        } catch (DdlException e) {
            pendingJobQueue.remove();
            try {
                // TODO change to without exception
                pendingJob.updateJobState(StatisticsJob.JobState.FAILED);
            } catch (DdlException ddlException) {
                // log the secondary failure with its own stack trace
                // (previously the original exception's stack was attached instead)
                LOG.fatal(ddlException.getMessage(), ddlException);
            }
            LOG.info("Failed to schedule the statistical job(id={})", pendingJob.getId(), e);
        }
    }

    public void addPendingJob(StatisticsJob statisticsJob) throws IllegalStateException {
        pendingJobQueue.add(statisticsJob);
    }

    /**
     * Statistics tasks are of the following types:
     * table:
     * - row_count: table row count are critical in estimating cardinality and memory usage of scan nodes.
     * - data_size: table size, not applicable to CBO, mainly used to monitor and manage table size.
     * column:
     * - num_distinct_value: used to determine the selectivity of an equivalent expression.
     * - min: The minimum value.
     * - max: The maximum value.
     * - num_nulls: number of nulls.
     * - avg_col_len: the average length of a column, in bytes, is used for memory and network IO evaluation.
     * - max_col_len: the Max length of the column, in bytes, is used for memory and network IO evaluation.
     * <p>
     * Divide:
     * - min, max, ndv: These three full indicators are collected by a sub-task.
     * - max_col_lens, avg_col_lens: Two sampling indicators were collected by a sub-task.
     * <p>
     * If the table row-count is greater than the maximum number of Be scans for a single BE,
     * we'll divide subtasks by partition. relevant values(3700000000L&600000000L) are derived from test.
     * <p>
     * Eventually, we will get several subtasks of the following types:
     *
     * @throws DdlException DdlException
     * @see MetaStatisticsTask
     * @see SampleSQLStatisticsTask
     * @see SQLStatisticsTask
     */
    private void divide(StatisticsJob job) throws DdlException {
        Database db = Env.getCurrentInternalCatalog().getDbOrDdlException(job.getDbId());
        for (Long tblId : job.getTblIds()) {
            Optional<Table> optionalTbl = db.getTable(tblId);
            if (!optionalTbl.isPresent()) {
                LOG.warn("Table(id={}) not found in the database {}", tblId, db.getFullName());
                continue;
            }
            Table table = optionalTbl.get();
            if (table.isPartitioned()) {
                getStatsTaskByPartition(job, tblId);
            } else {
                getStatsTaskByTable(job, tblId);
            }
        }
    }

    /**
     * For non-partitioned table, dividing the job into several subtasks.
     *
     * @param job statistics job
     * @param tableId table id
     * @throws DdlException exception
     */
    private void getStatsTaskByTable(StatisticsJob job, long tableId) throws DdlException {
        Database db = Env.getCurrentInternalCatalog().getDbOrDdlException(job.getDbId());
        OlapTable table = (OlapTable) db.getTableOrDdlException(tableId);
        if (table.getDataSize() == 0) {
            LOG.info("Do not collect statistics for empty table {}", table.getName());
            return;
        }
        List<String> colNames = job.getTableIdToColumnName().get(tableId);
        List<Long> backendIds = Env.getCurrentSystemInfo().getBackendIds(true);

        // step1: collect statistics by metadata (all meta descs go into one task)
        List<StatisticsDesc> descs = Lists.newArrayList();
        // table data size
        descs.add(new StatisticsDesc(getTableStatsCategory(job.getDbId(), tableId),
                getTableGranularity(tableId), Collections.singletonList(StatsType.DATA_SIZE)));
        // table row count is exact in metadata only for duplicate-key tables
        if (table.getKeysType() == KeysType.DUP_KEYS) {
            descs.add(new StatisticsDesc(getTableStatsCategory(job.getDbId(), tableId),
                    getTableGranularity(tableId), Collections.singletonList(StatsType.ROW_COUNT)));
        }
        // variable-length columns are handled by SQL tasks below
        List<String> strColNames = Lists.newArrayList();
        // fixed-length columns: max size and avg size come from metadata
        for (String colName : colNames) {
            Column column = table.getColumn(colName);
            if (column == null) {
                LOG.info("Column {} not found in table {}", colName, table.getName());
                continue;
            }
            if (column.getType().isStringType()) {
                strColNames.add(colName);
                continue;
            }
            descs.add(new StatisticsDesc(getColumnStatsCategory(job.getDbId(), tableId, colName),
                    getTableGranularity(tableId), Arrays.asList(StatsType.MAX_SIZE, StatsType.AVG_SIZE)));
        }
        // all meta statistics are collected in one task
        job.getTasks().add(new MetaStatisticsTask(job.getId(), descs));

        long rowCount = table.getRowCount();
        // step2: collect statistics by sql
        // table row count (table model is AGGREGATE or UNIQUE)
        if (table.getKeysType() != KeysType.DUP_KEYS) {
            if (rowCount < backendIds.size() * COUNT_MAX_SCAN_PER_TASK) {
                addSqlTask(job, getTableStatsCategory(job.getDbId(), tableId),
                        getTableGranularity(tableId), Collections.singletonList(StatsType.ROW_COUNT));
            } else {
                // table too large for one scan: divide subtasks by tablet
                for (Partition partition : table.getPartitions()) {
                    for (Tablet tablet : partition.getBaseIndex().getTablets()) {
                        addSqlTask(job, getTableStatsCategory(job.getDbId(), tableId),
                                getTabletGranularity(tablet.getId()),
                                Collections.singletonList(StatsType.ROW_COUNT));
                    }
                }
            }
        }
        // string columns: max size, avg size (sampled when the table is large)
        for (String colName : strColNames) {
            getColumnSizeSqlTask(job, rowCount,
                    getColumnStatsCategory(job.getDbId(), tableId, colName),
                    getTableGranularity(tableId));
        }
        // column num nulls
        for (String colName : colNames) {
            addSqlTask(job, getColumnStatsCategory(job.getDbId(), tableId, colName),
                    getTableGranularity(tableId), Collections.singletonList(StatsType.NUM_NULLS));
        }
        // column max value, min value and ndv
        for (String colName : colNames) {
            if (rowCount < backendIds.size() * NDV_MAX_SCAN_PER_TASK) {
                addSqlTask(job, getColumnStatsCategory(job.getDbId(), tableId, colName),
                        getTableGranularity(tableId),
                        Arrays.asList(StatsType.MAX_VALUE, StatsType.MIN_VALUE, StatsType.NDV));
            } else {
                // for non-partitioned table system automatically
                // generates a partition with the same name as the table name
                for (Partition partition : table.getPartitions()) {
                    for (Tablet tablet : partition.getBaseIndex().getTablets()) {
                        addSqlTask(job, getColumnStatsCategory(job.getDbId(), tableId, colName),
                                getTabletGranularity(tablet.getId()),
                                Arrays.asList(StatsType.MAX_VALUE, StatsType.MIN_VALUE, StatsType.NDV));
                    }
                }
            }
        }
    }

    /**
     * If table is partitioned, dividing the job into several subtasks by partition.
     *
     * @param job statistics job
     * @param tableId table id
     * @throws DdlException exception
     */
    private void getStatsTaskByPartition(StatisticsJob job, long tableId) throws DdlException {
        Database db = Env.getCurrentInternalCatalog().getDbOrDdlException(job.getDbId());
        OlapTable table = (OlapTable) db.getTableOrDdlException(tableId);
        List<String> colNames = job.getTableIdToColumnName().get(tableId);
        List<String> partitionNames = job.getTableIdToPartitionName().get(tableId);
        List<Long> backendIds = Env.getCurrentSystemInfo().getBackendIds(true);
        for (String partitionName : partitionNames) {
            Partition partition = table.getPartition(partitionName);
            if (partition == null) {
                LOG.info("Partition {} not found in the table {}", partitionName, table.getName());
                continue;
            }
            if (partition.getDataSize() == 0) {
                LOG.info("Do not collect statistics for empty partition {} in the table {}",
                        partitionName, table.getName());
                continue;
            }
            long partitionId = partition.getId();
            long rowCount = partition.getBaseIndex().getRowCount();

            // step1: collect statistics by metadata (all meta descs go into one task)
            List<StatisticsDesc> descs = Lists.newArrayList();
            // partition data size
            descs.add(new StatisticsDesc(getPartitionStatsCategory(job.getDbId(), tableId, partitionName),
                    getPartitionGranularity(partitionId), Collections.singletonList(StatsType.DATA_SIZE)));
            // partition row count is exact in metadata only for duplicate-key tables
            if (table.getKeysType() == KeysType.DUP_KEYS) {
                descs.add(new StatisticsDesc(getPartitionStatsCategory(job.getDbId(), tableId, partitionName),
                        getPartitionGranularity(partitionId), Collections.singletonList(StatsType.ROW_COUNT)));
            }
            // variable-length columns are handled by SQL tasks below
            List<String> strColNames = Lists.newArrayList();
            // fixed-length columns: max size and avg size come from metadata
            for (String colName : colNames) {
                Column column = table.getColumn(colName);
                if (column == null) {
                    LOG.info("Column {} not found in the table {}", colName, table.getName());
                    continue;
                }
                if (column.getType().isStringType()) {
                    strColNames.add(colName);
                    continue;
                }
                descs.add(new StatisticsDesc(
                        getColumnStatsCategory(job.getDbId(), tableId, partitionName, colName),
                        getPartitionGranularity(partitionId),
                        Arrays.asList(StatsType.MAX_SIZE, StatsType.AVG_SIZE)));
            }
            // all meta statistics are collected in one task
            job.getTasks().add(new MetaStatisticsTask(job.getId(), descs));

            // step2: collect statistics by sql
            // partition row count (table model is AGGREGATE or UNIQUE)
            if (table.getKeysType() != KeysType.DUP_KEYS) {
                if (rowCount < backendIds.size() * COUNT_MAX_SCAN_PER_TASK) {
                    addSqlTask(job, getPartitionStatsCategory(job.getDbId(), tableId, partitionName),
                            getPartitionGranularity(partitionId),
                            Collections.singletonList(StatsType.ROW_COUNT));
                } else {
                    // divide subtasks by tablet
                    for (Tablet tablet : partition.getBaseIndex().getTablets()) {
                        addSqlTask(job, getPartitionStatsCategory(job.getDbId(), tableId, partitionName),
                                getTabletGranularity(tablet.getId()),
                                Collections.singletonList(StatsType.ROW_COUNT));
                    }
                }
            }
            // string columns: max size, avg size (sampled when the partition is large)
            for (String colName : strColNames) {
                getColumnSizeSqlTask(job, rowCount,
                        getColumnStatsCategory(job.getDbId(), tableId, partitionName, colName),
                        getPartitionGranularity(partitionId));
            }
            // column num nulls
            for (String colName : colNames) {
                addSqlTask(job, getColumnStatsCategory(job.getDbId(), tableId, partitionName, colName),
                        getPartitionGranularity(partitionId),
                        Collections.singletonList(StatsType.NUM_NULLS));
            }
            // column max value, min value and ndv
            for (String colName : colNames) {
                if (rowCount < backendIds.size() * NDV_MAX_SCAN_PER_TASK) {
                    addSqlTask(job, getColumnStatsCategory(job.getDbId(), tableId, partitionName, colName),
                            getPartitionGranularity(partitionId),
                            Arrays.asList(StatsType.MAX_VALUE, StatsType.MIN_VALUE, StatsType.NDV));
                } else {
                    // divide subtasks by tablet
                    for (Tablet tablet : partition.getBaseIndex().getTablets()) {
                        addSqlTask(job, getColumnStatsCategory(job.getDbId(), tableId, partitionName, colName),
                                getTabletGranularity(tablet.getId()),
                                Arrays.asList(StatsType.MAX_VALUE, StatsType.MIN_VALUE, StatsType.NDV));
                    }
                }
            }
        }
    }

    /** Builds one SQL task holding a single desc and appends it to the job's task list. */
    private void addSqlTask(StatisticsJob job, StatsCategory category,
                            StatsGranularity granularity, List<StatsType> statsTypes) {
        StatisticsDesc statsDesc = new StatisticsDesc(category, granularity, statsTypes);
        job.getTasks().add(new SQLStatisticsTask(job.getId(), Collections.singletonList(statsDesc)));
    }

    /** Adds a max/avg column-size SQL task, switching to sampling for large row counts. */
    private void getColumnSizeSqlTask(StatisticsJob job, long rowCount,
                                      StatsCategory colCategory, StatsGranularity colGranularity) {
        StatisticsDesc colStatsDesc = new StatisticsDesc(colCategory,
                colGranularity, Arrays.asList(StatsType.MAX_SIZE, StatsType.AVG_SIZE));
        SQLStatisticsTask sqlTask;
        if (rowCount < MIN_SAMPLE_ROWS) {
            sqlTask = new SQLStatisticsTask(job.getId(), Collections.singletonList(colStatsDesc));
        } else {
            sqlTask = new SampleSQLStatisticsTask(job.getId(), Collections.singletonList(colStatsDesc));
        }
        job.getTasks().add(sqlTask);
    }

    private StatsCategory getTableStatsCategory(long dbId, long tableId) {
        StatsCategory category = new StatsCategory();
        category.setCategory(StatsCategory.Category.TABLE);
        category.setDbId(dbId);
        category.setTableId(tableId);
        return category;
    }

    private StatsCategory getPartitionStatsCategory(long dbId, long tableId, String partitionName) {
        StatsCategory category = new StatsCategory();
        category.setCategory(Category.PARTITION);
        category.setDbId(dbId);
        category.setTableId(tableId);
        category.setPartitionName(partitionName);
        return category;
    }

    private StatsCategory getColumnStatsCategory(long dbId, long tableId, String columnName) {
        StatsCategory category = new StatsCategory();
        category.setCategory(Category.COLUMN);
        category.setDbId(dbId);
        category.setTableId(tableId);
        // set once (was redundantly set twice)
        category.setColumnName(columnName);
        return category;
    }

    private StatsCategory getColumnStatsCategory(long dbId, long tableId, String partitionName, String columnName) {
        StatsCategory category = new StatsCategory();
        category.setCategory(Category.COLUMN);
        category.setDbId(dbId);
        category.setTableId(tableId);
        category.setPartitionName(partitionName);
        // set once (was redundantly set twice)
        category.setColumnName(columnName);
        return category;
    }

    private StatsGranularity getTableGranularity(long tableId) {
        StatsGranularity granularity = new StatsGranularity();
        granularity.setTableId(tableId);
        granularity.setGranularity(Granularity.TABLE);
        return granularity;
    }

    private StatsGranularity getPartitionGranularity(long partitionId) {
        StatsGranularity granularity = new StatsGranularity();
        granularity.setPartitionId(partitionId);
        granularity.setGranularity(Granularity.PARTITION);
        return granularity;
    }

    private StatsGranularity getTabletGranularity(long tabletId) {
        StatsGranularity granularity = new StatsGranularity();
        granularity.setTabletId(tabletId);
        granularity.setGranularity(Granularity.TABLET);
        return granularity;
    }
}

View File

@ -1,508 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import org.apache.doris.analysis.AlterColumnStatsStmt;
import org.apache.doris.analysis.AlterTableStatsStmt;
import org.apache.doris.analysis.DropTableStatsStmt;
import org.apache.doris.analysis.ShowTableStatsStmt;
import org.apache.doris.analysis.TableName;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.PartitionType;
import org.apache.doris.catalog.Table;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.ErrorCode;
import org.apache.doris.common.ErrorReport;
import org.apache.doris.mysql.privilege.PrivPredicate;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.statistics.StatisticsTaskResult.TaskResult;
import org.apache.doris.statistics.StatsGranularity.Granularity;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.commons.lang3.math.NumberUtils;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Set;
public class StatisticsManager {
private final Statistics statistics;
public StatisticsManager() {
statistics = new Statistics();
}
public Statistics getStatistics() {
return statistics;
}
/**
* Support for deleting table or partition statistics.
*
* @param stmt get table name and partition name from it.
*/
public void dropStats(DropTableStatsStmt stmt) {
    Map<Long, Set<String>> tblIdToPartition = stmt.getTblIdToPartition();
    if (tblIdToPartition == null || tblIdToPartition.isEmpty()) {
        return;
    }
    for (Map.Entry<Long, Set<String>> entry : tblIdToPartition.entrySet()) {
        Long tableId = entry.getKey();
        Set<String> partitions = entry.getValue();
        if (partitions == null || partitions.isEmpty()) {
            // no partitions named: drop the whole table's statistics
            statistics.dropTableStats(tableId);
        } else {
            // otherwise drop only the named partitions
            for (String partition : partitions) {
                statistics.dropPartitionStats(tableId, partition);
            }
        }
    }
}
/**
* Alter table or partition stats. if partition name is not null, update partition stats.
*
* @param stmt alter table stats stmt
* @throws AnalysisException if table or partition not exist
*/
public void alterTableStatistics(AlterTableStatsStmt stmt) throws AnalysisException {
    Table table = validateTableName(stmt.getTableName());
    Map<StatsType, String> statsTypeToValue = stmt.getStatsTypeToValue();
    List<String> partitionNames = stmt.getPartitionNames();
    if (partitionNames.isEmpty()) {
        // no partition given: the stats apply to the table as a whole
        statistics.updateTableStats(table.getId(), statsTypeToValue);
    } else {
        // otherwise update each named partition after validating it exists
        for (String partitionName : partitionNames) {
            String validName = validatePartitionName(table, partitionName);
            statistics.updatePartitionStats(table.getId(), validName, statsTypeToValue);
        }
    }
}
/**
* Alter column stats. if partition name is not null, update column of partition stats.
*
* @param stmt alter column stats stmt
* @throws AnalysisException if table, column or partition not exist
*/
public void alterColumnStatistics(AlterColumnStatsStmt stmt) throws AnalysisException {
Table table = validateTableName(stmt.getTableName());
String colName = stmt.getColumnName();
List<String> partitionNames = stmt.getPartitionNames();
Map<StatsType, String> statsTypeToValue = stmt.getStatsTypeToValue();
if ((partitionNames.isEmpty()) && table instanceof OlapTable
&& !((OlapTable) table).getPartitionInfo().getType().equals(PartitionType.UNPARTITIONED)) {
throw new AnalysisException("Partitioned table must specify partition name.");
}
if (partitionNames.isEmpty()) {
Column column = validateColumn(table, colName);
Type colType = column.getType();
statistics.updateColumnStats(table.getId(), colName, colType, statsTypeToValue);
return;
}
for (String partitionName : partitionNames) {
validatePartitionName(table, partitionName);
Column column = validateColumn(table, colName);
Type colType = column.getType();
statistics.updateColumnStats(table.getId(), partitionName, colName, colType, statsTypeToValue);
}
}
/**
* Update statistics. there are three types of statistics: column, table and column.
*
* @param statsTaskResults statistics task results
* @throws AnalysisException if column, table or partition not exist
*/
public void updateStatistics(List<StatisticsTaskResult> statsTaskResults) throws AnalysisException {
// tablet granularity stats(row count, max value, min value, ndv)
Map<StatsType, Map<TaskResult, List<String>>> tabletStats = Maps.newHashMap();
for (StatisticsTaskResult statsTaskResult : statsTaskResults) {
if (statsTaskResult != null) {
List<TaskResult> taskResults = statsTaskResult.getTaskResults();
for (TaskResult result : taskResults) {
validateResult(result);
long tblId = result.getTableId();
Map<StatsType, String> statsTypeToValue = result.getStatsTypeToValue();
if (result.getGranularity() == Granularity.TABLET) {
statsTypeToValue.forEach((statsType, value) -> {
if (tabletStats.containsKey(statsType)) {
Map<TaskResult, List<String>> resultToValue = tabletStats.get(statsType);
List<String> values = resultToValue.get(result);
values.add(value);
} else {
Map<TaskResult, List<String>> resultToValue = Maps.newHashMap();
List<String> values = Lists.newArrayList();
values.add(value);
resultToValue.put(result, values);
tabletStats.put(statsType, resultToValue);
}
});
continue;
}
switch (result.getCategory()) {
case TABLE:
statistics.updateTableStats(tblId, statsTypeToValue);
break;
case PARTITION:
String partitionName = result.getPartitionName();
statistics.updatePartitionStats(tblId, partitionName, statsTypeToValue);
break;
case COLUMN:
updateColumnStats(result, statsTypeToValue);
break;
default:
throw new AnalysisException("Unknown stats category: " + result.getCategory());
}
}
}
}
// update tablet granularity stats
updateTabletStats(tabletStats);
}
private void updateColumnStats(TaskResult result, Map<StatsType, String> statsTypeToValue)
throws AnalysisException {
long dbId = result.getDbId();
long tblId = result.getTableId();
String partitionName = result.getPartitionName();
String colName = result.getColumnName();
Database db = Env.getCurrentInternalCatalog().getDbOrAnalysisException(dbId);
OlapTable table = (OlapTable) db.getTableOrAnalysisException(tblId);
Column column = table.getColumn(colName);
Type colType = column.getType();
switch (result.getGranularity()) {
case TABLE:
statistics.updateColumnStats(tblId, colName, colType, statsTypeToValue);
break;
case PARTITION:
statistics.updateColumnStats(tblId, partitionName, colName, colType, statsTypeToValue);
break;
default:
// The tablet granularity is handle separately
throw new AnalysisException("Unknown granularity: " + result.getGranularity());
}
}
private void updateTabletStats(Map<StatsType, Map<TaskResult, List<String>>> tabletStats)
throws AnalysisException {
for (Map.Entry<StatsType, Map<TaskResult, List<String>>> statsEntry : tabletStats.entrySet()) {
StatsType statsType = statsEntry.getKey();
Map<TaskResult, List<String>> resultToValue = statsEntry.getValue();
for (Map.Entry<TaskResult, List<String>> resultEntry : resultToValue.entrySet()) {
TaskResult result = resultEntry.getKey();
List<String> values = resultEntry.getValue();
switch (statsType) {
case ROW_COUNT:
updateTabletRowCount(result, values);
break;
case MAX_VALUE:
updateTabletMaxValue(result, values);
break;
case MIN_VALUE:
updateTabletMinValue(result, values);
break;
case NDV:
updateTabletNDV(result, values);
break;
default:
throw new AnalysisException("Unknown stats type: " + statsType);
}
}
}
}
/**
* Get the statistics of a table. if specified partition name, get the statistics of the partition.
*
* @param stmt statement
* @return partition or table statistics
* @throws AnalysisException statistics not exist
*/
public List<List<String>> showTableStatsList(ShowTableStatsStmt stmt) throws AnalysisException {
String dbName = stmt.getDbName();
Database db = Env.getCurrentInternalCatalog().getDbOrAnalysisException(dbName);
String tableName = stmt.getTableName();
List<List<String>> result = Lists.newArrayList();
if (tableName != null) {
Table table = db.getTableOrAnalysisException(tableName);
// check priv
if (!Env.getCurrentEnv().getAuth()
.checkTblPriv(ConnectContext.get(), dbName, tableName, PrivPredicate.SHOW)) {
ErrorReport.reportAnalysisException(ErrorCode.ERR_TABLEACCESS_DENIED_ERROR, "SHOW CREATE TABLE",
ConnectContext.get().getQualifiedUser(), ConnectContext.get().getRemoteIP(),
dbName + ": " + tableName);
}
List<String> partitionNames = stmt.getPartitionNames();
if (partitionNames.isEmpty()) {
result.add(showTableStats(table));
} else {
for (String partitionName : partitionNames) {
validatePartitionName(table, partitionName);
result.add(showTableStats(table, partitionName));
}
}
} else {
for (Table table : db.getTables()) {
if (!Env.getCurrentEnv().getAuth()
.checkTblPriv(ConnectContext.get(), dbName, table.getName(), PrivPredicate.SHOW)) {
continue;
}
try {
result.add(showTableStats(table));
} catch (AnalysisException e) {
// ignore no stats table
}
}
}
return result;
}
private List<String> showTableStats(Table table) throws AnalysisException {
TableStats tableStats = statistics.getTableStats(table.getId());
if (tableStats == null) {
throw new AnalysisException("There is no statistics in this table:" + table.getName());
}
List<String> row = Lists.newArrayList();
row.add(table.getName());
row.addAll(tableStats.getShowInfo());
return row;
}
private List<String> showTableStats(Table table, String partitionName) throws AnalysisException {
Map<String, PartitionStats> partitionStats = statistics.getPartitionStats(table.getId(), partitionName);
PartitionStats partitionStat = partitionStats.get(partitionName);
if (partitionStat == null) {
throw new AnalysisException("There is no statistics in this partition:" + partitionName);
}
List<String> row = Lists.newArrayList();
row.add(partitionName);
row.addAll(partitionStat.getShowInfo());
return row;
}
private List<List<String>> showColumnStats(long tableId) throws AnalysisException {
List<List<String>> result = Lists.newArrayList();
Map<String, ColumnStat> columnStats = statistics.getColumnStats(tableId);
columnStats.forEach((key, stats) -> {
List<String> row = Lists.newArrayList();
row.add(key);
row.addAll(stats.getShowInfo());
result.add(row);
});
return result;
}
private List<List<String>> showColumnStats(long tableId, String partitionName) throws AnalysisException {
List<List<String>> result = Lists.newArrayList();
Map<String, ColumnStat> columnStats = statistics.getColumnStats(tableId, partitionName);
columnStats.forEach((key, stats) -> {
List<String> row = Lists.newArrayList();
row.add(key);
row.addAll(stats.getShowInfo());
result.add(row);
});
return result;
}
private void updateTabletRowCount(TaskResult result, List<String> values) throws AnalysisException {
long statsValue = values.stream().filter(NumberUtils::isCreatable)
.mapToLong(Long::parseLong).sum();
Map<StatsType, String> statsTypeToValue = Maps.newHashMap();
statsTypeToValue.put(StatsType.ROW_COUNT, String.valueOf(statsValue));
if (result.getCategory() == StatsCategory.Category.TABLE) {
statistics.updateTableStats(result.getTableId(), statsTypeToValue);
} else if (result.getCategory() == StatsCategory.Category.PARTITION) {
statistics.updatePartitionStats(result.getTableId(), result.getPartitionName(), statsTypeToValue);
}
}
private void updateTabletMaxValue(TaskResult result, List<String> values) throws AnalysisException {
Column column = getNotNullColumn(result);
Type type = column.getType();
String maxValue = getNumericMaxOrMinValue(values, type, true);
Map<StatsType, String> statsTypeToValue = Maps.newHashMap();
statsTypeToValue.put(StatsType.MAX_VALUE, maxValue);
updateTabletGranularityStats(result, type, statsTypeToValue);
}
private void updateTabletMinValue(TaskResult result, List<String> values) throws AnalysisException {
Column column = getNotNullColumn(result);
Type type = column.getType();
String minValue = getNumericMaxOrMinValue(values, type, false);
Map<StatsType, String> statsTypeToValue = Maps.newHashMap();
statsTypeToValue.put(StatsType.MIN_VALUE, minValue);
updateTabletGranularityStats(result, type, statsTypeToValue);
}
private void updateTabletNDV(TaskResult result, List<String> values) throws AnalysisException {
double statsValue = values.stream().filter(NumberUtils::isCreatable)
.mapToLong(Long::parseLong).sum();
Map<StatsType, String> statsTypeToValue = Maps.newHashMap();
statsTypeToValue.put(StatsType.NDV, String.valueOf(statsValue));
Column column = getNotNullColumn(result);
Type type = column.getType();
updateTabletGranularityStats(result, type, statsTypeToValue);
}
private void updateTabletGranularityStats(TaskResult result, Type columnType,
Map<StatsType, String> statsTypeToValue) throws AnalysisException {
if (result.getCategory() == StatsCategory.Category.TABLE) {
statistics.updateColumnStats(result.getTableId(),
result.getColumnName(), columnType, statsTypeToValue);
} else if (result.getCategory() == StatsCategory.Category.PARTITION) {
statistics.updateColumnStats(result.getTableId(), result.getPartitionName(),
result.getColumnName(), columnType, statsTypeToValue);
}
}
private Table validateTableName(TableName dbTableName) throws AnalysisException {
String dbName = dbTableName.getDb();
String tableName = dbTableName.getTbl();
Database db = Env.getCurrentInternalCatalog().getDbOrAnalysisException(dbName);
return db.getTableOrAnalysisException(tableName);
}
/**
* Partition name is optional, if partition name is not null, it will be validated.
*/
private String validatePartitionName(Table table, String partitionName) throws AnalysisException {
if (!table.isPartitioned() && !Strings.isNullOrEmpty(partitionName)) {
ErrorReport.reportAnalysisException(ErrorCode.ERR_PARTITION_CLAUSE_ON_NONPARTITIONED,
partitionName, table.getName());
}
if (!Strings.isNullOrEmpty(partitionName) && table.getPartition(partitionName) == null) {
ErrorReport.reportAnalysisException(ErrorCode.ERR_UNKNOWN_PARTITION,
partitionName, table.getName());
}
return partitionName;
}
private Column validateColumn(Table table, String columnName) throws AnalysisException {
Column column = table.getColumn(columnName);
if (column == null) {
ErrorReport.reportAnalysisException(ErrorCode.ERR_BAD_FIELD_ERROR, columnName, table.getName());
}
return column;
}
private void validateResult(TaskResult result) throws AnalysisException {
Database db = Env.getCurrentInternalCatalog().getDbOrAnalysisException(result.getDbId());
Table table = db.getTableOrAnalysisException(result.getTableId());
if (!Strings.isNullOrEmpty(result.getPartitionName())) {
validatePartitionName(table, result.getPartitionName());
}
if (!Strings.isNullOrEmpty(result.getColumnName())) {
validateColumn(table, result.getColumnName());
}
Map<StatsType, String> statsTypeToValue = result.getStatsTypeToValue();
if (statsTypeToValue == null || statsTypeToValue.isEmpty()) {
throw new AnalysisException("StatsTypeToValue is empty.");
}
}
private Column getNotNullColumn(TaskResult result) throws AnalysisException {
Database db = Env.getCurrentInternalCatalog().getDbOrAnalysisException(result.getDbId());
Table table = db.getTableOrAnalysisException(result.getTableId());
Column column = table.getColumn(result.getColumnName());
if (column == null) {
throw new AnalysisException("Column " + result.getColumnName() + " does not exist");
}
return column;
}
/**
* Get the max/min value of the column.
*
* @param values String List of values
* @param type column type
* @param maxOrMin true for max, false for min
* @return the max/min value of the column.
*/
private String getNumericMaxOrMinValue(List<String> values, Type type, boolean maxOrMin) {
if (type.isFixedPointType()) {
long result = 0L;
for (String value : values) {
if (NumberUtils.isCreatable(value)) {
long temp = Long.parseLong(value);
if (maxOrMin) {
result = Math.max(result, temp);
} else {
result = Math.min(result, temp);
}
}
}
return String.valueOf(result);
}
if (type.isFloatingPointType()) {
double result = 0.0;
for (String value : values) {
if (NumberUtils.isCreatable(value)) {
double temp = Double.parseDouble(value);
if (maxOrMin) {
result = Math.max(result, temp);
} else {
result = Math.min(result, temp);
}
}
}
return String.valueOf(result);
}
// is not numeric type
values.sort(Comparator.naturalOrder());
return values.size() > 0 ? values.get(values.size() - 1) : null;
}
}

View File

@ -1,172 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import org.apache.doris.catalog.Env;
import org.apache.doris.common.DdlException;
import org.apache.doris.statistics.StatisticsTaskResult.TaskResult;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.List;
import java.util.concurrent.Callable;
/**
* The StatisticsTask belongs to one StatisticsJob.
* A job may be split into multiple tasks but a task can only belong to one job.
*
* @granularityDesc, @categoryDesc, @statsTypeList
* These three attributes indicate which statistics this task is responsible for collecting.
* In general, a task will collect more than one @StatsType at the same time
* while all of types belong to the same @granularityDesc and @categoryDesc.
* For example: the task is responsible for collecting min, max, ndv of t1.c1 in partition p1.
* @granularityDesc: StatsGranularity=partition
*/
public abstract class StatisticsTask implements Callable<StatisticsTaskResult> {
protected static final Logger LOG = LogManager.getLogger(StatisticsTask.class);
public enum TaskState {
PENDING,
RUNNING,
FINISHED,
FAILED
}
protected long id = Env.getCurrentEnv().getNextId();
protected long jobId;
protected List<StatisticsDesc> statsDescs;
protected TaskState taskState = TaskState.PENDING;
protected final long createTime = System.currentTimeMillis();
protected long startTime = -1L;
protected long finishTime = -1L;
public StatisticsTask(long jobId, List<StatisticsDesc> statsDescs) {
this.jobId = jobId;
this.statsDescs = statsDescs;
}
public long getId() {
return id;
}
public void setId(long id) {
this.id = id;
}
public long getJobId() {
return jobId;
}
public List<StatisticsDesc> getStatsDescs() {
return statsDescs;
}
public TaskState getTaskState() {
return taskState;
}
public long getCreateTime() {
return createTime;
}
public long getStartTime() {
return startTime;
}
public long getFinishTime() {
return finishTime;
}
/**
* Different statistics implement different collection methods.
*
* @return true if this task is finished, false otherwise
* @throws Exception
*/
@Override
public abstract StatisticsTaskResult call() throws Exception;
// please retain job lock firstly
public void updateTaskState(TaskState newState) throws DdlException {
LOG.info("To change statistics task(id={}) state from {} to {}", id, taskState, newState);
String errorMsg = "Invalid statistics task state transition from ";
// PENDING -> RUNNING/FAILED
if (taskState == TaskState.PENDING) {
switch (newState) {
case RUNNING:
startTime = System.currentTimeMillis();
break;
case FAILED:
finishTime = System.currentTimeMillis();
break;
default:
throw new DdlException(errorMsg + taskState + " to " + newState);
}
} else if (taskState == TaskState.RUNNING) { // RUNNING -> FINISHED/FAILED
switch (newState) {
case FINISHED:
case FAILED:
finishTime = System.currentTimeMillis();
break;
default:
throw new DdlException(errorMsg + taskState + " to " + newState);
}
} else { // unsupported state transition
throw new DdlException(errorMsg + taskState + " to " + newState);
}
LOG.info("Statistics task(id={}) state changed from {} to {}", id, taskState, newState);
taskState = newState;
}
protected void checkStatisticsDesc() throws DdlException {
for (StatisticsDesc statsDesc : statsDescs) {
if (statsDesc == null) {
throw new DdlException("StatisticsDesc is null.");
}
if (statsDesc.getStatsCategory() == null) {
throw new DdlException("Category is null.");
}
if (statsDesc.getStatsGranularity() == null) {
throw new DdlException("Granularity is null.");
}
Preconditions.checkState(statsDesc.getStatsCategory().getDbId() > 0L);
Preconditions.checkState(statsDesc.getStatsCategory().getTableId() > 0L);
}
}
protected TaskResult createNewTaskResult(StatsCategory category, StatsGranularity granularity) {
TaskResult result = new TaskResult();
result.setDbId(category.getDbId());
result.setTableId(category.getTableId());
result.setPartitionName(category.getPartitionName());
result.setColumnName(category.getColumnName());
result.setCategory(category.getCategory());
result.setGranularity(granularity.getGranularity());
result.setStatsTypeToValue(Maps.newHashMap());
return result;
}
}

View File

@ -1,132 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import org.apache.doris.statistics.StatsCategory.Category;
import org.apache.doris.statistics.StatsGranularity.Granularity;
import java.util.List;
import java.util.Map;
import java.util.Objects;
public class StatisticsTaskResult {
private List<TaskResult> taskResults;
public StatisticsTaskResult(List<TaskResult> taskResults) {
this.taskResults = taskResults;
}
public List<TaskResult> getTaskResults() {
return taskResults;
}
public void setTaskResults(List<TaskResult> taskResults) {
this.taskResults = taskResults;
}
public static class TaskResult {
private long dbId = -1L;
private long tableId = -1L;
private String partitionName = "";
private String columnName = "";
private Category category;
private Granularity granularity;
private Map<StatsType, String> statsTypeToValue;
public long getDbId() {
return dbId;
}
public void setDbId(long dbId) {
this.dbId = dbId;
}
public long getTableId() {
return tableId;
}
public void setTableId(long tableId) {
this.tableId = tableId;
}
public String getPartitionName() {
return partitionName;
}
public void setPartitionName(String partitionName) {
this.partitionName = partitionName;
}
public String getColumnName() {
return columnName;
}
public void setColumnName(String columnName) {
this.columnName = columnName;
}
public Category getCategory() {
return category;
}
public void setCategory(Category category) {
this.category = category;
}
public Granularity getGranularity() {
return granularity;
}
public void setGranularity(Granularity granularity) {
this.granularity = granularity;
}
public Map<StatsType, String> getStatsTypeToValue() {
return statsTypeToValue;
}
public void setStatsTypeToValue(Map<StatsType, String> statsTypeToValue) {
this.statsTypeToValue = statsTypeToValue;
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
TaskResult that = (TaskResult) o;
return dbId == that.dbId
&& tableId == that.tableId
&& partitionName.equals(that.partitionName)
&& columnName.equals(that.columnName)
&& category == that.category
&& granularity == that.granularity;
}
@Override
public int hashCode() {
return Objects.hash(dbId, tableId, partitionName,
columnName, category, granularity);
}
}
}

View File

@ -1,198 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import org.apache.doris.analysis.AnalyzeStmt;
import org.apache.doris.catalog.Env;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.Config;
import org.apache.doris.common.DdlException;
import org.apache.doris.common.ThreadPoolManager;
import org.apache.doris.common.util.MasterDaemon;
import org.apache.doris.statistics.StatisticsJob.JobState;
import org.apache.doris.statistics.StatisticsTask.TaskState;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Queues;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.concurrent.CancellationException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
/**
* Schedule statistics task
*/
public class StatisticsTaskScheduler extends MasterDaemon {
private static final Logger LOG = LogManager.getLogger(StatisticsTaskScheduler.class);
private final Queue<StatisticsTask> queue = Queues.newLinkedBlockingQueue();
public StatisticsTaskScheduler() {
super("Statistics task scheduler",
Config.statistic_task_scheduler_execution_interval_ms);
}
@Override
protected void runAfterCatalogReady() {
// step1: task n concurrent tasks from the queue
List<StatisticsTask> tasks = peek();
if (!tasks.isEmpty()) {
ThreadPoolExecutor executor = ThreadPoolManager.newDaemonCacheThreadPool(tasks.size(),
"statistic-pool", false);
StatisticsJobManager jobManager = Env.getCurrentEnv().getStatisticsJobManager();
Map<Long, StatisticsJob> statisticsJobs = jobManager.getIdToStatisticsJob();
Map<Long, List<Map<Long, Future<StatisticsTaskResult>>>> resultMap = Maps.newLinkedHashMap();
for (StatisticsTask task : tasks) {
long jobId = task.getJobId();
if (checkJobIsValid(jobId)) {
// step2: execute task and save task result
Future<StatisticsTaskResult> future = executor.submit(task);
StatisticsJob statisticsJob = statisticsJobs.get(jobId);
if (updateTaskAndJobState(task, statisticsJob)) {
Map<Long, Future<StatisticsTaskResult>> taskInfo = Maps.newHashMap();
taskInfo.put(task.getId(), future);
List<Map<Long, Future<StatisticsTaskResult>>> jobInfo = resultMap
.getOrDefault(jobId, Lists.newArrayList());
jobInfo.add(taskInfo);
resultMap.put(jobId, jobInfo);
}
}
}
// step3: handle task results
handleTaskResult(resultMap);
}
}
public void addTasks(List<StatisticsTask> statisticsTaskList) throws IllegalStateException {
queue.addAll(statisticsTaskList);
}
private List<StatisticsTask> peek() {
List<StatisticsTask> tasks = Lists.newArrayList();
int i = Config.cbo_concurrency_statistics_task_num;
while (i > 0) {
StatisticsTask task = queue.poll();
if (task == null) {
break;
}
tasks.add(task);
i--;
}
return tasks;
}
/**
* Update task and job state
*
* @param task statistics task
* @param job statistics job
* @return true if update task and job state successfully.
*/
private boolean updateTaskAndJobState(StatisticsTask task, StatisticsJob job) {
try {
// update task state
task.updateTaskState(TaskState.RUNNING);
} catch (DdlException e) {
LOG.info("Update statistics task state failed, taskId: " + task.getId(), e);
}
try {
// update job state
if (task.getTaskState() != TaskState.RUNNING) {
job.updateJobState(JobState.FAILED);
} else {
if (job.getJobState() == JobState.SCHEDULING) {
job.updateJobState(JobState.RUNNING);
}
}
} catch (DdlException e) {
LOG.info("Update statistics job state failed, jobId: " + job.getId(), e);
return false;
}
return true;
}
private void handleTaskResult(Map<Long, List<Map<Long, Future<StatisticsTaskResult>>>> resultMap) {
StatisticsManager statsManager = Env.getCurrentEnv().getStatisticsManager();
StatisticsJobManager jobManager = Env.getCurrentEnv().getStatisticsJobManager();
resultMap.forEach((jobId, taskMapList) -> {
if (checkJobIsValid(jobId)) {
StatisticsJob statisticsJob = jobManager.getIdToStatisticsJob().get(jobId);
Map<String, String> properties = statisticsJob.getProperties();
long timeout = Long.parseLong(properties.get(AnalyzeStmt.CBO_STATISTICS_TASK_TIMEOUT_SEC));
// For tasks with tablet granularity,
// we need aggregate calculations to get the results of the statistics,
// so we need to put all the tasks together and handle the results together.
List<StatisticsTaskResult> taskResults = Lists.newArrayList();
for (Map<Long, Future<StatisticsTaskResult>> taskInfos : taskMapList) {
taskInfos.forEach((taskId, future) -> {
String errorMsg = "";
try {
StatisticsTaskResult taskResult = future.get(timeout, TimeUnit.SECONDS);
taskResults.add(taskResult);
} catch (TimeoutException | ExecutionException | InterruptedException
| CancellationException e) {
errorMsg = e.getMessage();
LOG.error("Failed to get statistics. jobId: {}, taskId: {}, e: {}", jobId, taskId, e);
}
try {
statisticsJob.updateJobInfoByTaskId(taskId, errorMsg);
} catch (DdlException e) {
LOG.info("Failed to update statistics job info. jobId: {}, e: {}", jobId, e);
}
});
}
try {
statsManager.updateStatistics(taskResults);
} catch (AnalysisException e) {
LOG.info("Failed to update statistics. jobId: {}, e: {}", jobId, e);
}
}
});
}
public boolean checkJobIsValid(Long jobId) {
StatisticsJobManager jobManager = Env.getCurrentEnv().getStatisticsJobManager();
StatisticsJob statisticsJob = jobManager.getIdToStatisticsJob().get(jobId);
if (statisticsJob == null) {
return false;
}
JobState jobState = statisticsJob.getJobState();
return jobState != JobState.CANCELLED && jobState != JobState.FAILED;
}
}

View File

@ -1,309 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.util.Util;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Predicate;
/**
* There are the statistics of table.
* The table stats are mainly used to provide input for the Optimizer's cost model.
* The description of table stats are following:
* - @rowCount: The row count of table.
* - @dataSize: The data size of table.
* - @nameToColumnStats: <@String columnName, @ColumnStats columnStats>
* <p>
* Each column in the Table will have corresponding @ColumnStats.
* Those @ColumnStats are recorded in @nameToColumnStats form of MAP.
* This facilitates the optimizer to quickly find the corresponding:
* - @ColumnStats based on the column name.
* - @rowCount: The row count of table.
* - @dataSize: The data size of table.
* <p>
* The granularity of the statistics is whole table.
* For example: "@rowCount = 1000" means that the row count is 1000 in the whole table.
* <p>
* After the statistics task is successfully completed, update the TableStats,
* TableStats should not be updated in any other way.
*/
public class TableStats {
public static final StatsType DATA_SIZE = StatsType.DATA_SIZE;
public static final StatsType ROW_COUNT = StatsType.ROW_COUNT;
private static final Predicate<Double> DESIRED_ROW_COUNT_PRED = (v) -> v >= -1L;
private static final Predicate<Long> DESIRED_DATA_SIZE_PRED = (v) -> v >= -1L;
private double rowCount = -1;
private long dataSize = -1;
private final Map<String, PartitionStats> nameToPartitionStats = Maps.newConcurrentMap();
private final Map<String, ColumnStat> nameToColumnStats = Maps.newConcurrentMap();
/**
* Return a default partition statistic.
*/
public static TableStats getDefaultTableStats() {
return new TableStats();
}
public TableStats() {
}
public TableStats(double rowCount, long dataSize) {
this.rowCount = rowCount;
this.dataSize = dataSize;
}
public double getRowCount() {
// '!isEmpty()' is added mainly because the result returns 0
// instead of the expected -1 when nameToPartitionStats is empty.
if (rowCount == -1 && !nameToPartitionStats.isEmpty()) {
return nameToPartitionStats.values().stream()
.filter(partitionStats -> partitionStats.getRowCount() != -1)
.mapToLong(PartitionStats::getRowCount).sum();
}
return rowCount;
}
public long getDataSize() {
if (dataSize == -1 && !nameToPartitionStats.isEmpty()) {
return nameToPartitionStats.values().stream()
.filter(partitionStats -> partitionStats.getDataSize() != -1)
.mapToLong(PartitionStats::getDataSize).sum();
}
return dataSize;
}
public Map<String, PartitionStats> getNameToPartitionStats() {
return nameToPartitionStats;
}
public Map<String, ColumnStat> getNameToColumnStats() {
if (nameToColumnStats.isEmpty()) {
return getAggPartitionColStats();
}
return nameToColumnStats;
}
public PartitionStats getPartitionStats(String partitionName) {
return nameToPartitionStats.get(partitionName);
}
/**
* If the partition statistics do not exist, the default statistics will be returned.
*/
public PartitionStats getPartitionStatsOrDefault(String columnName) {
return nameToPartitionStats.getOrDefault(columnName,
PartitionStats.getDefaultPartitionStats());
}
/**
* If the column statistics do not exist, the default statistics will be returned.
*/
public ColumnStat getColumnStatsOrDefault(String columnName) {
return nameToColumnStats.getOrDefault(columnName,
ColumnStat.getDefaultColumnStats());
}
/**
* After the statistics task is successfully completed, update the statistics of the partition,
* statistics should not be updated in any other way.
*/
public void updateTableStats(Map<StatsType, String> statsTypeToValue) throws AnalysisException {
for (Map.Entry<StatsType, String> entry : statsTypeToValue.entrySet()) {
if (entry.getKey() == ROW_COUNT) {
rowCount = Util.getDoublePropertyOrDefault(entry.getValue(), rowCount,
DESIRED_ROW_COUNT_PRED, ROW_COUNT + " should >= -1");
} else if (entry.getKey() == DATA_SIZE) {
dataSize = Util.getLongPropertyOrDefault(entry.getValue(), dataSize,
DESIRED_DATA_SIZE_PRED, DATA_SIZE + " should >= -1");
}
}
}
/**
* After the statistics task is successfully completed, update the statistics of the partition,
* statistics should not be updated in any other way.
*/
public void updatePartitionStats(String partitionName, Map<StatsType, String> statsTypeToValue)
throws AnalysisException {
PartitionStats partitionStats = getNotNullPartitionStats(partitionName);
partitionStats.updatePartitionStats(statsTypeToValue);
}
/**
* After the statistics task is successfully completed, update the statistics of the column,
* statistics should not be updated in any other way.
*/
public void updateColumnStats(String columnName, Type columnType, Map<StatsType, String> statsTypeToValue)
throws AnalysisException {
ColumnStat columnStat = getColumnStats(columnName);
columnStat.updateStats(columnType, statsTypeToValue);
}
/**
* If partition stats is not exist, create a new one.
*
* @param partitionName partition name
* @return @PartitionStats
*/
private PartitionStats getNotNullPartitionStats(String partitionName) {
PartitionStats partitionStat = nameToPartitionStats.get(partitionName);
if (partitionStat == null) {
partitionStat = new PartitionStats();
nameToPartitionStats.put(partitionName, partitionStat);
}
return partitionStat;
}
/**
* If column stats is not exist, create a new one.
*
* @param columnName column name
* @return @ColumnStats
*/
private ColumnStat getNotNullColumnStats(String columnName) {
ColumnStat columnStat = nameToColumnStats.get(columnName);
if (columnStat == null) {
columnStat = new ColumnStat();
nameToColumnStats.put(columnName, columnStat);
}
return columnStat;
}
public ColumnStat getColumnStats(String columnName) {
ColumnStat columnStat = nameToColumnStats.get(columnName);
if (columnStat == null) {
columnStat = new ColumnStat();
nameToColumnStats.put(columnName, columnStat);
}
return columnStat;
}
public ColumnStat getColumnStatCopy(String columnName) {
ColumnStat columnStat = getColumnStats(columnName);
return columnStat.copy();
}
public List<String> getShowInfo() {
List<String> result = Lists.newArrayList();
result.add(Double.toString(getRowCount()));
result.add(Long.toString(getDataSize()));
return result;
}
public List<String> getShowInfo(String partitionName) {
PartitionStats partitionStats = nameToPartitionStats.get(partitionName);
return partitionStats.getShowInfo();
}
private Map<String, ColumnStat> getAggPartitionColStats() {
Map<String, ColumnStat> aggColumnStats = new HashMap<>();
for (PartitionStats partitionStats : nameToPartitionStats.values()) {
partitionStats.getNameToColumnStats().forEach((colName, columnStats) -> {
if (!aggColumnStats.containsKey(colName)) {
aggColumnStats.put(colName, columnStats.copy());
} else {
ColumnStat tblColStats = aggColumnStats.get(colName);
mergePartitionColumnStats(tblColStats, columnStats);
}
});
}
return aggColumnStats;
}
private void mergePartitionColumnStats(ColumnStat leftStats, ColumnStat rightStats) {
if (leftStats.getNdv() == -1) {
if (rightStats.getNdv() != -1) {
leftStats.setNdv(rightStats.getNdv());
}
} else {
if (rightStats.getNdv() != -1) {
double ndv = leftStats.getNdv() + rightStats.getNdv();
leftStats.setNdv(ndv);
}
}
if (leftStats.getAvgSizeByte() == -1) {
if (rightStats.getAvgSizeByte() != -1) {
leftStats.setAvgSizeByte(rightStats.getAvgSizeByte());
}
} else {
if (rightStats.getAvgSizeByte() != -1) {
double avgSize = (leftStats.getAvgSizeByte() + rightStats.getAvgSizeByte()) / 2;
leftStats.setAvgSizeByte(avgSize);
}
}
if (leftStats.getMaxSizeByte() == -1) {
if (rightStats.getMaxSizeByte() != -1) {
leftStats.setMaxSizeByte(rightStats.getMaxSizeByte());
}
} else {
if (rightStats.getMaxSizeByte() != -1) {
double maxSize = Math.max(leftStats.getMaxSizeByte(), rightStats.getMaxSizeByte());
leftStats.setMaxSizeByte(maxSize);
}
}
if (leftStats.getNumNulls() == -1) {
if (rightStats.getNumNulls() != -1) {
leftStats.setNumNulls(rightStats.getNumNulls());
}
} else {
if (rightStats.getNumNulls() != -1) {
double numNulls = leftStats.getNumNulls() + rightStats.getNumNulls();
leftStats.setNumNulls(numNulls);
}
}
if (Double.isNaN(leftStats.getMinValue())) {
if (!Double.isNaN(rightStats.getMinValue())) {
leftStats.setMinValue(rightStats.getMinValue());
}
} else if (!Double.isNaN(rightStats.getMinValue())) {
double minValue = Math.max(leftStats.getMinValue(), rightStats.getMinValue());
leftStats.setMinValue(minValue);
}
if (Double.isNaN(leftStats.getMaxValue())) {
if (!Double.isNaN(rightStats.getMaxValue())) {
leftStats.setMaxValue(rightStats.getMaxValue());
}
} else if (!Double.isNaN(rightStats.getMaxValue())) {
double maxValue = Math.min(leftStats.getMaxValue(), rightStats.getMaxValue());
leftStats.setMaxValue(maxValue);
}
}
/**
* This method is for unit test.
*/
public void putColumnStats(String name, ColumnStat columnStat) {
nameToColumnStats.put(name, columnStat);
}
}

View File

@ -36,15 +36,10 @@ import org.apache.doris.nereids.types.IntegerType;
import org.apache.doris.nereids.util.MemoTestUtils;
import org.apache.doris.nereids.util.PlanConstructor;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.statistics.ColumnStat;
import org.apache.doris.statistics.StatisticsManager;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.TableStats;
import com.google.common.collect.ImmutableList;
import mockit.Expectations;
import mockit.Mock;
import mockit.MockUp;
import mockit.Mocked;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
@ -60,8 +55,6 @@ public class DeriveStatsJobTest {
ConnectContext context;
@Mocked
Env env;
@Mocked
StatisticsManager statisticsManager;
SlotReference slot1;
@ -81,14 +74,6 @@ public class DeriveStatsJobTest {
}
private LogicalOlapScan constructOlapSCan() throws AnalysisException {
ColumnStat columnStats1 = new ColumnStat(10, 0, 0, 5,
Double.NaN, Double.NaN);
new MockUp<TableStats>(TableStats.class) {
@Mock
public ColumnStat getColumnStats(String columnName) {
return columnStats1;
}
};
long tableId1 = 0;

View File

@ -36,11 +36,9 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalTopN;
import org.apache.doris.nereids.types.IntegerType;
import org.apache.doris.nereids.util.PlanConstructor;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.statistics.ColumnStat;
import org.apache.doris.statistics.ColumnStatistic;
import org.apache.doris.statistics.ColumnStatisticBuilder;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.TableStats;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
@ -238,13 +236,7 @@ public class StatsCalculatorTest {
@Test
public void testOlapScan(@Mocked ConnectContext context) {
ColumnStat columnStat1 = new ColumnStat();
columnStat1.setNdv(10);
columnStat1.setNumNulls(5);
long tableId1 = 0;
TableStats tableStats1 = new TableStats();
tableStats1.putColumnStats("c1", columnStat1);
List<String> qualifier = ImmutableList.of("test", "t");
SlotReference slot1 = new SlotReference("c1", IntegerType.INSTANCE, true, qualifier);

View File

@ -1,154 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Unit tests for {@link ColumnStat}: updating from raw string values, SHOW
 * output formatting, default (unknown) values, and merging two column stats.
 */
public class ColumnStatsTest {

    private ColumnStat stats;

    @Before
    public void setUp() throws Exception {
        stats = new ColumnStat();
    }

    @Test
    public void testUpdateStats() throws Exception {
        Type bigint = Type.fromPrimitiveType(PrimitiveType.BIGINT);
        Map<StatsType, String> values = new HashMap<>();
        values.put(StatsType.MAX_SIZE, "8");
        values.put(StatsType.MIN_VALUE, "0");
        values.put(StatsType.MAX_VALUE, "100");

        stats.updateStats(bigint, values);

        Assert.assertEquals(8, stats.getMaxSizeByte(), 0.1);
        Assert.assertEquals(0, stats.getMinValue(), 0.1);
        Assert.assertEquals(100, stats.getMaxValue(), 0.1);
    }

    @Test
    public void testUpdateStats_ThrowsAnalysisException() {
        Type bigint = Type.fromPrimitiveType(PrimitiveType.BIGINT);
        Map<StatsType, String> values = new HashMap<>();
        // AVG_SIZE must be numeric; a non-numeric string should be rejected.
        values.put(StatsType.AVG_SIZE, "abc");

        Assert.assertThrows(AnalysisException.class, () -> stats.updateStats(bigint, values));
    }

    @Test
    public void testGetShowInfo() throws AnalysisException {
        Type bigint = Type.fromPrimitiveType(PrimitiveType.BIGINT);
        Map<StatsType, String> values = new HashMap<>();
        values.put(StatsType.NDV, "1");
        values.put(StatsType.AVG_SIZE, "8");
        values.put(StatsType.MAX_SIZE, "8");
        values.put(StatsType.NUM_NULLS, "2");
        values.put(StatsType.MIN_VALUE, "0");
        values.put(StatsType.MAX_VALUE, "1000");
        stats.updateStats(bigint, values);

        List<String> showInfo = stats.getShowInfo();

        // All values are rendered as doubles in SHOW output.
        String[] expected = {"1.0", "8.0", "8.0", "2.0", "0.0", "1000.0"};
        Assert.assertArrayEquals(expected, showInfo.toArray(new String[0]));
    }

    @Test
    public void testGetDefaultColumnStats() {
        ColumnStat defaults = ColumnStat.getDefaultColumnStats();

        // -1 / NaN are the "unknown" sentinels.
        Assert.assertEquals(-1L, defaults.getNdv(), 0.1);
        Assert.assertEquals(-1.0f, defaults.getAvgSizeByte(), 0.0001);
        Assert.assertEquals(-1L, defaults.getMaxSizeByte(), 0.1);
        Assert.assertEquals(Double.NaN, defaults.getMaxValue(), 0.1);
        Assert.assertEquals(Double.NaN, defaults.getMinValue(), 0.1);
    }

    @Test
    public void testAggColumnStats() throws Exception {
        ColumnStat defaults = ColumnStat.getDefaultColumnStats();
        ColumnStat other = new ColumnStat(1L, 4.0f, 5L, 10L,
                Double.NaN,
                Double.NaN);

        ColumnStat merged = ColumnStat.mergeColumnStats(defaults, other);

        Assert.assertEquals(1L, merged.getNdv(), 0.1);
        Assert.assertEquals(4.0f, merged.getAvgSizeByte(), 0.0001);
        Assert.assertEquals(5L, merged.getMaxSizeByte(), 0.1);
        Assert.assertEquals(10L, merged.getNumNulls(), 0.1);
        // Both min/max are unknown on one side -> merged value stays NaN.
        Assert.assertEquals(Double.NaN, merged.getMinValue(), 0.1);
        Assert.assertEquals(Double.NaN, merged.getMaxValue(), 0.1);
    }
}

View File

@ -1,136 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Unit tests for {@link PartitionStats}: partition-level updates, column-level
 * updates, rejection of malformed values, and SHOW output formatting.
 */
public class PartitionStatsTest {

    private PartitionStats stats;

    @Before
    public void setUp() throws Exception {
        stats = new PartitionStats();
    }

    @Test
    public void testUpdatePartitionStats() throws Exception {
        Map<StatsType, String> values = new HashMap<>();
        values.put(StatsType.ROW_COUNT, "1000");
        values.put(StatsType.DATA_SIZE, "10240");

        stats.updatePartitionStats(values);

        Assert.assertEquals(1000, stats.getRowCount());
        Assert.assertEquals(10240, stats.getDataSize());
    }

    @Test
    public void testUpdatePartitionStats_ThrowsAnalysisException() {
        Map<StatsType, String> values = new HashMap<>();
        values.put(StatsType.AVG_SIZE, "8");
        // ROW_COUNT must be numeric; a non-numeric string should be rejected.
        values.put(StatsType.ROW_COUNT, "abc");

        Assert.assertThrows(AnalysisException.class, () -> stats.updatePartitionStats(values));
    }

    @Test
    public void testUpdateColumnStats() throws Exception {
        Type bigint = Type.fromPrimitiveType(PrimitiveType.BIGINT);
        Map<StatsType, String> values = new HashMap<>();
        values.put(StatsType.NDV, "1");
        values.put(StatsType.AVG_SIZE, "8");
        values.put(StatsType.MAX_SIZE, "8");
        values.put(StatsType.NUM_NULLS, "2");
        values.put(StatsType.MIN_VALUE, "0");
        values.put(StatsType.MAX_VALUE, "1000");

        stats.updateColumnStats("columnName", bigint, values);

        ColumnStat columnStats = stats.getColumnStats("columnName");
        Assert.assertEquals(1, columnStats.getNdv(), 0.1);
        Assert.assertEquals(8.0f, columnStats.getAvgSizeByte(), 0.0001);
        Assert.assertEquals(8, columnStats.getMaxSizeByte(), 0.1);
        Assert.assertEquals(1000, columnStats.getMaxValue(), 0.1);
        Assert.assertEquals(0, columnStats.getMinValue(), 0.1);
        Assert.assertEquals(2, columnStats.getNumNulls(), 0.1);
    }

    @Test
    public void testUpdateColumnStats_ThrowsAnalysisException() {
        Type bigint = Type.fromPrimitiveType(PrimitiveType.BIGINT);
        Map<StatsType, String> values = new HashMap<>();
        // AVG_SIZE must be numeric; a non-numeric string should be rejected.
        values.put(StatsType.AVG_SIZE, "abc");

        Assert.assertThrows(
                AnalysisException.class,
                () -> stats.updateColumnStats("columnName", bigint, values));
    }

    @Test
    public void testGetShowInfo() throws AnalysisException {
        Map<StatsType, String> values = new HashMap<>();
        values.put(StatsType.ROW_COUNT, "1000");
        values.put(StatsType.DATA_SIZE, "10240");
        stats.updatePartitionStats(values);

        List<String> showInfo = stats.getShowInfo();

        String[] expected = {"1000", "10240"};
        Assert.assertArrayEquals(expected, showInfo.toArray(new String[0]));
    }
}

View File

@ -1,207 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.HashDistributionInfo;
import org.apache.doris.catalog.KeysType;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.PartitionInfo;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.common.DdlException;
import org.apache.doris.common.jmockit.Deencapsulation;
import org.apache.doris.datasource.InternalCatalog;
import org.apache.doris.statistics.util.InternalQuery;
import org.apache.doris.statistics.util.InternalQueryResult;
import mockit.Mock;
import mockit.MockUp;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
/**
 * Unit tests for SQLStatisticsTask: SQL construction for a statistics
 * collection job and execution against a mocked internal query layer.
 *
 * NOTE(review): setUp() injects fake catalog state via Deencapsulation and
 * registers a JMockit MockUp whose lifetime spans the test; the statement
 * order here appears deliberate — do not reorder without verifying.
 */
public class SQLStatisticsTaskTest {
    private SQLStatisticsTask sqlStatisticsTaskUnderTest;

    @Before
    public void setUp() throws Exception {
        // Task under test: a single ROW_COUNT statistics descriptor.
        StatsCategory statsCategory = new StatsCategory();
        StatsGranularity statsGranularity = new StatsGranularity();
        List<StatsType> statsTypes = Collections.singletonList(StatsType.ROW_COUNT);
        sqlStatisticsTaskUnderTest = new SQLStatisticsTask(0L,
                Collections.singletonList(new StatisticsDesc(statsCategory, statsGranularity, statsTypes)));
        // Build a fake catalog: db "db" (id 0) containing olap table "tableName" (id 0).
        InternalCatalog catalog = Env.getCurrentInternalCatalog();
        Column column = new Column("columnName", PrimitiveType.STRING);
        OlapTable tableName = new OlapTable(0L, "tableName",
                Collections.singletonList(column), KeysType.AGG_KEYS,
                new PartitionInfo(), new HashDistributionInfo());
        Database database = new Database(0L, "db");
        database.createTable(tableName);
        // Inject the db into the catalog singleton's private lookup maps.
        ConcurrentHashMap<String, Database> fullNameToDb = new ConcurrentHashMap<>();
        fullNameToDb.put("cluster:db", database);
        Deencapsulation.setField(catalog, "fullNameToDb", fullNameToDb);
        ConcurrentHashMap<Long, Database> idToDb = new ConcurrentHashMap<>();
        idToDb.put(0L, database);
        Deencapsulation.setField(catalog, "idToDb", idToDb);
        // Canned query result: one row with row_count = 1000.
        List<String> columns = Collections.singletonList("row_count");
        List<PrimitiveType> types = Arrays.asList(PrimitiveType.STRING,
                PrimitiveType.INT, PrimitiveType.FLOAT,
                PrimitiveType.DOUBLE, PrimitiveType.BIGINT);
        InternalQueryResult queryResult = new InternalQueryResult();
        InternalQueryResult.ResultRow resultRow =
                new InternalQueryResult.ResultRow(columns, types, Collections.singletonList("1000"));
        queryResult.getResultRows().add(resultRow);
        // Any InternalQuery executed during a test returns the canned result.
        new MockUp<InternalQuery>(InternalQuery.class) {
            @Mock
            public InternalQueryResult query() {
                return queryResult;
            }
        };
    }

    @Test
    public void testConstructQuery() throws Exception {
        // Setup: a table-granularity ROW_COUNT descriptor should produce a COUNT(1) query.
        String expectedSQL = "SELECT COUNT(1) AS row_count FROM tableName;";
        StatsCategory statsCategory = new StatsCategory();
        statsCategory.setCategory(StatsCategory.Category.TABLE);
        statsCategory.setDbId(0L);
        statsCategory.setTableId(0L);
        statsCategory.setPartitionName("partitionName");
        statsCategory.setColumnName("columnName");
        statsCategory.setStatsValue("statsValue");
        StatsGranularity statsGranularity = new StatsGranularity();
        statsGranularity.setGranularity(StatsGranularity.Granularity.TABLE);
        statsGranularity.setTableId(0L);
        statsGranularity.setPartitionId(0L);
        statsGranularity.setTabletId(0L);
        StatisticsDesc statsDesc = new StatisticsDesc(statsCategory, statsGranularity,
                Collections.singletonList(StatsType.ROW_COUNT));
        // Run the test
        String result = sqlStatisticsTaskUnderTest.constructQuery(statsDesc);
        // Verify the results
        Assert.assertEquals(expectedSQL, result);
    }

    @Test
    public void testConstructQuery_ThrowsDdlException() {
        // Setup: StatsType.UNKNOWN has no SQL template, so construction must fail.
        StatsCategory statsCategory = new StatsCategory();
        statsCategory.setCategory(StatsCategory.Category.TABLE);
        statsCategory.setDbId(0L);
        statsCategory.setTableId(0L);
        statsCategory.setPartitionName("partitionName");
        statsCategory.setColumnName("columnName");
        statsCategory.setStatsValue("statsValue");
        StatsGranularity statsGranularity = new StatsGranularity();
        statsGranularity.setGranularity(StatsGranularity.Granularity.TABLE);
        statsGranularity.setTableId(0L);
        statsGranularity.setPartitionId(0L);
        statsGranularity.setTabletId(0L);
        StatisticsDesc statsDesc = new StatisticsDesc(statsCategory, statsGranularity,
                Collections.singletonList(StatsType.UNKNOWN));
        // Run the test
        Assert.assertThrows(DdlException.class,
                () -> sqlStatisticsTaskUnderTest.constructQuery(statsDesc));
    }

    @Test
    public void testExecuteQuery() throws Exception {
        // Setup: the mocked InternalQuery (see setUp) returns row_count = 1000,
        // so the task result should carry ROW_COUNT -> "1000".
        StatsCategory statsCategory = new StatsCategory();
        statsCategory.setCategory(StatsCategory.Category.TABLE);
        statsCategory.setDbId(0L);
        statsCategory.setTableId(0L);
        statsCategory.setPartitionName("partitionName");
        statsCategory.setColumnName("columnName");
        statsCategory.setStatsValue("statsValue");
        StatsGranularity statsGranularity = new StatsGranularity();
        statsGranularity.setGranularity(StatsGranularity.Granularity.TABLE);
        statsGranularity.setTableId(0L);
        statsGranularity.setPartitionId(0L);
        statsGranularity.setTabletId(0L);
        StatisticsTaskResult.TaskResult expectedResult = new StatisticsTaskResult.TaskResult();
        expectedResult.setDbId(0L);
        expectedResult.setTableId(0L);
        expectedResult.setPartitionName("partitionName");
        expectedResult.setColumnName("columnName");
        expectedResult.setCategory(StatsCategory.Category.TABLE);
        expectedResult.setGranularity(StatsGranularity.Granularity.TABLE);
        HashMap<StatsType, String> hashMap = new HashMap<>();
        hashMap.put(StatsType.ROW_COUNT, "1000");
        expectedResult.setStatsTypeToValue(hashMap);
        StatisticsDesc statsDesc = new StatisticsDesc(statsCategory, statsGranularity,
                Collections.singletonList(StatsType.ROW_COUNT));
        // Run the test
        StatisticsTaskResult.TaskResult result = sqlStatisticsTaskUnderTest.executeQuery(statsDesc);
        // Verify the results
        Assert.assertEquals(expectedResult, result);
    }

    @Test
    public void testExecuteQuery_ThrowsException() {
        // Setup: the canned single-column result cannot satisfy three stats types,
        // so execution is expected to fail.
        StatsCategory statsCategory = new StatsCategory();
        statsCategory.setCategory(StatsCategory.Category.TABLE);
        statsCategory.setDbId(0L);
        statsCategory.setTableId(0L);
        statsCategory.setPartitionName("partitionName");
        statsCategory.setColumnName("columnName");
        statsCategory.setStatsValue("statsValue");
        StatsGranularity statsGranularity = new StatsGranularity();
        statsGranularity.setGranularity(StatsGranularity.Granularity.TABLE);
        statsGranularity.setTableId(0L);
        statsGranularity.setPartitionId(0L);
        statsGranularity.setTabletId(0L);
        StatisticsDesc statsDesc = new StatisticsDesc(statsCategory, statsGranularity,
                Arrays.asList(StatsType.NDV, StatsType.MAX_VALUE, StatsType.MIN_VALUE));
        // Run the test
        Assert.assertThrows(Exception.class,
                () -> sqlStatisticsTaskUnderTest.executeQuery(statsDesc));
    }
}

View File

@ -1,205 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.HashDistributionInfo;
import org.apache.doris.catalog.KeysType;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.PartitionInfo;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.common.DdlException;
import org.apache.doris.common.jmockit.Deencapsulation;
import org.apache.doris.datasource.InternalCatalog;
import org.apache.doris.statistics.StatsCategory.Category;
import org.apache.doris.statistics.StatsGranularity.Granularity;
import org.apache.doris.statistics.util.InternalQuery;
import org.apache.doris.statistics.util.InternalQueryResult;
import mockit.Mock;
import mockit.MockUp;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
/**
 * Unit tests for SampleSQLStatisticsTask: query-parameter extraction and
 * TABLESAMPLE SQL construction against a mocked internal query layer.
 *
 * NOTE(review): setUp() injects fake catalog state via Deencapsulation and
 * registers a JMockit MockUp whose lifetime spans the test; the statement
 * order here appears deliberate — do not reorder without verifying.
 */
public class SampleSQLStatisticsTaskTest {
    private SampleSQLStatisticsTask sampleSQLStatisticsTaskUnderTest;

    @Before
    public void setUp() throws Exception {
        // Build a fake catalog: db "db" (id 0) containing olap table "tableName" (id 0).
        InternalCatalog catalog = Env.getCurrentInternalCatalog();
        Column column = new Column("columnName", PrimitiveType.STRING);
        OlapTable tableName = new OlapTable(0L, "tableName",
                Collections.singletonList(column), KeysType.AGG_KEYS,
                new PartitionInfo(), new HashDistributionInfo());
        Database database = new Database(0L, "db");
        database.createTable(tableName);
        // Inject the db into the catalog singleton's private lookup maps.
        ConcurrentHashMap<String, Database> fullNameToDb = new ConcurrentHashMap<>();
        fullNameToDb.put("cluster:db", database);
        Deencapsulation.setField(catalog, "fullNameToDb", fullNameToDb);
        ConcurrentHashMap<Long, Database> idToDb = new ConcurrentHashMap<>();
        idToDb.put(0L, database);
        Deencapsulation.setField(catalog, "idToDb", idToDb);
        // Canned query result: one row with row_count = 1000.
        List<String> columns = Collections.singletonList("row_count");
        List<PrimitiveType> types = Arrays.asList(PrimitiveType.STRING,
                PrimitiveType.INT, PrimitiveType.FLOAT,
                PrimitiveType.DOUBLE, PrimitiveType.BIGINT);
        InternalQueryResult queryResult = new InternalQueryResult();
        InternalQueryResult.ResultRow resultRow =
                new InternalQueryResult.ResultRow(columns, types, Collections.singletonList("1000"));
        queryResult.getResultRows().add(resultRow);
        // Task under test: a single ROW_COUNT statistics descriptor.
        StatsCategory statsCategory = new StatsCategory();
        StatsGranularity statsGranularity = new StatsGranularity();
        List<StatsType> statsTypes = Collections.singletonList(StatsType.ROW_COUNT);
        sampleSQLStatisticsTaskUnderTest = new SampleSQLStatisticsTask(0L,
                Collections.singletonList(new StatisticsDesc(statsCategory, statsGranularity, statsTypes)));
        // Any InternalQuery executed during a test returns the canned result.
        new MockUp<InternalQuery>(InternalQuery.class) {
            @Mock
            public InternalQueryResult query() {
                return queryResult;
            }
        };
    }

    @Test
    public void testGetQueryParams() throws Exception {
        // Setup: expected template parameters, including the sampling percent
        // (presumably a default of 10 — confirm against SampleSQLStatisticsTask).
        Map<String, String> expectedResult = new HashMap<>();
        expectedResult.put("table", "tableName");
        expectedResult.put("partition", "partitionName");
        expectedResult.put("column", "columnName");
        expectedResult.put("percent", "10");
        StatsCategory category = new StatsCategory();
        category.setCategory(Category.TABLE);
        category.setDbId(0L);
        category.setTableId(0L);
        category.setPartitionName("partitionName");
        category.setColumnName("columnName");
        category.setStatsValue("statsValue");
        StatsGranularity statsGranularity = new StatsGranularity();
        statsGranularity.setGranularity(Granularity.TABLE);
        statsGranularity.setTableId(0L);
        statsGranularity.setPartitionId(0L);
        statsGranularity.setTabletId(0L);
        StatisticsDesc statsDesc = new StatisticsDesc(category, statsGranularity,
                Collections.singletonList(StatsType.ROW_COUNT));
        // Run the test
        Map<String, String> result = sampleSQLStatisticsTaskUnderTest.getQueryParams(statsDesc);
        // Verify the results
        Assert.assertEquals(expectedResult, result);
    }

    @Test
    public void testGetQueryParams_ThrowsDdlException() {
        // Setup: dbId -1 does not exist in the injected catalog, so the
        // db lookup during parameter extraction must fail.
        StatsCategory category = new StatsCategory();
        category.setCategory(Category.TABLE);
        category.setDbId(-1L);
        category.setTableId(0L);
        category.setPartitionName("partitionName");
        category.setColumnName("columnName");
        category.setStatsValue("statsValue");
        StatsGranularity statsGranularity = new StatsGranularity();
        statsGranularity.setGranularity(Granularity.PARTITION);
        statsGranularity.setTableId(0L);
        statsGranularity.setPartitionId(0L);
        statsGranularity.setTabletId(0L);
        StatisticsDesc statsDesc = new StatisticsDesc(category, statsGranularity,
                Collections.singletonList(StatsType.ROW_COUNT));
        // Run the test
        Assert.assertThrows(DdlException.class,
                () -> sampleSQLStatisticsTaskUnderTest.getQueryParams(statsDesc));
    }

    @Test
    public void testConstructQuery() throws Exception {
        // Setup: the sampled task should emit a TABLESAMPLE clause,
        // unlike the plain SQLStatisticsTask.
        String expectedSQL = "SELECT COUNT(1) AS row_count FROM tableName TABLESAMPLE(10 PERCENT);";
        StatsCategory statsCategory = new StatsCategory();
        statsCategory.setCategory(StatsCategory.Category.TABLE);
        statsCategory.setDbId(0L);
        statsCategory.setTableId(0L);
        statsCategory.setPartitionName("partitionName");
        statsCategory.setColumnName("columnName");
        statsCategory.setStatsValue("statsValue");
        StatsGranularity statsGranularity = new StatsGranularity();
        statsGranularity.setGranularity(StatsGranularity.Granularity.TABLE);
        statsGranularity.setTableId(0L);
        statsGranularity.setPartitionId(0L);
        statsGranularity.setTabletId(0L);
        StatisticsDesc statsDesc = new StatisticsDesc(statsCategory, statsGranularity,
                Collections.singletonList(StatsType.ROW_COUNT));
        // Run the test
        String result = sampleSQLStatisticsTaskUnderTest.constructQuery(statsDesc);
        // Verify the results
        Assert.assertEquals(expectedSQL, result);
    }

    @Test
    public void testExecuteQuery_ThrowsException() {
        // Setup: the canned single-column result cannot satisfy three stats types,
        // so execution is expected to fail.
        StatsGranularity statsGranularity = new StatsGranularity();
        statsGranularity.setGranularity(StatsGranularity.Granularity.TABLE);
        statsGranularity.setTableId(0L);
        statsGranularity.setPartitionId(0L);
        statsGranularity.setTabletId(0L);
        StatsCategory statsCategory = new StatsCategory();
        statsCategory.setCategory(StatsCategory.Category.TABLE);
        statsCategory.setDbId(0L);
        statsCategory.setTableId(0L);
        statsCategory.setPartitionName("partitionName");
        statsCategory.setColumnName("columnName");
        statsCategory.setStatsValue("statsValue");
        StatisticsDesc statsDesc = new StatisticsDesc(statsCategory, statsGranularity,
                Arrays.asList(StatsType.NDV, StatsType.MAX_VALUE, StatsType.MIN_VALUE));
        // Run the test
        Assert.assertThrows(Exception.class,
                () -> sampleSQLStatisticsTaskUnderTest.executeQuery(statsDesc));
    }
}

View File

@ -1,182 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.HashDistributionInfo;
import org.apache.doris.catalog.KeysType;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.PartitionInfo;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.common.jmockit.Deencapsulation;
import org.apache.doris.datasource.InternalCatalog;
import org.apache.doris.system.SystemInfoService;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import mockit.Mock;
import mockit.MockUp;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
/**
 * Unit test for {@code StatisticsJobScheduler}: a pending job covering two
 * tables is scheduled, and the test verifies it is split into the expected
 * number of meta-scan tasks and SQL tasks.
 */
public class StatisticsJobSchedulerTest {
// Pending job covering table ids 0 and 1, columns c1/c2 on each.
private StatisticsJob statisticsJob;
private StatisticsJobScheduler statisticsJobSchedulerUnderTest;
@Before
public void setUp() throws Exception {
HashSet<Long> tblIds = Sets.newHashSet();
tblIds.add(0L);
tblIds.add(1L);
Map<Long, List<String>> tableIdToColumnName = Maps.newHashMap();
tableIdToColumnName.put(0L, Arrays.asList("c1", "c2"));
tableIdToColumnName.put(1L, Arrays.asList("c1", "c2"));
Map<Long, List<String>> tblIdToPartitionName = Maps.newHashMap();
statisticsJob = new StatisticsJob(0L, tblIds, tblIdToPartitionName,
tableIdToColumnName, null);
statisticsJobSchedulerUnderTest = new StatisticsJobScheduler();
statisticsJobSchedulerUnderTest.addPendingJob(statisticsJob);
}
@Test
public void testRunAfterCatalogReady() {
// Setup: two OlapTables (AGG_KEYS and DUP_KEYS) registered in database "db".
Column col1 = new Column("c1", PrimitiveType.STRING);
Column col2 = new Column("c2", PrimitiveType.INT);
OlapTable tbl1 = new OlapTable(0L, "tbl1", Arrays.asList(col1, col2),
KeysType.AGG_KEYS, new PartitionInfo(), new HashDistributionInfo());
OlapTable tbl2 = new OlapTable(1L, "tbl2", Arrays.asList(col1, col2),
KeysType.DUP_KEYS, new PartitionInfo(), new HashDistributionInfo());
Database database = new Database(0L, "db");
database.createTable(tbl1);
database.createTable(tbl2);
// Inject the database into the internal catalog via reflection so the
// scheduler can resolve the job's db/table ids without a running FE.
InternalCatalog catalog = Env.getCurrentInternalCatalog();
ConcurrentHashMap<String, Database> fullNameToDb = new ConcurrentHashMap<>();
fullNameToDb.put("cluster:db", database);
Deencapsulation.setField(catalog, "fullNameToDb", fullNameToDb);
ConcurrentHashMap<Long, Database> idToDb = new ConcurrentHashMap<>();
idToDb.put(0L, database);
Deencapsulation.setField(catalog, "idToDb", idToDb);
// Stub the cluster to report a single backend id regardless of aliveness.
new MockUp<SystemInfoService>(SystemInfoService.class) {
@Mock
public List<Long> getBackendIds(boolean needAlive) {
return Collections.singletonList(1L);
}
};
// Stub table data size to a constant so task generation is deterministic
// and does not touch real tablet metadata.
new MockUp<OlapTable>(OlapTable.class) {
@Mock
public long getDataSize() {
return 1L;
}
};
// Run the test
statisticsJobSchedulerUnderTest.runAfterCatalogReady();
/*
 * expected results:
 * mateTask(2):
 * - tbl1:
 * - task1:
 * - data_size
 * - max_size(c2)
 * - avg_size(c2)
 * - tbl2:
 * - task:
 * - row_count
 * - data_size
 * - max_size(c2)
 * - avg_size(c2)
 *
 * sqlTask(11):
 * - tbl1:
 * - task:
 * - ndv(c1)
 * - min_value(c1)
 * - max_value(c1)
 * - task:
 * - ndv(c2)
 * - min_value(c2)
 * - max_value(c2)
 * - task:
 * - max_size(c1)
 * - avg_size(c1)
 * - task:
 * - num_nulls(c1)
 * - task:
 * - num_nulls(c2)
 * - task
 * - row_count
 * - tbl2:
 * - task:
 * - ndv(c1)
 * - min_value(c1)
 * - max_value(c1)
 * - task:
 * - ndv(c2)
 * - min_value(c2)
 * - max_value(c2)
 * - task:
 * - max_size(c1)
 * - avg_size(c1)
 * - task:
 * - num_nulls(c1)
 * - task:
 * - num_nulls(c2)
 */
// Verify the results: 13 tasks total, 2 meta-scan + 11 SQL (see breakdown above).
List<StatisticsTask> tasks = statisticsJob.getTasks();
Assert.assertEquals(13, tasks.size());
int sqlTaskCount = 0;
int metaTaskCount = 0;
for (StatisticsTask task : tasks) {
if (task instanceof SQLStatisticsTask) {
sqlTaskCount++;
} else if (task instanceof MetaStatisticsTask) {
metaTaskCount++;
} else {
Assert.fail("Unknown task type.");
}
}
Assert.assertEquals(2, metaTaskCount);
Assert.assertEquals(11, sqlTaskCount);
}
}

View File

@ -1,120 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import org.apache.doris.common.DdlException;
import org.apache.doris.statistics.StatisticsJob.JobState;
import org.apache.doris.statistics.StatisticsTask.TaskState;
import com.google.common.collect.Maps;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
/**
 * Unit test for the StatisticsJob state machine: legal and illegal state
 * transitions, and how per-task results roll up into the job state.
 */
public class StatisticsJobTest {
    // Job under test, covering a single table (id 0) with one attached task.
    private StatisticsJob job;
    // A single ROW_COUNT SQL task registered on the job in setUp().
    private StatisticsTask task;

    @Before
    public void setUp() throws Exception {
        HashSet<Long> tableIds = new HashSet<>();
        tableIds.add(0L);
        Map<Long, List<String>> partitionNamesByTable = Maps.newHashMap();
        Map<Long, List<String>> columnNamesByTable = Maps.newHashMap();
        job = new StatisticsJob(0L, tableIds, partitionNamesByTable,
                columnNamesByTable, new HashMap<>());

        StatisticsDesc desc = new StatisticsDesc(new StatsCategory(), new StatsGranularity(),
                Collections.singletonList(StatsType.ROW_COUNT));
        task = new SQLStatisticsTask(0L, Collections.singletonList(desc));
        job.getTasks().add(task);
    }

    @Test
    public void testUpdateJobState() throws Exception {
        // PENDING -> SCHEDULING is a legal transition.
        job.updateJobState(JobState.SCHEDULING);
        Assert.assertEquals(JobState.SCHEDULING, job.getJobState());
    }

    @Test
    public void testUpdateJobState_ThrowsDdlException() {
        // Jumping straight from PENDING to RUNNING must be rejected.
        Assert.assertThrows(DdlException.class,
                () -> job.updateJobState(JobState.RUNNING));
    }

    @Test
    public void testUpdateJobInfoByTaskId() throws Exception {
        job.updateJobState(JobState.SCHEDULING);
        job.updateJobState(JobState.RUNNING);
        task.updateTaskState(TaskState.RUNNING);

        // An empty error message finishes the task; with all tasks done,
        // the job itself transitions to FINISHED.
        job.updateJobInfoByTaskId(task.getId(), "");

        Assert.assertEquals(JobState.FINISHED, job.getJobState());
        Assert.assertEquals(TaskState.FINISHED, task.getTaskState());
    }

    @Test
    public void testUpdateJobInfoByTaskIdFailed() throws Exception {
        job.updateJobState(JobState.SCHEDULING);
        job.updateJobState(JobState.RUNNING);
        task.updateTaskState(TaskState.RUNNING);

        // A non-empty error message fails both the task and the job.
        job.updateJobInfoByTaskId(task.getId(), "errorMsg");

        Assert.assertEquals(JobState.FAILED, job.getJobState());
        Assert.assertEquals(TaskState.FAILED, task.getTaskState());
    }

    @Test
    public void testUpdateJobInfoByTaskId_ThrowsDdlException() {
        // Reporting a task result while the job is still PENDING is illegal.
        Assert.assertThrows(DdlException.class,
                () -> job.updateJobInfoByTaskId(task.getId(), ""));
    }
}

View File

@ -1,167 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import org.apache.doris.analysis.DropTableStatsStmt;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.HashDistributionInfo;
import org.apache.doris.catalog.KeysType;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.PartitionInfo;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.jmockit.Deencapsulation;
import org.apache.doris.datasource.InternalCatalog;
import org.apache.doris.statistics.StatisticsTaskResult.TaskResult;
import org.apache.doris.statistics.StatsCategory.Category;
import org.apache.doris.statistics.StatsGranularity.Granularity;
import com.google.common.collect.Maps;
import mockit.Expectations;
import mockit.Mocked;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
/**
 * Unit test for {@code StatisticsManager}: applying task results to the
 * in-memory statistics store, rejecting malformed results, and dropping
 * previously recorded table stats.
 */
public class StatisticsManagerTest {
private StatisticsManager statisticsManagerUnderTest;
@Before
public void setUp() throws Exception {
// Two OlapTables (ids 0 and 1) registered in database "db"; the database
// is injected into the internal catalog via reflection so lookups by
// db/table id work without a running FE.
Column col1 = new Column("c1", PrimitiveType.STRING);
Column col2 = new Column("c2", PrimitiveType.INT);
OlapTable tbl1 = new OlapTable(0L, "tbl1", Arrays.asList(col1, col2), KeysType.AGG_KEYS,
new PartitionInfo(), new HashDistributionInfo());
OlapTable tbl2 = new OlapTable(1L, "tbl2", Arrays.asList(col1, col2), KeysType.DUP_KEYS,
new PartitionInfo(), new HashDistributionInfo());
Database database = new Database(0L, "db");
database.createTable(tbl1);
database.createTable(tbl2);
InternalCatalog catalog = Env.getCurrentInternalCatalog();
ConcurrentHashMap<String, Database> fullNameToDb = new ConcurrentHashMap<>();
fullNameToDb.put("cluster:db", database);
Deencapsulation.setField(catalog, "fullNameToDb", fullNameToDb);
ConcurrentHashMap<Long, Database> idToDb = new ConcurrentHashMap<>();
idToDb.put(0L, database);
Deencapsulation.setField(catalog, "idToDb", idToDb);
statisticsManagerUnderTest = new StatisticsManager();
}
@Test
public void testUpdateStatistics() throws Exception {
// Setup: a TABLE-level result carrying ROW_COUNT and DATA_SIZE for table 0.
TaskResult taskResult = new TaskResult();
taskResult.setDbId(0L);
taskResult.setTableId(0L);
taskResult.setCategory(Category.TABLE);
taskResult.setGranularity(Granularity.TABLE);
Map<StatsType, String> statsTypeToValue = new HashMap<>();
statsTypeToValue.put(StatsType.ROW_COUNT, "1000");
statsTypeToValue.put(StatsType.DATA_SIZE, "10240");
taskResult.setStatsTypeToValue(statsTypeToValue);
List<StatisticsTaskResult> statsTaskResults = Collections.singletonList(
new StatisticsTaskResult(Collections.singletonList(taskResult)));
// Run the test
statisticsManagerUnderTest.updateStatistics(statsTaskResults);
Statistics statistics = statisticsManagerUnderTest.getStatistics();
TableStats tableStats = statistics.getTableStats(0L);
// Verify the results: the submitted values are visible in the stats store.
double rowCount = tableStats.getRowCount();
Assert.assertEquals(1000L, rowCount, 0.1);
long dataSize = tableStats.getDataSize();
Assert.assertEquals(10240L, dataSize);
}
@Test
public void testUpdateStatistics_ThrowsAnalysisException() {
// Setup: NOTE(review) — this result (table 1, TABLE category/granularity
// but with partition and column names set, empty value map) is expected
// to be rejected; confirm the exact invalid field against
// StatisticsManager.updateStatistics.
TaskResult taskResult = new TaskResult();
taskResult.setDbId(0L);
taskResult.setTableId(1L);
taskResult.setPartitionName("partitionName");
taskResult.setColumnName("columnName");
taskResult.setCategory(Category.TABLE);
taskResult.setGranularity(Granularity.TABLE);
taskResult.setStatsTypeToValue(new HashMap<>());
List<StatisticsTaskResult> statsTaskResults = Collections.singletonList(
new StatisticsTaskResult(Collections.singletonList(taskResult)));
// Run the test
Assert.assertThrows(AnalysisException.class,
() -> statisticsManagerUnderTest.updateStatistics(statsTaskResults));
}
@Test
public void testDropStats(@Mocked DropTableStatsStmt stmt) throws AnalysisException {
// First record stats for table 0 so there is something to drop.
TaskResult taskResult = new TaskResult();
taskResult.setDbId(0L);
taskResult.setTableId(0L);
taskResult.setCategory(Category.TABLE);
taskResult.setGranularity(Granularity.TABLE);
Map<StatsType, String> statsTypeToValue = new HashMap<>();
statsTypeToValue.put(StatsType.ROW_COUNT, "1000");
statsTypeToValue.put(StatsType.DATA_SIZE, "10240");
taskResult.setStatsTypeToValue(statsTypeToValue);
List<StatisticsTaskResult> statsTaskResults = Collections.singletonList(
new StatisticsTaskResult(Collections.singletonList(taskResult)));
statisticsManagerUnderTest.updateStatistics(statsTaskResults);
// Mock the DROP statement to target table 0 with no partition restriction.
Map<Long, Set<String>> tblIdToPartition = Maps.newHashMap();
tblIdToPartition.put(0L, null);
new Expectations() {
{
stmt.getTblIdToPartition();
this.minTimes = 0;
this.result = tblIdToPartition;
}
};
// Run the test
statisticsManagerUnderTest.dropStats(stmt);
// Verify the results: dropped stats fall back to the -1 defaults.
Statistics statistics = statisticsManagerUnderTest.getStatistics();
TableStats statsOrDefault = statistics.getTableStatsOrDefault(0L);
double rowCount = statsOrDefault.getRowCount();
Assert.assertEquals(-1.0f, rowCount, 0.0001);
double dataSize = statsOrDefault.getDataSize();
Assert.assertEquals(-1.0f, dataSize, 0.0001);
}
}

View File

@ -1,267 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import java.util.HashMap;
import java.util.Map;
/**
 * Unit test for {@code Statistics}: updating and reading table-, partition-
 * and column-level statistics, including rejection of malformed values and
 * lookups for ids that were never recorded.
 */
public class StatisticsTest {
private Statistics statisticsUnderTest;
@Before
public void setUp() throws Exception {
statisticsUnderTest = new Statistics();
}
@Test
public void testUpdateTableStats() throws Exception {
// Setup
Map<StatsType, String> statsTypeToValue = new HashMap<>();
statsTypeToValue.put(StatsType.ROW_COUNT, "1000");
// Run the test
statisticsUnderTest.updateTableStats(0L, statsTypeToValue);
long rowCount = (long) statisticsUnderTest.getTableStats(0L).getRowCount();
// Verify the results
Assert.assertEquals(1000L, rowCount);
}
@Test
public void testUpdateTableStats_ThrowsAnalysisException() {
// Setup: a negative row count must be rejected.
Map<StatsType, String> statsTypeToValue = new HashMap<>();
statsTypeToValue.put(StatsType.ROW_COUNT, "-100");
// Run the test
Assert.assertThrows(AnalysisException.class,
() -> statisticsUnderTest.updateTableStats(0L, statsTypeToValue));
}
@Test
public void testUpdatePartitionStats() throws Exception {
// Setup
Map<StatsType, String> statsTypeToValue = new HashMap<>();
statsTypeToValue.put(StatsType.ROW_COUNT, "1000");
// Run the test
statisticsUnderTest.updatePartitionStats(0L, "partitionName", statsTypeToValue);
Map<String, PartitionStats> partitionStats = statisticsUnderTest
.getPartitionStats(0L, "partitionName");
long rowCount = partitionStats.get("partitionName").getRowCount();
// Verify the results
Assert.assertEquals(1000L, rowCount);
}
@Test
public void testUpdatePartitionStats_ThrowsAnalysisException() {
// Setup: a negative row count must be rejected at partition level too.
Map<StatsType, String> statsTypeToValue = new HashMap<>();
statsTypeToValue.put(StatsType.ROW_COUNT, "-100");
// Run the test
Assert.assertThrows(AnalysisException.class, () -> statisticsUnderTest
.updatePartitionStats(0L, "partitionName", statsTypeToValue));
}
@Test
public void testUpdateTableColumnStats() throws Exception {
// Setup
Type columnType = Type.fromPrimitiveType(PrimitiveType.STRING);
Map<StatsType, String> statsTypeToValue = new HashMap<>();
statsTypeToValue.put(StatsType.NUM_NULLS, "1000");
// Run the test
statisticsUnderTest.updateColumnStats(0L, "columnName", columnType, statsTypeToValue);
Map<String, ColumnStat> columnStats = statisticsUnderTest.getColumnStats(0L);
long numNulls = (long) columnStats.get("columnName").getNumNulls();
// Verify the results
Assert.assertEquals(1000L, numNulls);
}
@Test
public void testUpdateTableColumnStats_ThrowsAnalysisException() {
// Setup: a non-numeric MAX_VALUE for a BIGINT column must be rejected.
Type columnType = Type.fromPrimitiveType(PrimitiveType.BIGINT);
Map<StatsType, String> statsTypeToValue = new HashMap<>();
statsTypeToValue.put(StatsType.MAX_VALUE, "ABC");
// Run the test
Assert.assertThrows(AnalysisException.class, () -> statisticsUnderTest
.updateColumnStats(0L, "columnName", columnType, statsTypeToValue));
}
@Test
public void testUpdatePartitionColumnStats() throws Exception {
// Setup
Type columnType = Type.fromPrimitiveType(PrimitiveType.STRING);
Map<StatsType, String> statsTypeToValue = new HashMap<>();
statsTypeToValue.put(StatsType.NUM_NULLS, "1000");
// Run the test: the partition-scoped overload of updateColumnStats.
statisticsUnderTest.updateColumnStats(0L, "partitionName",
"columnName", columnType, statsTypeToValue);
Map<String, ColumnStat> columnStats = statisticsUnderTest
.getColumnStats(0L, "partitionName");
long numNulls = (long) columnStats.get("columnName").getNumNulls();
// Verify the results
Assert.assertEquals(1000L, numNulls);
}
@Test
public void testUpdatePartitionColumnStats_ThrowsAnalysisException() {
// Setup: a non-numeric ROW_COUNT must be rejected.
Type columnType = Type.fromPrimitiveType(PrimitiveType.BIGINT);
Map<StatsType, String> statsTypeToValue = new HashMap<>();
statsTypeToValue.put(StatsType.ROW_COUNT, "ABC");
// Run the test
Assert.assertThrows(AnalysisException.class, () -> statisticsUnderTest.updateColumnStats(
0L, "partitionName", "columnName", columnType, statsTypeToValue));
}
@Test
public void testGetTableStats() throws Exception {
// Setup
Map<StatsType, String> statsTypeToValue = new HashMap<>();
statsTypeToValue.put(StatsType.ROW_COUNT, "1000");
statisticsUnderTest.updateTableStats(0L, statsTypeToValue);
// Run the test
TableStats result = statisticsUnderTest.getTableStats(0L);
// Verify the results
double rowCount = result.getRowCount();
Assert.assertEquals(1000, rowCount, 0.1);
}
@Test
public void testGetTableStats_ThrowsAnalysisException() {
// Verify the results: looking up a table id with no recorded stats fails.
Assert.assertThrows(AnalysisException.class,
() -> statisticsUnderTest.getTableStats(0L));
}
@Test
public void testGetPartitionStats() throws Exception {
// Setup
Map<StatsType, String> statsTypeToValue = new HashMap<>();
statsTypeToValue.put(StatsType.ROW_COUNT, "1000");
statisticsUnderTest.updatePartitionStats(0L, "partitionName", statsTypeToValue);
// Run the test
Map<String, PartitionStats> result = statisticsUnderTest.getPartitionStats(0L);
// Verify the results
PartitionStats partitionStats = result.get("partitionName");
long rowCount = partitionStats.getRowCount();
Assert.assertEquals(1000, rowCount);
}
@Test
public void testGetPartitionStats1_ThrowsAnalysisException() {
// Verify the results: no partition stats recorded for this table id.
Assert.assertThrows(AnalysisException.class,
() -> statisticsUnderTest.getPartitionStats(0L));
}
@Test
public void testGetPartitionStatsWithName() throws Exception {
// Setup
Map<StatsType, String> statsTypeToValue = new HashMap<>();
statsTypeToValue.put(StatsType.ROW_COUNT, "1000");
statisticsUnderTest.updatePartitionStats(0L, "partitionName", statsTypeToValue);
// Run the test
Map<String, PartitionStats> result = statisticsUnderTest
.getPartitionStats(0L, "partitionName");
// Verify the results
PartitionStats partitionStats = result.get("partitionName");
long rowCount = partitionStats.getRowCount();
Assert.assertEquals(1000, rowCount);
}
@Test
public void testGetPartitionStatsWithName_ThrowsAnalysisException() {
// Run the test: the named-partition lookup also fails when nothing was recorded.
Assert.assertThrows(AnalysisException.class, () -> statisticsUnderTest
.getPartitionStats(0L, "partitionName"));
}
@Test
public void testGetTableColumnStats() throws Exception {
// Setup
Type columnType = Type.fromPrimitiveType(PrimitiveType.STRING);
Map<StatsType, String> statsTypeToValue = new HashMap<>();
statsTypeToValue.put(StatsType.NUM_NULLS, "1000");
statisticsUnderTest.updateColumnStats(0L, "columnName", columnType, statsTypeToValue);
// Run the test
Map<String, ColumnStat> result = statisticsUnderTest.getColumnStats(0L);
// Verify the results
ColumnStat columnStats = result.get("columnName");
double numNulls = columnStats.getNumNulls();
Assert.assertEquals(1000, numNulls, 0.1);
}
@Test
public void testGetTableColumnStats_ThrowsAnalysisException() {
// Verify the results: no column stats recorded for this table id.
Assert.assertThrows(AnalysisException.class,
() -> statisticsUnderTest.getColumnStats(0L));
}
@Test
public void testGetPartitionColumnStats() throws Exception {
// Setup
Type columnType = Type.fromPrimitiveType(PrimitiveType.STRING);
Map<StatsType, String> statsTypeToValue = new HashMap<>();
statsTypeToValue.put(StatsType.NUM_NULLS, "1000");
statisticsUnderTest.updateColumnStats(0L, "partitionName",
"columnName", columnType, statsTypeToValue);
// Run the test
Map<String, ColumnStat> result = statisticsUnderTest
.getColumnStats(0L, "partitionName");
// Verify the results
ColumnStat columnStats = result.get("columnName");
double numNulls = columnStats.getNumNulls();
Assert.assertEquals(1000, numNulls, 0.1);
}
@Test
public void testGetPartitionColumnStats_ThrowsAnalysisException() {
// Verify the results: no partition-scoped column stats recorded.
Assert.assertThrows(AnalysisException.class, () -> statisticsUnderTest
.getColumnStats(0L, "partitionName"));
}
}

View File

@ -1,182 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Unit test for {@code TableStats}: table-, partition- and column-level
 * updates, rejection of unparsable values, and the SHOW-output formatting.
 */
public class TableStatsTest {
private TableStats tableStatsUnderTest;
@Before
public void setUp() throws Exception {
tableStatsUnderTest = new TableStats();
}
@Test
public void testUpdateTableStats() throws Exception {
// Setup
Map<StatsType, String> statsTypeToValue = new HashMap<>();
statsTypeToValue.put(StatsType.ROW_COUNT, "1000");
statsTypeToValue.put(StatsType.DATA_SIZE, "10240");
// Run the test
tableStatsUnderTest.updateTableStats(statsTypeToValue);
// Verify the results
double rowCount = tableStatsUnderTest.getRowCount();
Assert.assertEquals(1000, rowCount, 0.01);
long dataSize = tableStatsUnderTest.getDataSize();
Assert.assertEquals(10240, dataSize);
}
@Test
public void testUpdateTableStats_ThrowsAnalysisException() {
// Setup: a non-numeric ROW_COUNT must be rejected.
Map<StatsType, String> statsTypeToValue = new HashMap<>();
statsTypeToValue.put(StatsType.AVG_SIZE, "8");
statsTypeToValue.put(StatsType.ROW_COUNT, "abc");
// Run the test
Assert.assertThrows(AnalysisException.class,
() -> tableStatsUnderTest.updateTableStats(statsTypeToValue));
}
@Test
public void testUpdatePartitionStats() throws Exception {
// Setup
Map<StatsType, String> statsTypeToValue = new HashMap<>();
statsTypeToValue.put(StatsType.ROW_COUNT, "1000");
statsTypeToValue.put(StatsType.DATA_SIZE, "10240");
// Run the test
tableStatsUnderTest.updatePartitionStats("partitionName", statsTypeToValue);
PartitionStats partitionStats = tableStatsUnderTest.getNameToPartitionStats().get("partitionName");
// Verify the results
long rowCount = partitionStats.getRowCount();
Assert.assertEquals(1000, rowCount);
long dataSize = partitionStats.getDataSize();
Assert.assertEquals(10240, dataSize);
}
@Test
public void testUpdatePartitionStats_ThrowsAnalysisException() {
// Setup: a non-numeric ROW_COUNT must be rejected at partition level too.
Map<StatsType, String> statsTypeToValue = new HashMap<>();
statsTypeToValue.put(StatsType.ROW_COUNT, "abc");
// Run the test
Assert.assertThrows(AnalysisException.class, () -> tableStatsUnderTest
.updatePartitionStats("partitionName", statsTypeToValue));
}
@Test
public void testUpdateColumnStats() throws Exception {
// Setup: a full set of column statistics for a BIGINT column.
Type columnType = Type.fromPrimitiveType(PrimitiveType.BIGINT);
Map<StatsType, String> statsTypeToValue = new HashMap<>();
statsTypeToValue.put(StatsType.NDV, "1");
statsTypeToValue.put(StatsType.AVG_SIZE, "8");
statsTypeToValue.put(StatsType.MAX_SIZE, "8");
statsTypeToValue.put(StatsType.NUM_NULLS, "2");
statsTypeToValue.put(StatsType.MIN_VALUE, "0");
statsTypeToValue.put(StatsType.MAX_VALUE, "1000");
// Run the test
tableStatsUnderTest.updateColumnStats("columnName", columnType, statsTypeToValue);
ColumnStat columnStats = tableStatsUnderTest.getColumnStats("columnName");
// Verify the results: each submitted value is readable back from ColumnStat.
double ndv = columnStats.getNdv();
Assert.assertEquals(1L, ndv, 0.01);
double avgSize = columnStats.getAvgSizeByte();
Assert.assertEquals(8.0f, avgSize, 0.0001);
double maxSize = columnStats.getMaxSizeByte();
Assert.assertEquals(8L, maxSize, 0.01);
double maxValue = columnStats.getMaxValue();
Assert.assertEquals(1000, maxValue, 0.01);
double minValue = columnStats.getMinValue();
Assert.assertEquals(0L, minValue, 0.01);
double numNulls = columnStats.getNumNulls();
Assert.assertEquals(2, numNulls, 0.01);
}
@Test
public void testUpdateColumnStats_ThrowsAnalysisException() {
// Setup: an invalid column type plus a non-numeric AVG_SIZE must be rejected.
Type columnType = Type.fromPrimitiveType(PrimitiveType.INVALID_TYPE);
Map<StatsType, String> statsTypeToValue = new HashMap<>();
statsTypeToValue.put(StatsType.AVG_SIZE, "abc");
// Run the test
Assert.assertThrows(AnalysisException.class, () -> tableStatsUnderTest
.updateColumnStats("columnName", columnType, statsTypeToValue));
}
@Test
public void testGetShowInfo() throws AnalysisException {
// Setup
Map<StatsType, String> statsTypeToValue = new HashMap<>();
statsTypeToValue.put(StatsType.ROW_COUNT, "1000");
statsTypeToValue.put(StatsType.DATA_SIZE, "10240");
tableStatsUnderTest.updateTableStats(statsTypeToValue);
// Table-level row count is stored as a double, hence "1000.0" in the output.
String[] expectedInfo = {"1000.0", "10240"};
// Run the test
List<String> showInfo = tableStatsUnderTest.getShowInfo();
String[] result = showInfo.toArray(new String[0]);
// Verify the results
Assert.assertArrayEquals(expectedInfo, result);
}
@Test
public void testGetShowInfoWithPartitionName() throws AnalysisException {
// Setup
Map<StatsType, String> statsTypeToValue = new HashMap<>();
statsTypeToValue.put(StatsType.ROW_COUNT, "1000");
statsTypeToValue.put(StatsType.DATA_SIZE, "10240");
tableStatsUnderTest.updatePartitionStats("partitionName", statsTypeToValue);
// Partition-level row count is a long, hence "1000" (no decimal point).
String[] expectedInfo = {"1000", "10240"};
// Run the test
List<String> showInfo = tableStatsUnderTest.getShowInfo("partitionName");
String[] result = showInfo.toArray(new String[0]);
// Verify the results
Assert.assertArrayEquals(expectedInfo, result);
}
}