[Optimization](String) Optimize the injection of statistics. #18401

1. Remove useless partition statistics injection.
2. Add null/empty checks on min/max values to avoid exceptions during numeric conversion.
This commit is contained in:
ElvinWei
2023-04-06 11:42:11 +08:00
committed by GitHub
parent f73189860f
commit 9a916cffe4
4 changed files with 30 additions and 36 deletions

View File

@ -1326,7 +1326,7 @@ alter_stmt ::=
| KW_ALTER KW_TABLE table_name:tbl KW_MODIFY KW_COLUMN ident:columnName
KW_SET KW_STATS LPAREN key_value_map:map RPAREN opt_partition_names:partitionNames
{:
RESULT = new AlterColumnStatsStmt(tbl, columnName, map, partitionNames);
RESULT = new AlterColumnStatsStmt(tbl, columnName, map);
:}
| KW_ALTER KW_TABLE table_name:tbl KW_SET LPAREN key_value_map:properties RPAREN
{:

View File

@ -20,7 +20,6 @@ package org.apache.doris.analysis;
import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.PartitionType;
import org.apache.doris.catalog.Table;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.ErrorCode;
@ -42,15 +41,18 @@ import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
/**
* Manually inject statistics for columns.
* For partitioned tables, partitions must be specified, or statistics cannot be updated,
* and only OLAP table statistics are supported.
* e.g.
* Only OLAP table statistics are supported.
*
* Syntax:
* ALTER TABLE table_name MODIFY COLUMN columnName
* SET STATS ('k1' = 'v1', ...) [ PARTITIONS(p_name1, p_name2...) ]
* SET STATS ('k1' = 'v1', ...);
*
* e.g.
* ALTER TABLE stats_test.example_tbl MODIFY COLUMN age
* SET STATS ('row_count'='6001215');
*/
public class AlterColumnStatsStmt extends DdlStmt {
@ -66,7 +68,6 @@ public class AlterColumnStatsStmt extends DdlStmt {
.build();
private final TableName tableName;
private final PartitionNames optPartitionNames;
private final String columnName;
private final Map<String, String> properties;
@ -74,11 +75,10 @@ public class AlterColumnStatsStmt extends DdlStmt {
private final Map<StatsType, String> statsTypeToValue = Maps.newHashMap();
public AlterColumnStatsStmt(TableName tableName, String columnName,
Map<String, String> properties, PartitionNames optPartitionNames) {
Map<String, String> properties) {
this.tableName = tableName;
this.columnName = columnName;
this.properties = properties == null ? Collections.emptyMap() : properties;
this.optPartitionNames = optPartitionNames;
}
public TableName getTableName() {
@ -108,7 +108,7 @@ public class AlterColumnStatsStmt extends DdlStmt {
Util.prohibitExternalCatalog(tableName.getCtl(), this.getClass().getSimpleName());
// check partition & column
checkPartitionAndColumnNames();
checkColumnNames();
// check properties
Optional<StatsType> optional = properties.keySet().stream().map(StatsType::fromString)
@ -136,7 +136,7 @@ public class AlterColumnStatsStmt extends DdlStmt {
/**
* TODO(wzt): Support for external tables
*/
private void checkPartitionAndColumnNames() throws AnalysisException {
private void checkColumnNames() throws AnalysisException {
Database db = analyzer.getEnv().getInternalCatalog().getDbOrAnalysisException(tableName.getDb());
Table table = db.getTableOrAnalysisException(tableName.getTbl());
@ -149,22 +149,6 @@ public class AlterColumnStatsStmt extends DdlStmt {
ErrorReport.reportAnalysisException(ErrorCode.ERR_WRONG_COLUMN_NAME,
columnName, FeNameFormat.getColumnNameRegex());
}
if (optPartitionNames != null) {
if (olapTable.getPartitionInfo().getType().equals(PartitionType.UNPARTITIONED)) {
throw new AnalysisException("Not a partitioned table: " + olapTable.getName());
}
optPartitionNames.analyze(analyzer);
Set<String> olapPartitionNames = olapTable.getPartitionNames();
Optional<String> optional = optPartitionNames.getPartitionNames().stream()
.filter(name -> !olapPartitionNames.contains(name))
.findFirst();
if (optional.isPresent()) {
throw new AnalysisException("Partition does not exist: " + optional.get());
}
partitionNames.addAll(optPartitionNames.getPartitionNames());
}
}
@Override
@ -179,10 +163,7 @@ public class AlterColumnStatsStmt extends DdlStmt {
sb.append(new PrintableMap<>(properties,
" = ", true, false));
sb.append(")");
if (optPartitionNames != null) {
sb.append(" ");
sb.append(optPartitionNames.toSql());
}
return sb.toString();
}

View File

@ -144,10 +144,14 @@ public class ColumnStatistic {
}
String min = resultRow.getColumnValue("min");
String max = resultRow.getColumnValue("max");
columnStatisticBuilder.setMinValue(StatisticsUtil.convertToDouble(col.getType(), min));
columnStatisticBuilder.setMaxValue(StatisticsUtil.convertToDouble(col.getType(), max));
columnStatisticBuilder.setMaxExpr(StatisticsUtil.readableValue(col.getType(), max));
columnStatisticBuilder.setMinExpr(StatisticsUtil.readableValue(col.getType(), min));
if (!StatisticsUtil.isNullOrEmpty(min)) {
columnStatisticBuilder.setMinValue(StatisticsUtil.convertToDouble(col.getType(), min));
columnStatisticBuilder.setMinExpr(StatisticsUtil.readableValue(col.getType(), min));
}
if (!StatisticsUtil.isNullOrEmpty(max)) {
columnStatisticBuilder.setMaxValue(StatisticsUtil.convertToDouble(col.getType(), max));
columnStatisticBuilder.setMaxExpr(StatisticsUtil.readableValue(col.getType(), max));
}
columnStatisticBuilder.setSelectivity(1.0);
columnStatisticBuilder.setOriginalNdv(ndv);
Histogram histogram = Env.getCurrentEnv().getStatisticsCache().getHistogram(tblId, idxId, colName);

View File

@ -64,6 +64,7 @@ import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.UUID;
import java.util.stream.Collectors;
@ -280,4 +281,12 @@ public class StatisticsUtil {
}
return tblIf.getColumn(columnName);
}
/**
 * Tells whether a textual value should be treated as absent.
 *
 * <p>A value counts as absent when it is {@code null}, is empty once
 * surrounding whitespace is trimmed, or equals the literal {@code "null"}
 * in any letter case.
 *
 * @param str raw value to inspect; may be {@code null}
 * @return {@code true} if the value is absent, {@code false} otherwise
 */
public static boolean isNullOrEmpty(String str) {
    if (str == null) {
        return true;
    }
    String trimmed = str.trim();
    // equalsIgnoreCase already ignores case, so no locale-sensitive
    // lower-casing pass is needed before the comparison.
    return trimmed.isEmpty() || "null".equalsIgnoreCase(trimmed);
}
}