Support analyze materialized view. (#30540)

This commit is contained in:
Jibing-Li
2024-02-04 14:27:09 +08:00
committed by yiguolei
parent e891a095e7
commit 9e76592297
43 changed files with 1291 additions and 879 deletions

View File

@ -21,6 +21,7 @@ import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.DatabaseIf;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.catalog.View;
import org.apache.doris.catalog.external.ExternalTable;
@ -147,26 +148,26 @@ public class AnalyzeTblStmt extends AnalyzeStmt {
}
checkAnalyzePriv(tableName.getDb(), tableName.getTbl());
if (columnNames == null) {
// Filter unsupported type columns.
columnNames = table.getBaseSchema(false).stream()
columnNames = table.getSchemaAllIndexes(false).stream()
// Filter unsupported type columns.
.filter(c -> !StatisticsUtil.isUnsupportedType(c.getType()))
.map(Column::getName)
.collect(Collectors.toList());
}
table.readLock();
try {
List<String> baseSchema = table.getBaseSchema(false)
.stream().map(Column::getName).collect(Collectors.toList());
Optional<String> optional = columnNames.stream()
.filter(entity -> !baseSchema.contains(entity)).findFirst();
if (optional.isPresent()) {
String columnName = optional.get();
ErrorReport.reportAnalysisException(ErrorCode.ERR_WRONG_COLUMN_NAME,
columnName, FeNameFormat.getColumnNameRegex());
} else {
table.readLock();
try {
List<String> baseSchema = table.getSchemaAllIndexes(false)
.stream().map(Column::getName).collect(Collectors.toList());
Optional<String> optional = columnNames.stream()
.filter(entity -> !baseSchema.contains(entity)).findFirst();
if (optional.isPresent()) {
String columnName = optional.get();
ErrorReport.reportAnalysisException(ErrorCode.ERR_BAD_FIELD_ERROR, columnName, tableName.getTbl());
}
checkColumn();
} finally {
table.readUnlock();
}
checkColumn();
} finally {
table.readUnlock();
}
analyzeProperties.check();
@ -187,7 +188,9 @@ public class AnalyzeTblStmt extends AnalyzeStmt {
private void checkColumn() throws AnalysisException {
boolean containsUnsupportedTytpe = false;
for (String colName : columnNames) {
Column column = table.getColumn(colName);
Column column = table instanceof OlapTable
? ((OlapTable) table).getVisibleColumn(colName)
: table.getColumn(colName);
if (column == null) {
ErrorReport.reportAnalysisException(ErrorCode.ERR_WRONG_COLUMN_NAME,
colName, FeNameFormat.getColumnNameRegex());
@ -200,7 +203,9 @@ public class AnalyzeTblStmt extends AnalyzeStmt {
if (ConnectContext.get() == null
|| !ConnectContext.get().getSessionVariable().enableAnalyzeComplexTypeColumn) {
columnNames = columnNames.stream()
.filter(c -> !StatisticsUtil.isUnsupportedType(table.getColumn(c).getType()))
.filter(c -> !StatisticsUtil.isUnsupportedType(table instanceof OlapTable
? ((OlapTable) table).getVisibleColumn(c).getType()
: table.getColumn(c).getType()))
.collect(Collectors.toList());
} else {
throw new AnalysisException(
@ -233,8 +238,7 @@ public class AnalyzeTblStmt extends AnalyzeStmt {
}
public Set<String> getColumnNames() {
return columnNames == null ? table.getBaseSchema(false)
.stream().map(Column::getName).collect(Collectors.toSet()) : Sets.newHashSet(columnNames);
return Sets.newHashSet(columnNames);
}
public Set<String> getPartitionNames() {

View File

@ -122,7 +122,7 @@ public class DropStatsStmt extends DdlStmt {
}
} else {
isAllColumns = true;
columnNames = table.getColumns().stream().map(Column::getName).collect(Collectors.toSet());
columnNames = table.getSchemaAllIndexes(false).stream().map(Column::getName).collect(Collectors.toSet());
}
}

View File

@ -32,6 +32,7 @@ public class ShowAnalyzeTaskStatus extends ShowStmt {
ShowResultSetMetaData.builder()
.addColumn(new Column("task_id", ScalarType.createVarchar(100)))
.addColumn(new Column("col_name", ScalarType.createVarchar(1000)))
.addColumn(new Column("index_name", ScalarType.createVarchar(1000)))
.addColumn(new Column("message", ScalarType.createVarchar(1000)))
.addColumn(new Column("last_state_change_time", ScalarType.createVarchar(1000)))
.addColumn(new Column("time_cost_in_ms", ScalarType.createVarchar(1000)))

View File

@ -48,6 +48,7 @@ public class ShowColumnStatsStmt extends ShowStmt {
private static final ImmutableList<String> TITLE_NAMES =
new ImmutableList.Builder<String>()
.add("column_name")
.add("index_name")
.add("count")
.add("ndv")
.add("num_null")
@ -135,7 +136,7 @@ public class ShowColumnStatsStmt extends ShowStmt {
return table;
}
public ShowResultSet constructResultSet(List<Pair<String, ColumnStatistic>> columnStatistics) {
public ShowResultSet constructResultSet(List<Pair<Pair<String, String>, ColumnStatistic>> columnStatistics) {
List<List<String>> result = Lists.newArrayList();
columnStatistics.forEach(p -> {
if (p.second.isUnKnown) {
@ -143,7 +144,8 @@ public class ShowColumnStatsStmt extends ShowStmt {
}
List<String> row = Lists.newArrayList();
row.add(p.first);
row.add(p.first.first);
row.add(p.first.second);
row.add(String.valueOf(p.second.count));
row.add(String.valueOf(p.second.ndv));
row.add(String.valueOf(p.second.numNulls));
@ -152,7 +154,7 @@ public class ShowColumnStatsStmt extends ShowStmt {
row.add(String.valueOf(p.second.minExpr == null ? "N/A" : p.second.minExpr.toSql()));
row.add(String.valueOf(p.second.maxExpr == null ? "N/A" : p.second.maxExpr.toSql()));
ColStatsMeta colStatsMeta = Env.getCurrentEnv().getAnalysisManager().findColStatsMeta(table.getId(),
p.first);
p.first.first);
row.add(String.valueOf(colStatsMeta == null ? "N/A" : colStatsMeta.analysisMethod));
row.add(String.valueOf(colStatsMeta == null ? "N/A" : colStatsMeta.analysisType));
row.add(String.valueOf(colStatsMeta == null ? "N/A" : colStatsMeta.jobType));

View File

@ -21,6 +21,7 @@ import org.apache.doris.alter.MaterializedViewHandler;
import org.apache.doris.analysis.AggregateInfo;
import org.apache.doris.analysis.Analyzer;
import org.apache.doris.analysis.ColumnDef;
import org.apache.doris.analysis.CreateMaterializedViewStmt;
import org.apache.doris.analysis.DataSortInfo;
import org.apache.doris.analysis.Expr;
import org.apache.doris.analysis.SlotDescriptor;
@ -492,6 +493,17 @@ public class OlapTable extends Table implements MTMVRelatedTableIf {
return null;
}
public List<Long> getMvColumnIndexIds(String columnName) {
List<Long> ids = Lists.newArrayList();
for (MaterializedIndexMeta meta : getVisibleIndexIdToMeta().values()) {
Column target = meta.getColumnByDefineName(columnName);
if (target != null) {
ids.add(meta.getIndexId());
}
}
return ids;
}
@Override
public long getUpdateTime() {
long updateTime = tempPartitions.getUpdateTime();
@ -698,6 +710,15 @@ public class OlapTable extends Table implements MTMVRelatedTableIf {
}
}
@Override
public List<Column> getSchemaAllIndexes(boolean full) {
List<Column> columns = Lists.newArrayList();
for (Long indexId : indexIdToMeta.keySet()) {
columns.addAll(getSchemaByIndexId(indexId, full));
}
return columns;
}
public List<Column> getBaseSchemaKeyColumns() {
return getKeyColumnsByIndexId(baseIndexId);
}
@ -1207,25 +1228,29 @@ public class OlapTable extends Table implements MTMVRelatedTableIf {
@Override
public Map<String, Set<String>> findReAnalyzeNeededPartitions() {
TableIf table = this;
TableStatsMeta tableStats = Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(table.getId());
Set<String> allPartitions = table.getPartitionNames().stream().map(table::getPartition)
TableStatsMeta tableStats = Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(getId());
Set<String> allPartitions = getPartitionNames().stream().map(this::getPartition)
.filter(Partition::hasData).map(Partition::getName).collect(Collectors.toSet());
if (tableStats == null) {
return table.getBaseSchema().stream().filter(c -> !StatisticsUtil.isUnsupportedType(c.getType()))
.collect(Collectors.toMap(Column::getName, v -> allPartitions));
Map<String, Set<String>> ret = Maps.newHashMap();
for (Column col : getSchemaAllIndexes(false)) {
if (StatisticsUtil.isUnsupportedType(col.getType())) {
continue;
}
ret.put(col.getName(), allPartitions);
}
return ret;
}
Map<String, Set<String>> colToPart = new HashMap<>();
for (Column col : table.getBaseSchema()) {
for (Column col : getSchemaAllIndexes(false)) {
if (StatisticsUtil.isUnsupportedType(col.getType())) {
continue;
}
long lastUpdateTime = tableStats.findColumnLastUpdateTime(col.getName());
Set<String> partitions = table.getPartitionNames().stream()
.map(table::getPartition)
Set<String> partitions = getPartitionNames().stream()
.map(this::getPartition)
.filter(Partition::hasData)
.filter(partition ->
partition.getVisibleVersionTime() >= lastUpdateTime).map(Partition::getName)
.filter(partition -> partition.getVisibleVersionTime() >= lastUpdateTime).map(Partition::getName)
.collect(Collectors.toSet());
colToPart.put(col.getName(), partitions);
}
@ -1241,6 +1266,15 @@ public class OlapTable extends Table implements MTMVRelatedTableIf {
return rowCount;
}
public long getRowCountForIndex(long indexId) {
long rowCount = 0;
for (Map.Entry<Long, Partition> entry : idToPartition.entrySet()) {
MaterializedIndex index = entry.getValue().getIndex(indexId);
rowCount += index == null ? 0 : index.getRowCount();
}
return rowCount;
}
@Override
public long getCacheRowCount() {
return getRowCount();
@ -2472,8 +2506,12 @@ public class OlapTable extends Table implements MTMVRelatedTableIf {
@Override
public boolean isPartitionColumn(String columnName) {
if (columnName.startsWith(CreateMaterializedViewStmt.MATERIALIZED_VIEW_NAME_PREFIX)) {
columnName = columnName.substring(CreateMaterializedViewStmt.MATERIALIZED_VIEW_NAME_PREFIX.length());
}
String finalColumnName = columnName;
return getPartitionInfo().getPartitionColumns().stream()
.anyMatch(c -> c.getName().equalsIgnoreCase(columnName));
.anyMatch(c -> c.getName().equalsIgnoreCase(finalColumnName));
}
/**

View File

@ -356,6 +356,11 @@ public abstract class Table extends MetaObject implements Writable, TableIf {
return getBaseSchema(Util.showHiddenColumns());
}
@Override
public List<Column> getSchemaAllIndexes(boolean full) {
return getBaseSchema();
}
public List<Column> getBaseSchema(boolean full) {
if (full) {
return fullSchema;

View File

@ -94,6 +94,8 @@ public interface TableIf {
List<Column> getBaseSchema();
List<Column> getSchemaAllIndexes(boolean full);
default List<Column> getBaseSchemaOrEmpty() {
try {
return getBaseSchema();

View File

@ -256,6 +256,11 @@ public class ExternalTable implements TableIf, Writable, GsonPostProcessable {
return getFullSchema();
}
@Override
public List<Column> getSchemaAllIndexes(boolean full) {
return getBaseSchema();
}
@Override
public List<Column> getBaseSchema(boolean full) {
return getFullSchema();

View File

@ -241,8 +241,9 @@ public class MinidumpUtils {
}
private static ColumnStatistic getColumnStatistic(TableIf table, String colName) {
// TODO. Get index id for materialized view.
return Env.getCurrentEnv().getStatisticsCache().getColumnStatistics(
table.getDatabase().getCatalog().getId(), table.getDatabase().getId(), table.getId(), colName);
table.getDatabase().getCatalog().getId(), table.getDatabase().getId(), table.getId(), -1, colName);
}
private static Histogram getColumnHistogram(TableIf table, String colName) {

View File

@ -18,6 +18,7 @@
package org.apache.doris.nereids.rules.analysis;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.KeysType;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.Partition;
import org.apache.doris.catalog.TableIf;
@ -197,9 +198,14 @@ public class BindRelation extends OneAnalysisRuleFactory {
throw new AnalysisException("Table " + olapTable.getName()
+ " doesn't have materialized view " + indexName.get());
}
PreAggStatus preAggStatus
= olapTable.getIndexMetaByIndexId(indexId).getKeysType().equals(KeysType.DUP_KEYS)
? PreAggStatus.on()
: PreAggStatus.off("For direct index scan.");
scan = new LogicalOlapScan(unboundRelation.getRelationId(),
(OlapTable) table, ImmutableList.of(tableQualifier.get(1)), tabletIds, indexId,
unboundRelation.getHints(), unboundRelation.getTableSample());
preAggStatus, unboundRelation.getHints(), unboundRelation.getTableSample());
} else {
scan = new LogicalOlapScan(unboundRelation.getRelationId(),
(OlapTable) table, ImmutableList.of(tableQualifier.get(1)), tabletIds, unboundRelation.getHints(),

View File

@ -633,8 +633,9 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> {
return ColumnStatistic.UNKNOWN;
}
} else {
// TODO. Get index id for materialized view.
return Env.getCurrentEnv().getStatisticsCache().getColumnStatistics(
catalogId, dbId, table.getId(), colName);
catalogId, dbId, table.getId(), -1, colName);
}
}

View File

@ -145,10 +145,11 @@ public class LogicalOlapScan extends LogicalCatalogRelation implements OlapScan
}
public LogicalOlapScan(RelationId id, OlapTable table, List<String> qualifier, List<Long> tabletIds,
long selectedIndexId, List<String> hints, Optional<TableSample> tableSample) {
long selectedIndexId, PreAggStatus preAggStatus, List<String> hints,
Optional<TableSample> tableSample) {
this(id, table, qualifier, Optional.empty(), Optional.empty(),
table.getPartitionIds(), false, tabletIds,
selectedIndexId, true, PreAggStatus.off("For direct index scan."),
selectedIndexId, true, preAggStatus,
ImmutableList.of(), hints, Maps.newHashMap(), tableSample, true, false);
}

View File

@ -2502,43 +2502,54 @@ public class ShowExecutor {
ShowColumnStatsStmt showColumnStatsStmt = (ShowColumnStatsStmt) stmt;
TableName tableName = showColumnStatsStmt.getTableName();
TableIf tableIf = showColumnStatsStmt.getTable();
List<Pair<String, ColumnStatistic>> columnStatistics = new ArrayList<>();
List<Pair<Pair<String, String>, ColumnStatistic>> columnStatistics = new ArrayList<>();
Set<String> columnNames = showColumnStatsStmt.getColumnNames();
PartitionNames partitionNames = showColumnStatsStmt.getPartitionNames();
boolean showCache = showColumnStatsStmt.isCached();
for (String colName : columnNames) {
// Show column statistics in columnStatisticsCache. For validation.
if (showCache) {
ColumnStatistic columnStatistic = Env.getCurrentEnv().getStatisticsCache().getColumnStatistics(
tableIf.getDatabase().getCatalog().getId(),
tableIf.getDatabase().getId(), tableIf.getId(), colName);
columnStatistics.add(Pair.of(colName, columnStatistic));
} else if (partitionNames == null) {
ColumnStatistic columnStatistic =
StatisticsRepository.queryColumnStatisticsByName(tableIf.getId(), colName);
columnStatistics.add(Pair.of(colName, columnStatistic));
// Olap base index use -1 as index id.
List<Long> indexIds = Lists.newArrayList();
if (StatisticsUtil.isMvColumn(tableIf, colName)) {
OlapTable olapTable = (OlapTable) tableIf;
indexIds = olapTable.getMvColumnIndexIds(colName);
} else {
columnStatistics.addAll(StatisticsRepository.queryColumnStatisticsByPartitions(tableName,
colName, showColumnStatsStmt.getPartitionNames().getPartitionNames())
.stream().map(s -> Pair.of(colName, s))
.collect(Collectors.toList()));
indexIds.add(-1L);
}
for (long indexId : indexIds) {
String indexName = "N/A";
if (indexId != -1) {
indexName = ((OlapTable) tableIf).getIndexNameById(indexId);
if (indexName == null) {
continue;
}
}
// Show column statistics in columnStatisticsCache.
if (showCache) {
ColumnStatistic columnStatistic = Env.getCurrentEnv().getStatisticsCache().getColumnStatistics(
tableIf.getDatabase().getCatalog().getId(),
tableIf.getDatabase().getId(), tableIf.getId(), indexId, colName);
columnStatistics.add(Pair.of(Pair.of(colName, indexName), columnStatistic));
} else if (partitionNames == null) {
ColumnStatistic columnStatistic =
StatisticsRepository.queryColumnStatisticsByName(tableIf.getId(), indexId, colName);
columnStatistics.add(Pair.of(Pair.of(colName, indexName), columnStatistic));
} else {
String finalIndexName = indexName;
columnStatistics.addAll(StatisticsRepository.queryColumnStatisticsByPartitions(tableName,
colName, showColumnStatsStmt.getPartitionNames().getPartitionNames())
.stream().map(s -> Pair.of(Pair.of(colName, finalIndexName), s))
.collect(Collectors.toList()));
}
}
}
resultSet = showColumnStatsStmt.constructResultSet(columnStatistics);
}
public void handleShowColumnHist() {
// TODO: support histogram in the future.
ShowColumnHistStmt showColumnHistStmt = (ShowColumnHistStmt) stmt;
TableIf tableIf = showColumnHistStmt.getTable();
Set<String> columnNames = showColumnHistStmt.getColumnNames();
List<Pair<String, Histogram>> columnStatistics = columnNames.stream()
.map(colName -> Pair.of(colName,
StatisticsRepository.queryColumnHistogramByName(tableIf.getId(), colName)))
.collect(Collectors.toList());
List<Pair<String, Histogram>> columnStatistics = Lists.newArrayList();
resultSet = showColumnHistStmt.constructResultSet(columnStatistics);
}
@ -2921,12 +2932,19 @@ public class ShowExecutor {
private void handleShowAnalyzeTaskStatus() {
ShowAnalyzeTaskStatus showStmt = (ShowAnalyzeTaskStatus) stmt;
AnalysisInfo jobInfo = Env.getCurrentEnv().getAnalysisManager().findJobInfo(showStmt.getJobId());
TableIf table = StatisticsUtil.findTable(jobInfo.catalogId, jobInfo.dbId, jobInfo.tblId);
List<AnalysisInfo> analysisInfos = Env.getCurrentEnv().getAnalysisManager().findTasks(showStmt.getJobId());
List<List<String>> rows = new ArrayList<>();
for (AnalysisInfo analysisInfo : analysisInfos) {
List<String> row = new ArrayList<>();
row.add(String.valueOf(analysisInfo.taskId));
row.add(analysisInfo.colName);
if (StatisticsUtil.isMvColumn(table, analysisInfo.colName)) {
row.add(((OlapTable) table).getIndexNameById(analysisInfo.indexId));
} else {
row.add("N/A");
}
row.add(analysisInfo.message);
row.add(TimeUtils.DATETIME_FORMAT.format(
LocalDateTime.ofInstant(Instant.ofEpochMilli(analysisInfo.lastExecTimeInMs),

View File

@ -17,6 +17,7 @@
package org.apache.doris.statistics;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.common.io.Text;
import org.apache.doris.common.io.Writable;
import org.apache.doris.persist.gson.GsonUtils;
@ -24,7 +25,6 @@ import org.apache.doris.statistics.util.StatisticsUtil;
import com.google.gson.Gson;
import com.google.gson.annotations.SerializedName;
import com.google.gson.reflect.TypeToken;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@ -33,7 +33,6 @@ import org.apache.logging.log4j.core.util.CronExpression;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.lang.reflect.Type;
import java.text.ParseException;
import java.util.List;
import java.util.Map;
@ -310,16 +309,6 @@ public class AnalysisInfo implements Writable {
return gson.toJson(colToPartitions);
}
private static Map<String, Set<String>> getColToPartition(String colToPartitionStr) {
if (colToPartitionStr == null || colToPartitionStr.isEmpty()) {
return null;
}
Gson gson = new Gson();
Type type = new TypeToken<Map<String, Set<String>>>() {
}.getType();
return gson.fromJson(colToPartitionStr, type);
}
@Override
public void write(DataOutput out) throws IOException {
String json = GsonUtils.GSON.toJson(this);
@ -352,4 +341,8 @@ public class AnalysisInfo implements Writable {
state = AnalysisState.FAILED;
endTime = System.currentTimeMillis();
}
public TableIf getTable() {
return StatisticsUtil.findTable(catalogId, dbId, tblId);
}
}

View File

@ -191,4 +191,7 @@ public class AnalysisJob {
}
}
public AnalysisInfo getJobInfo() {
return jobInfo;
}
}

View File

@ -417,10 +417,17 @@ public class AnalysisManager implements Writable {
public void createTaskForEachColumns(AnalysisInfo jobInfo, Map<Long, BaseAnalysisTask> analysisTasks,
boolean isSync) throws DdlException {
Map<String, Set<String>> columnToPartitions = jobInfo.colToPartitions;
TableIf table = jobInfo.getTable();
for (Entry<String, Set<String>> entry : columnToPartitions.entrySet()) {
long indexId = -1;
long taskId = Env.getCurrentEnv().getNextId();
String colName = entry.getKey();
List<Long> indexIds = Lists.newArrayList();
// Get index id this column belongs to for OlapTable. Set it to -1 for baseIndex id.
if (StatisticsUtil.isMvColumn(table, colName)) {
OlapTable olapTable = (OlapTable) table;
indexIds = olapTable.getMvColumnIndexIds(colName);
} else {
indexIds.add(-1L);
}
AnalysisInfoBuilder colTaskInfoBuilder = new AnalysisInfoBuilder(jobInfo);
if (jobInfo.analysisType != AnalysisType.HISTOGRAM) {
colTaskInfoBuilder.setAnalysisType(AnalysisType.FUNDAMENTALS);
@ -428,14 +435,17 @@ public class AnalysisManager implements Writable {
colToParts.put(colName, entry.getValue());
colTaskInfoBuilder.setColToPartitions(colToParts);
}
AnalysisInfo analysisInfo = colTaskInfoBuilder.setColName(colName).setIndexId(indexId)
.setTaskId(taskId).setLastExecTimeInMs(System.currentTimeMillis()).build();
analysisTasks.put(taskId, createTask(analysisInfo));
jobInfo.addTaskId(taskId);
if (isSync) {
continue;
for (long indexId : indexIds) {
long taskId = Env.getCurrentEnv().getNextId();
AnalysisInfo analysisInfo = colTaskInfoBuilder.setColName(colName).setIndexId(indexId)
.setTaskId(taskId).setLastExecTimeInMs(System.currentTimeMillis()).build();
analysisTasks.put(taskId, createTask(analysisInfo));
jobInfo.addTaskId(taskId);
if (isSync) {
continue;
}
replayCreateAnalysisTask(analysisInfo);
}
replayCreateAnalysisTask(analysisInfo);
}
}
@ -670,7 +680,7 @@ public class AnalysisManager implements Writable {
long catalogId = table.getDatabase().getCatalog().getId();
long dbId = table.getDatabase().getId();
long tableId = table.getId();
Set<String> cols = table.getBaseSchema().stream().map(Column::getName).collect(Collectors.toSet());
Set<String> cols = table.getSchemaAllIndexes(false).stream().map(Column::getName).collect(Collectors.toSet());
invalidateLocalStats(catalogId, dbId, tableId, cols, tableStats);
// Drop stats ddl is master only operation.
invalidateRemoteStats(catalogId, dbId, tableId, cols, true);
@ -682,14 +692,25 @@ public class AnalysisManager implements Writable {
if (tableStats == null) {
return;
}
TableIf table = StatisticsUtil.findTable(catalogId, dbId, tableId);
StatisticsCache statisticsCache = Env.getCurrentEnv().getStatisticsCache();
if (columns == null) {
TableIf table = StatisticsUtil.findTable(catalogId, dbId, tableId);
columns = table.getBaseSchema().stream().map(Column::getName).collect(Collectors.toSet());
columns = table.getSchemaAllIndexes(false)
.stream().map(Column::getName).collect(Collectors.toSet());
}
for (String column : columns) {
tableStats.removeColumn(column);
statisticsCache.invalidate(tableId, -1, column);
List<Long> indexIds = Lists.newArrayList();
if (StatisticsUtil.isMvColumn(table, column)) {
OlapTable olapTable = (OlapTable) table;
indexIds = olapTable.getMvColumnIndexIds(column);
} else {
indexIds.add(-1L);
}
for (long indexId : indexIds) {
tableStats.removeColumn(column);
statisticsCache.invalidate(tableId, indexId, column);
}
}
tableStats.updatedTime = 0;
tableStats.userInjected = false;
@ -1036,6 +1057,10 @@ public class AnalysisManager implements Writable {
return idToAnalysisJob.get(id);
}
public AnalysisInfo findJobInfo(long id) {
return analysisJobInfoMap.get(id);
}
public void constructJob(AnalysisInfo jobInfo, Collection<? extends BaseAnalysisTask> tasks) {
AnalysisJob job = new AnalysisJob(jobInfo, tasks);
idToAnalysisJob.put(jobInfo.jobId, job);

View File

@ -63,7 +63,7 @@ public abstract class BaseAnalysisTask {
+ " SUBSTRING(CAST(MAX(`${colName}`) AS STRING), 1, 1024) AS `max`, "
+ " ${dataSizeFunction} AS `data_size`, "
+ " NOW() AS `update_time` "
+ " FROM `${catalogName}`.`${dbName}`.`${tblName}`";
+ " FROM `${catalogName}`.`${dbName}`.`${tblName}` ${index}";
protected static final String LINEAR_ANALYZE_TEMPLATE = " SELECT "
+ "CONCAT(${tblId}, '-', ${idxId}, '-', '${colId}') AS `id`, "
@ -80,7 +80,7 @@ public abstract class BaseAnalysisTask {
+ "SUBSTRING(CAST(${max} AS STRING), 1, 1024) AS `max`, "
+ "${dataSizeFunction} * ${scaleFactor} AS `data_size`, "
+ "NOW() "
+ "FROM `${catalogName}`.`${dbName}`.`${tblName}` ${sampleHints} ${limit}";
+ "FROM `${catalogName}`.`${dbName}`.`${tblName}` ${index} ${sampleHints} ${limit}";
protected static final String DUJ1_ANALYZE_TEMPLATE = "SELECT "
+ "CONCAT('${tblId}', '-', '${idxId}', '-', '${colId}') AS `id`, "
@ -100,7 +100,7 @@ public abstract class BaseAnalysisTask {
+ "FROM ( "
+ " SELECT t0.`${colName}` as `column_key`, COUNT(1) as `count` "
+ " FROM "
+ " (SELECT `${colName}` FROM `${catalogName}`.`${dbName}`.`${tblName}` "
+ " (SELECT `${colName}` FROM `${catalogName}`.`${dbName}`.`${tblName}` ${index} "
+ " ${sampleHints} ${limit}) as `t0` "
+ " GROUP BY `t0`.`${colName}` "
+ ") as `t1` ";

View File

@ -164,6 +164,7 @@ public class ExternalAnalysisTask extends BaseAnalysisTask {
commonParams.put("sampleHints", getSampleHint());
commonParams.put("limit", "");
commonParams.put("scaleFactor", "1");
commonParams.put("index", "");
if (col != null) {
commonParams.put("type", col.getType().toString());
}

View File

@ -17,8 +17,11 @@
package org.apache.doris.statistics;
import org.apache.doris.analysis.CreateMaterializedViewStmt;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.KeysType;
import org.apache.doris.catalog.MaterializedIndex;
import org.apache.doris.catalog.MaterializedIndexMeta;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.Partition;
import org.apache.doris.common.FeConstants;
@ -49,7 +52,7 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
private static final String BASIC_STATS_TEMPLATE = "SELECT "
+ "SUBSTRING(CAST(MIN(`${colName}`) AS STRING), 1, 1024) as min, "
+ "SUBSTRING(CAST(MAX(`${colName}`) AS STRING), 1, 1024) as max "
+ "FROM `${dbName}`.`${tblName}`";
+ "FROM `${dbName}`.`${tblName}` ${index}";
@VisibleForTesting
public OlapAnalysisTask() {
@ -89,13 +92,16 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
Pair<List<Long>, Long> pair = calcActualSampleTablets(tbl.isPartitionColumn(col.getName()));
LOG.info("Number of tablets selected {}, rows in tablets {}", pair.first.size(), pair.second);
List<Long> tabletIds = pair.first;
double scaleFactor = (double) tbl.getRowCount() / (double) pair.second;
long totalRowCount = info.indexId == -1
? tbl.getRowCount()
: ((OlapTable) tbl).getRowCountForIndex(info.indexId);
double scaleFactor = (double) totalRowCount / (double) pair.second;
// might happen if row count in fe metadata hasn't been updated yet
if (Double.isInfinite(scaleFactor) || Double.isNaN(scaleFactor)) {
LOG.warn("Scale factor is infinite or Nan, will set scale factor to 1.");
scaleFactor = 1;
tabletIds = Collections.emptyList();
pair.second = tbl.getRowCount();
pair.second = totalRowCount;
}
String tabletStr = tabletIds.stream()
.map(Object::toString)
@ -103,7 +109,6 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
try (AutoCloseConnectContext r = StatisticsUtil.buildConnectContext(info.jobType.equals(JobType.SYSTEM))) {
// Get basic stats, including min and max.
ResultRow basicStats = collectBasicStat(r);
long rowCount = tbl.getRowCount();
String min = StatisticsUtil.escapeSQL(basicStats != null && basicStats.getValues().size() > 0
? basicStats.get(0) : null);
String max = StatisticsUtil.escapeSQL(basicStats != null && basicStats.getValues().size() > 1
@ -122,16 +127,17 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
params.put("colId", StatisticsUtil.escapeSQL(String.valueOf(info.colName)));
params.put("dataSizeFunction", getDataSizeFunction(col, false));
params.put("dbName", db.getFullName());
params.put("colName", info.colName);
params.put("colName", StatisticsUtil.escapeColumnName(info.colName));
params.put("tblName", tbl.getName());
params.put("scaleFactor", String.valueOf(scaleFactor));
params.put("sampleHints", tabletStr.isEmpty() ? "" : String.format("TABLET(%s)", tabletStr));
params.put("ndvFunction", getNdvFunction(String.valueOf(rowCount)));
params.put("ndvFunction", getNdvFunction(String.valueOf(totalRowCount)));
params.put("min", StatisticsUtil.quote(min));
params.put("max", StatisticsUtil.quote(max));
params.put("rowCount", String.valueOf(rowCount));
params.put("rowCount", String.valueOf(totalRowCount));
params.put("type", col.getType().toString());
params.put("limit", "");
params.put("index", getIndex());
if (needLimit()) {
// If the tablets to be sampled are too large, use limit to control the rows to read, and re-calculate
// the scaleFactor.
@ -148,7 +154,7 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
if (useLinearAnalyzeTemplate()) {
// For single unique key, use count as ndv.
if (isSingleUniqueKey()) {
params.put("ndvFunction", String.valueOf(rowCount));
params.put("ndvFunction", String.valueOf(totalRowCount));
} else {
params.put("ndvFunction", "ROUND(NDV(`${colName}`) * ${scaleFactor})");
}
@ -177,8 +183,9 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
}
Map<String, String> params = new HashMap<>();
params.put("dbName", db.getFullName());
params.put("colName", info.colName);
params.put("colName", StatisticsUtil.escapeColumnName(info.colName));
params.put("tblName", tbl.getName());
params.put("index", getIndex());
StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
stmtExecutor = new StmtExecutor(context.connectContext, stringSubstitutor.replace(BASIC_STATS_TEMPLATE));
return stmtExecutor.executeInternalQuery().get(0);
@ -202,14 +209,24 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
params.put("dataSizeFunction", getDataSizeFunction(col, false));
params.put("catalogName", catalog.getName());
params.put("dbName", db.getFullName());
params.put("colName", String.valueOf(info.colName));
params.put("colName", StatisticsUtil.escapeColumnName(String.valueOf(info.colName)));
params.put("tblName", String.valueOf(tbl.getName()));
params.put("index", getIndex());
StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
String collectColStats = stringSubstitutor.replace(COLLECT_COL_STATISTICS);
runQuery(collectColStats);
}
// Get sample tablets id and scale up scaleFactor
protected String getIndex() {
if (info.indexId == -1) {
return "";
} else {
OlapTable olapTable = (OlapTable) this.tbl;
return "index `" + olapTable.getIndexNameById(info.indexId) + "`";
}
}
// Get sample tablets id and sample row count
protected Pair<List<Long>, Long> calcActualSampleTablets(boolean forPartitionColumn) {
// Below code copied from OlapScanNode.java
long sampleRows; // The total number of sample rows
@ -224,8 +241,11 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
long actualSampledRowCount = 0;
boolean enough = false;
for (Partition p : olapTable.getPartitions()) {
List<Long> ids = p.getBaseIndex().getTabletIdsInOrder();
MaterializedIndex materializedIndex = info.indexId == -1 ? p.getBaseIndex() : p.getIndex(info.indexId);
if (materializedIndex == null) {
continue;
}
List<Long> ids = materializedIndex.getTabletIdsInOrder();
if (ids.isEmpty()) {
continue;
}
@ -233,11 +253,10 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
// Skip partitions with row count < row count / 2 expected to be sampled per partition.
// It can be expected to sample a smaller number of partitions to avoid uneven distribution
// of sampling results.
if (p.getBaseIndex().getRowCount() < (avgRowsPerPartition / 2)) {
if (materializedIndex.getRowCount() < (avgRowsPerPartition / 2)) {
continue;
}
MaterializedIndex baseIndex = p.getBaseIndex();
long avgRowsPerTablet = Math.max(baseIndex.getRowCount() / ids.size(), 1);
long avgRowsPerTablet = Math.max(materializedIndex.getRowCount() / ids.size(), 1);
long tabletCounts = Math.max(
avgRowsPerPartition / avgRowsPerTablet + (avgRowsPerPartition % avgRowsPerTablet != 0 ? 1 : 0), 1);
tabletCounts = Math.min(tabletCounts, ids.size());
@ -247,13 +266,13 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
int seekTid = (int) ((i + seek) % ids.size());
long tabletId = ids.get(seekTid);
sampleTabletIds.add(tabletId);
actualSampledRowCount += baseIndex.getTablet(tabletId).getRowCount(true);
actualSampledRowCount += materializedIndex.getTablet(tabletId).getRowCount(true);
if (actualSampledRowCount >= sampleRows && !forPartitionColumn) {
enough = true;
break;
}
}
totalRows += p.getBaseIndex().getRowCount();
totalRows += materializedIndex.getRowCount();
totalTablet += ids.size();
if (enough) {
break;
@ -308,8 +327,12 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
if (isSingleUniqueKey()) {
return true;
}
String columnName = col.getName();
if (columnName.startsWith(CreateMaterializedViewStmt.MATERIALIZED_VIEW_NAME_PREFIX)) {
columnName = columnName.substring(CreateMaterializedViewStmt.MATERIALIZED_VIEW_NAME_PREFIX.length());
}
Set<String> distributionColumns = tbl.getDistributionColumnNames();
return distributionColumns.size() == 1 && distributionColumns.contains(col.getName().toLowerCase());
return distributionColumns.size() == 1 && distributionColumns.contains(columnName.toLowerCase());
}
/**
@ -317,8 +340,25 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
* @return True if the table has a single unique/agg key. False otherwise.
*/
protected boolean isSingleUniqueKey() {
int keysNum = ((OlapTable) tbl).getKeysNum();
KeysType keysType = ((OlapTable) tbl).getKeysType();
OlapTable olapTable = (OlapTable) this.tbl;
List<Column> schema;
KeysType keysType;
if (info.indexId == -1) {
schema = olapTable.getBaseSchema();
keysType = olapTable.getKeysType();
} else {
MaterializedIndexMeta materializedIndexMeta = olapTable.getIndexIdToMeta().get(info.indexId);
schema = materializedIndexMeta.getSchema();
keysType = materializedIndexMeta.getKeysType();
}
int keysNum = 0;
for (Column column : schema) {
if (column.isKey()) {
keysNum += 1;
}
}
return col.isKey()
&& keysNum == 1
&& (keysType.equals(KeysType.UNIQUE_KEYS) || keysType.equals(KeysType.AGG_KEYS));

View File

@ -62,9 +62,11 @@ public class OlapScanStatsDerive extends BaseStatsDerive {
double rowCount = table.estimatedRowCount();
for (Map.Entry<Id, String> entry : slotIdToTableIdAndColumnName.entrySet()) {
String colName = entry.getValue();
// TODO. Get index id for materialized view.
ColumnStatistic statistic =
Env.getCurrentEnv().getStatisticsCache().getColumnStatistics(
table.getDatabase().getCatalog().getId(), table.getDatabase().getId(), table.getId(), colName);
table.getDatabase().getCatalog().getId(),
table.getDatabase().getId(), table.getId(), -1, colName);
if (!statistic.isUnKnown) {
rowCount = statistic.count;
}

View File

@ -187,8 +187,9 @@ public class StatisticsAutoCollector extends StatisticsCollector {
.setDBId(db.getId())
.setTblId(table.getId())
.setColName(
table.getBaseSchema().stream().filter(c -> !StatisticsUtil.isUnsupportedType(c.getType()))
.map(Column::getName).collect(Collectors.joining(","))
table.getSchemaAllIndexes(false).stream()
.filter(c -> !StatisticsUtil.isUnsupportedType(c.getType()))
.map(Column::getName).collect(Collectors.joining(","))
)
.setAnalysisType(AnalysisInfo.AnalysisType.FUNDAMENTALS)
.setAnalysisMode(AnalysisInfo.AnalysisMode.INCREMENTAL)

View File

@ -90,26 +90,21 @@ public class StatisticsCache {
});
}
public ColumnStatistic getColumnStatistics(long catalogId, long dbId, long tblId, String colName) {
return getColumnStatistics(catalogId, dbId, tblId, -1, colName).orElse(ColumnStatistic.UNKNOWN);
}
public Optional<ColumnStatistic> getColumnStatistics(long catalogId, long dbId,
long tblId, long idxId, String colName) {
public ColumnStatistic getColumnStatistics(long catalogId, long dbId, long tblId, long idxId, String colName) {
ConnectContext ctx = ConnectContext.get();
if (ctx != null && ctx.getSessionVariable().internalSession) {
return Optional.empty();
return ColumnStatistic.UNKNOWN;
}
StatisticsCacheKey k = new StatisticsCacheKey(catalogId, dbId, tblId, idxId, colName);
try {
CompletableFuture<Optional<ColumnStatistic>> f = columnStatisticsCache.get(k);
if (f.isDone()) {
return f.get();
return f.get().orElse(ColumnStatistic.UNKNOWN);
}
} catch (Exception e) {
LOG.warn("Unexpected exception while returning ColumnStatistic", e);
}
return Optional.empty();
return ColumnStatistic.UNKNOWN;
}
public Histogram getHistogram(long tblId, String colName) {

View File

@ -41,18 +41,10 @@ public class StatisticsCacheKey {
private static final String DELIMITER = "-";
public StatisticsCacheKey(long tableId, String colName) {
this(tableId, -1, colName);
}
public StatisticsCacheKey(long tableId, long idxId, String colName) {
this(-1, -1, tableId, idxId, colName);
}
public StatisticsCacheKey(long catalogId, long dbId, long tableId) {
this(catalogId, dbId, tableId, -1, "");
}
public StatisticsCacheKey(long catalogId, long dbId, long tableId, long idxId, String colName) {
this.catalogId = catalogId;
this.dbId = dbId;

View File

@ -231,7 +231,7 @@ public class StatisticsCleaner extends MasterDaemon {
TableIf t = idToTbl.get(tblId);
String colId = statsId.colId;
if (t.getColumn(colId) == null) {
if (!StatisticsUtil.isMvColumn(t, colId) && t.getColumn(colId) == null) {
expiredStats.ids.add(id);
continue;
}

View File

@ -102,8 +102,8 @@ public class StatisticsRepository {
+ " ${inPredicate}"
+ " AND part_id IS NOT NULL";
public static ColumnStatistic queryColumnStatisticsByName(long tableId, String colName) {
ResultRow resultRow = queryColumnStatisticById(tableId, colName);
public static ColumnStatistic queryColumnStatisticsByName(long tableId, long indexId, String colName) {
ResultRow resultRow = queryColumnStatisticById(tableId, indexId, colName);
if (resultRow == null) {
return ColumnStatistic.UNKNOWN;
}
@ -126,17 +126,17 @@ public class StatisticsRepository {
Collectors.toList());
}
public static ResultRow queryColumnStatisticById(long tblId, String colName) {
return queryColumnStatisticById(tblId, colName, false);
public static ResultRow queryColumnStatisticById(long tblId, long indexId, String colName) {
return queryColumnStatisticById(tblId, indexId, colName, false);
}
public static ResultRow queryColumnHistogramById(long tblId, String colName) {
return queryColumnStatisticById(tblId, colName, true);
public static ResultRow queryColumnHistogramById(long tblId, long indexId, String colName) {
return queryColumnStatisticById(tblId, indexId, colName, true);
}
private static ResultRow queryColumnStatisticById(long tblId, String colName, boolean isHistogram) {
private static ResultRow queryColumnStatisticById(long tblId, long indexId, String colName, boolean isHistogram) {
Map<String, String> map = new HashMap<>();
String id = constructId(tblId, -1, colName);
String id = constructId(tblId, indexId, colName);
map.put("id", StatisticsUtil.escapeSQL(id));
List<ResultRow> rows = isHistogram ? StatisticsUtil.executeQuery(FETCH_COLUMN_HISTOGRAM_TEMPLATE, map) :
StatisticsUtil.executeQuery(FETCH_COLUMN_STATISTIC_TEMPLATE, map);
@ -158,8 +158,8 @@ public class StatisticsRepository {
return rows == null ? Collections.emptyList() : rows;
}
public static Histogram queryColumnHistogramByName(long tableId, String colName) {
ResultRow resultRow = queryColumnHistogramById(tableId, colName);
public static Histogram queryColumnHistogramByName(long tableId, long indexId, String colName) {
ResultRow resultRow = queryColumnHistogramById(tableId, indexId, colName);
if (resultRow == null) {
return Histogram.UNKNOWN;
}

View File

@ -19,6 +19,7 @@ package org.apache.doris.statistics.util;
import org.apache.doris.analysis.Analyzer;
import org.apache.doris.analysis.BoolLiteral;
import org.apache.doris.analysis.CreateMaterializedViewStmt;
import org.apache.doris.analysis.DateLiteral;
import org.apache.doris.analysis.DecimalLiteral;
import org.apache.doris.analysis.FloatLiteral;
@ -808,6 +809,13 @@ public class StatisticsUtil {
.replace("\\", "\\\\");
}
public static String escapeColumnName(String str) {
if (str == null) {
return null;
}
return str.replace("`", "``");
}
public static boolean isExternalTable(String catalogName, String dbName, String tblName) {
TableIf table;
try {
@ -981,4 +989,16 @@ public class StatisticsUtil {
}
}
/**
* Check if the given column name is a materialized view column.
* @param table
* @param columnName
* @return True for mv column.
*/
public static boolean isMvColumn(TableIf table, String columnName) {
return table instanceof OlapTable
&& columnName.startsWith(CreateMaterializedViewStmt.MATERIALIZED_VIEW_NAME_PREFIX)
|| columnName.startsWith(CreateMaterializedViewStmt.MATERIALIZED_VIEW_AGGREGATE_NAME_PREFIX);
}
}