branch-2.1: [improvement](statistics)Agg table set preagg on when doing sample analyzing. (#49918) (#51675)

backport: https://github.com/apache/doris/pull/49918
This commit is contained in:
James
2025-06-16 09:50:16 +08:00
committed by GitHub
parent e8615e1329
commit 8734c3fb3b
22 changed files with 743 additions and 157 deletions

View File

@ -161,6 +161,7 @@ public class StatementContext implements Closeable {
private final List<Expression> joinFilters = new ArrayList<>();
private final List<Hint> hints = new ArrayList<>();
private boolean hintForcePreAggOn = false;
// Map slot to its relation, currently used in SlotReference to find its original
// Relation for example LogicalOlapScan
@ -268,6 +269,14 @@ public class StatementContext implements Closeable {
this.needLockTables = needLockTables;
}
public void setHintForcePreAggOn(boolean preAggOn) {
this.hintForcePreAggOn = preAggOn;
}
public boolean isHintForcePreAggOn() {
return hintForcePreAggOn;
}
/**
* cache view info to avoid view's def and sql mode changed before lock it.
*

View File

@ -30,6 +30,7 @@ import org.apache.doris.nereids.rules.analysis.CheckPolicy;
import org.apache.doris.nereids.rules.analysis.CollectJoinConstraint;
import org.apache.doris.nereids.rules.analysis.CollectSubQueryAlias;
import org.apache.doris.nereids.rules.analysis.EliminateDistinctConstant;
import org.apache.doris.nereids.rules.analysis.EliminateLogicalPreAggOnHint;
import org.apache.doris.nereids.rules.analysis.EliminateLogicalSelectHint;
import org.apache.doris.nereids.rules.analysis.FillUpMissingSlots;
import org.apache.doris.nereids.rules.analysis.HavingToFilter;
@ -92,7 +93,8 @@ public class Analyzer extends AbstractBatchJobExecutor {
return jobs(
// we should eliminate hint before "Subquery unnesting".
topDown(new AnalyzeCTE()),
topDown(new EliminateLogicalSelectHint()),
topDown(new EliminateLogicalSelectHint(),
new EliminateLogicalPreAggOnHint()),
bottomUp(
new BindRelation(),
new CheckPolicy()

View File

@ -445,6 +445,7 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalIntersect;
import org.apache.doris.nereids.trees.plans.logical.LogicalJoin;
import org.apache.doris.nereids.trees.plans.logical.LogicalLimit;
import org.apache.doris.nereids.trees.plans.logical.LogicalPlan;
import org.apache.doris.nereids.trees.plans.logical.LogicalPreAggOnHint;
import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
import org.apache.doris.nereids.trees.plans.logical.LogicalRepeat;
import org.apache.doris.nereids.trees.plans.logical.LogicalSelectHint;
@ -1412,12 +1413,15 @@ public class LogicalPlanBuilder extends DorisParserBaseVisitor<Object> {
return selectPlan;
}
List<ParserRuleContext> selectHintContexts = Lists.newArrayList();
List<ParserRuleContext> preAggOnHintContexts = Lists.newArrayList();
for (Integer key : selectHintMap.keySet()) {
if (key > selectCtx.getStart().getStopIndex() && key < selectCtx.getStop().getStartIndex()) {
selectHintContexts.add(selectHintMap.get(key));
} else {
preAggOnHintContexts.add(selectHintMap.get(key));
}
}
return withSelectHint(selectPlan, selectHintContexts);
return withHints(selectPlan, selectHintContexts, preAggOnHintContexts);
});
}
@ -3195,91 +3199,111 @@ public class LogicalPlanBuilder extends DorisParserBaseVisitor<Object> {
return last;
}
private LogicalPlan withSelectHint(LogicalPlan logicalPlan, List<ParserRuleContext> hintContexts) {
if (hintContexts.isEmpty()) {
private LogicalPlan withHints(LogicalPlan logicalPlan, List<ParserRuleContext> selectHintContexts,
List<ParserRuleContext> preAggOnHintContexts) {
if (selectHintContexts.isEmpty() && preAggOnHintContexts.isEmpty()) {
return logicalPlan;
}
ImmutableList.Builder<SelectHint> hints = ImmutableList.builder();
for (ParserRuleContext hintContext : hintContexts) {
SelectHintContext selectHintContext = (SelectHintContext) hintContext;
for (HintStatementContext hintStatement : selectHintContext.hintStatements) {
String hintName = hintStatement.hintName.getText().toLowerCase(Locale.ROOT);
switch (hintName) {
case "set_var":
Map<String, Optional<String>> parameters = Maps.newLinkedHashMap();
for (HintAssignmentContext kv : hintStatement.parameters) {
if (kv.key != null) {
String parameterName = visitIdentifierOrText(kv.key);
Optional<String> value = Optional.empty();
if (kv.constantValue != null) {
Literal literal = (Literal) visit(kv.constantValue);
value = Optional.ofNullable(literal.toLegacyLiteral().getStringValue());
} else if (kv.identifierValue != null) {
// maybe we should throw exception when the identifierValue is quoted identifier
value = Optional.ofNullable(kv.identifierValue.getText());
LogicalPlan newPlan = logicalPlan;
if (!selectHintContexts.isEmpty()) {
ImmutableList.Builder<SelectHint> hints = ImmutableList.builder();
for (ParserRuleContext hintContext : selectHintContexts) {
SelectHintContext selectHintContext = (SelectHintContext) hintContext;
for (HintStatementContext hintStatement : selectHintContext.hintStatements) {
String hintName = hintStatement.hintName.getText().toLowerCase(Locale.ROOT);
switch (hintName) {
case "set_var":
Map<String, Optional<String>> parameters = Maps.newLinkedHashMap();
for (HintAssignmentContext kv : hintStatement.parameters) {
if (kv.key != null) {
String parameterName = visitIdentifierOrText(kv.key);
Optional<String> value = Optional.empty();
if (kv.constantValue != null) {
Literal literal = (Literal) visit(kv.constantValue);
value = Optional.ofNullable(literal.toLegacyLiteral().getStringValue());
} else if (kv.identifierValue != null) {
// maybe we should throw exception when the identifierValue is quoted identifier
value = Optional.ofNullable(kv.identifierValue.getText());
}
parameters.put(parameterName, value);
}
parameters.put(parameterName, value);
}
}
SelectHintSetVar setVar = new SelectHintSetVar(hintName, parameters);
setVar.setVarOnceInSql(ConnectContext.get().getStatementContext());
hints.add(setVar);
break;
case "leading":
List<String> leadingParameters = new ArrayList<String>();
for (HintAssignmentContext kv : hintStatement.parameters) {
if (kv.key != null) {
SelectHintSetVar setVar = new SelectHintSetVar(hintName, parameters);
setVar.setVarOnceInSql(ConnectContext.get().getStatementContext());
hints.add(setVar);
break;
case "leading":
List<String> leadingParameters = new ArrayList<String>();
for (HintAssignmentContext kv : hintStatement.parameters) {
if (kv.key != null) {
String parameterName = visitIdentifierOrText(kv.key);
leadingParameters.add(parameterName);
}
}
hints.add(new SelectHintLeading(hintName, leadingParameters));
break;
case "ordered":
hints.add(new SelectHintOrdered(hintName));
break;
case "use_cbo_rule":
List<String> useRuleParameters = new ArrayList<String>();
for (HintAssignmentContext kv : hintStatement.parameters) {
String parameterName = visitIdentifierOrText(kv.key);
leadingParameters.add(parameterName);
useRuleParameters.add(parameterName);
}
}
hints.add(new SelectHintLeading(hintName, leadingParameters));
break;
case "ordered":
hints.add(new SelectHintOrdered(hintName));
break;
case "use_cbo_rule":
List<String> useRuleParameters = new ArrayList<String>();
for (HintAssignmentContext kv : hintStatement.parameters) {
String parameterName = visitIdentifierOrText(kv.key);
useRuleParameters.add(parameterName);
}
hints.add(new SelectHintUseCboRule(hintName, useRuleParameters, false));
break;
case "no_use_cbo_rule":
List<String> noUseRuleParameters = new ArrayList<String>();
for (HintAssignmentContext kv : hintStatement.parameters) {
String parameterName = visitIdentifierOrText(kv.key);
noUseRuleParameters.add(parameterName);
}
hints.add(new SelectHintUseCboRule(hintName, noUseRuleParameters, true));
break;
case "use_mv":
List<String> useIndexParameters = new ArrayList<String>();
for (HintAssignmentContext kv : hintStatement.parameters) {
String parameterName = visitIdentifierOrText(kv.key);
if (kv.key != null) {
useIndexParameters.add(parameterName);
hints.add(new SelectHintUseCboRule(hintName, useRuleParameters, false));
break;
case "no_use_cbo_rule":
List<String> noUseRuleParameters = new ArrayList<String>();
for (HintAssignmentContext kv : hintStatement.parameters) {
String parameterName = visitIdentifierOrText(kv.key);
noUseRuleParameters.add(parameterName);
}
}
hints.add(new SelectHintUseMv(hintName, useIndexParameters, true));
break;
case "no_use_mv":
List<String> noUseIndexParameters = new ArrayList<String>();
for (HintAssignmentContext kv : hintStatement.parameters) {
String parameterName = visitIdentifierOrText(kv.key);
if (kv.key != null) {
noUseIndexParameters.add(parameterName);
hints.add(new SelectHintUseCboRule(hintName, noUseRuleParameters, true));
break;
case "use_mv":
List<String> useIndexParameters = new ArrayList<String>();
for (HintAssignmentContext kv : hintStatement.parameters) {
String parameterName = visitIdentifierOrText(kv.key);
if (kv.key != null) {
useIndexParameters.add(parameterName);
}
}
hints.add(new SelectHintUseMv(hintName, useIndexParameters, true));
break;
case "no_use_mv":
List<String> noUseIndexParameters = new ArrayList<String>();
for (HintAssignmentContext kv : hintStatement.parameters) {
String parameterName = visitIdentifierOrText(kv.key);
if (kv.key != null) {
noUseIndexParameters.add(parameterName);
}
}
hints.add(new SelectHintUseMv(hintName, noUseIndexParameters, false));
break;
default:
break;
}
}
}
newPlan = new LogicalSelectHint<>(hints.build(), newPlan);
}
if (!preAggOnHintContexts.isEmpty()) {
for (ParserRuleContext hintContext : preAggOnHintContexts) {
if (hintContext instanceof SelectHintContext) {
SelectHintContext preAggOnHintContext = (SelectHintContext) hintContext;
if (preAggOnHintContext.hintStatement != null
&& preAggOnHintContext.hintStatement.hintName != null) {
String text = preAggOnHintContext.hintStatement.hintName.getText();
if (text.equalsIgnoreCase("PREAGGOPEN")) {
newPlan = new LogicalPreAggOnHint<>(newPlan);
break;
}
hints.add(new SelectHintUseMv(hintName, noUseIndexParameters, false));
break;
default:
break;
}
}
}
}
return new LogicalSelectHint<>(hints.build(), logicalPlan);
return newPlan;
}
@Override

View File

@ -120,6 +120,7 @@ public enum RuleType {
ELIMINATE_GROUP_BY_CONSTANT(RuleTypeClass.REWRITE),
ELIMINATE_LOGICAL_SELECT_HINT(RuleTypeClass.REWRITE),
ELIMINATE_LOGICAL_PRE_AGG_ON_HINT(RuleTypeClass.REWRITE),
ELIMINATE_ORDER_BY_CONSTANT(RuleTypeClass.REWRITE),
ELIMINATE_ORDER_BY_UNDER_SUBQUERY(RuleTypeClass.REWRITE),
ELIMINATE_ORDER_BY_UNDER_VIEW(RuleTypeClass.REWRITE),

View File

@ -95,6 +95,8 @@ import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import org.apache.commons.collections.CollectionUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.ArrayList;
import java.util.List;
@ -104,6 +106,7 @@ import java.util.Optional;
* Rule to bind relations in query plan.
*/
public class BindRelation extends OneAnalysisRuleFactory {
private static final Logger LOG = LogManager.getLogger(StatementContext.class);
public BindRelation() {}
@ -179,7 +182,8 @@ public class BindRelation extends OneAnalysisRuleFactory {
return getLogicalPlan(table, unboundRelation, tableQualifier, cascadesContext);
}
private LogicalPlan makeOlapScan(TableIf table, UnboundRelation unboundRelation, List<String> qualifier) {
private LogicalPlan makeOlapScan(TableIf table, UnboundRelation unboundRelation, List<String> qualifier,
CascadesContext cascadesContext) {
LogicalOlapScan scan;
List<Long> partIds = getPartitionIds(table, unboundRelation, qualifier);
List<Long> tabletIds = unboundRelation.getTabletIds();
@ -215,6 +219,9 @@ public class BindRelation extends OneAnalysisRuleFactory {
// This tabletIds is set manually, so need to set specifiedTabletIds
scan = scan.withManuallySpecifiedTabletIds(tabletIds);
}
if (cascadesContext.getStatementContext().isHintForcePreAggOn()) {
return scan.withPreAggStatus(PreAggStatus.on());
}
if (needGenerateLogicalAggForRandomDistAggTable(scan)) {
// it's a random distribution agg table
// add agg on olap scan
@ -381,7 +388,7 @@ public class BindRelation extends OneAnalysisRuleFactory {
switch (table.getType()) {
case OLAP:
case MATERIALIZED_VIEW:
return makeOlapScan(table, unboundRelation, qualifierWithoutTableName);
return makeOlapScan(table, unboundRelation, qualifierWithoutTableName, cascadesContext);
case VIEW:
View view = (View) table;
isView = true;

View File

@ -0,0 +1,39 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.nereids.rules.analysis;
import org.apache.doris.nereids.rules.Rule;
import org.apache.doris.nereids.rules.RuleType;
import org.apache.doris.nereids.rules.rewrite.OneRewriteRuleFactory;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.logical.LogicalPreAggOnHint;
/**
* eliminate logical common hint and set them to cascade context
*/
public class EliminateLogicalPreAggOnHint extends OneRewriteRuleFactory {
@Override
public Rule build() {
return logicalPreAggOnHint().thenApply(ctx -> {
LogicalPreAggOnHint<Plan> preAggHintPlan = ctx.root;
ctx.statementContext.setHintForcePreAggOn(true);
return preAggHintPlan.child();
}).toRule(RuleType.ELIMINATE_LOGICAL_PRE_AGG_ON_HINT);
}
}

View File

@ -62,6 +62,7 @@ public enum PlanType {
LOGICAL_APPLY,
LOGICAL_ASSERT_NUM_ROWS,
LOGICAL_CHECK_POLICY,
LOGICAL_COMMON_HINT,
LOGICAL_CTE,
LOGICAL_CTE_ANCHOR,
LOGICAL_CTE_PRODUCER,

View File

@ -41,6 +41,7 @@ import org.apache.doris.nereids.rules.analysis.AnalyzeCTE;
import org.apache.doris.nereids.rules.analysis.BindExpression;
import org.apache.doris.nereids.rules.analysis.BindRelation;
import org.apache.doris.nereids.rules.analysis.CheckPolicy;
import org.apache.doris.nereids.rules.analysis.EliminateLogicalPreAggOnHint;
import org.apache.doris.nereids.rules.analysis.EliminateLogicalSelectHint;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.NamedExpression;
@ -310,7 +311,8 @@ public class BaseViewInfo {
private static List<RewriteJob> buildAnalyzeViewJobsForStar() {
return jobs(
topDown(new EliminateLogicalSelectHint()),
topDown(new EliminateLogicalSelectHint(),
new EliminateLogicalPreAggOnHint()),
topDown(new AnalyzeCTE()),
bottomUp(
new BindRelation(),

View File

@ -0,0 +1,88 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.nereids.trees.plans.logical;
import org.apache.doris.nereids.memo.GroupExpression;
import org.apache.doris.nereids.properties.LogicalProperties;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.plans.BlockFuncDepsPropagation;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.PlanType;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import java.util.List;
import java.util.Optional;
/**
* PreAggOn hint.
*/
public class LogicalPreAggOnHint<CHILD_TYPE extends Plan> extends LogicalUnary<CHILD_TYPE>
implements BlockFuncDepsPropagation {
public LogicalPreAggOnHint(CHILD_TYPE child) {
this(Optional.empty(), Optional.empty(), child);
}
/**
* LogicalCommonHint's full parameter constructor.
* @param groupExpression groupExpression exists when this plan is copy out from memo.
* @param logicalProperties logicalProperties is use for compute output
* @param child child plan
*/
public LogicalPreAggOnHint(Optional<GroupExpression> groupExpression,
Optional<LogicalProperties> logicalProperties, CHILD_TYPE child) {
super(PlanType.LOGICAL_COMMON_HINT, groupExpression, logicalProperties, child);
}
@Override
public LogicalPreAggOnHint<Plan> withChildren(List<Plan> children) {
Preconditions.checkArgument(children.size() == 1);
return new LogicalPreAggOnHint<>(children.get(0));
}
@Override
public <R, C> R accept(PlanVisitor<R, C> visitor, C context) {
return visitor.visitLogicalPreAggOnHint(this, context);
}
@Override
public List<? extends Expression> getExpressions() {
return ImmutableList.of();
}
@Override
public Plan withGroupExpression(Optional<GroupExpression> groupExpression) {
return new LogicalPreAggOnHint<>(groupExpression, Optional.of(getLogicalProperties()), child());
}
@Override
public Plan withGroupExprLogicalPropChildren(Optional<GroupExpression> groupExpression,
Optional<LogicalProperties> logicalProperties, List<Plan> children) {
Preconditions.checkArgument(children.size() == 1);
return new LogicalPreAggOnHint<>(groupExpression, logicalProperties, children.get(0));
}
@Override
public List<Slot> computeOutput() {
return child().getOutput();
}
}

View File

@ -38,6 +38,7 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalIntersect;
import org.apache.doris.nereids.trees.plans.logical.LogicalJoin;
import org.apache.doris.nereids.trees.plans.logical.LogicalLimit;
import org.apache.doris.nereids.trees.plans.logical.LogicalPartitionTopN;
import org.apache.doris.nereids.trees.plans.logical.LogicalPreAggOnHint;
import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
import org.apache.doris.nereids.trees.plans.logical.LogicalRelation;
import org.apache.doris.nereids.trees.plans.logical.LogicalRepeat;
@ -209,6 +210,10 @@ public abstract class PlanVisitor<R, C> implements CommandVisitor<R, C>, Relatio
return visit(hint, context);
}
public R visitLogicalPreAggOnHint(LogicalPreAggOnHint<? extends Plan> hint, C context) {
return visit(hint, context);
}
public R visitLogicalSetOperation(LogicalSetOperation setOperation, C context) {
return visit(setOperation, context);
}

View File

@ -354,7 +354,13 @@ public class AnalysisManager implements Writable {
long periodTimeInMs = stmt.getPeriodTimeInMs();
infoBuilder.setPeriodTimeInMs(periodTimeInMs);
List<Pair<String, String>> jobColumns = table.getColumnIndexPairs(columnNames);
OlapTable olapTable = table instanceof OlapTable ? (OlapTable) table : null;
boolean isSampleAnalyze = analysisMethod.equals(AnalysisMethod.SAMPLE);
List<Pair<String, String>> jobColumns = table.getColumnIndexPairs(columnNames).stream()
.filter(c -> olapTable == null || StatisticsUtil.canCollectColumn(
olapTable.getIndexMetaByIndexId(olapTable.getIndexIdByName(c.first)).getColumnByName(c.second),
table, isSampleAnalyze, olapTable.getIndexIdByName(c.first)))
.collect(Collectors.toList());
infoBuilder.setJobColumns(jobColumns);
StringJoiner stringJoiner = new StringJoiner(",", "[", "]");
for (Pair<String, String> pair : jobColumns) {

View File

@ -80,7 +80,8 @@ public abstract class BaseAnalysisTask {
+ "SUBSTRING(CAST(${max} AS STRING), 1, 1024) AS `max`, "
+ "${dataSizeFunction} * ${scaleFactor} AS `data_size`, "
+ "NOW() FROM ( "
+ "SELECT * FROM `${catalogName}`.`${dbName}`.`${tblName}` ${index} ${sampleHints} ${limit}) as t";
+ "SELECT * FROM `${catalogName}`.`${dbName}`.`${tblName}` ${index} ${sampleHints} ${limit}) "
+ "as t ${preAggHint}";
protected static final String DUJ1_ANALYZE_TEMPLATE = "SELECT "
+ "CONCAT('${tblId}', '-', '${idxId}', '-', '${colId}') AS `id`, "
@ -101,7 +102,8 @@ public abstract class BaseAnalysisTask {
+ " SELECT t0.`colValue` as `column_key`, COUNT(1) as `count`, SUM(`len`) as `column_length` "
+ " FROM "
+ " (SELECT ${subStringColName} AS `colValue`, LENGTH(`${colName}`) as `len` "
+ " FROM `${catalogName}`.`${dbName}`.`${tblName}` ${index} ${sampleHints} ${limit}) as `t0` "
+ " FROM `${catalogName}`.`${dbName}`.`${tblName}` ${index} ${sampleHints} ${limit}) as `t0`"
+ " ${preAggHint}"
+ " GROUP BY `t0`.`colValue` "
+ ") as `t1` ";

View File

@ -173,6 +173,7 @@ public class ExternalAnalysisTask extends BaseAnalysisTask {
commonParams.put("limit", "");
commonParams.put("scaleFactor", "1");
commonParams.put("index", "");
commonParams.put("preAggHint", "");
if (col != null) {
commonParams.put("type", col.getType().toString());
}

View File

@ -137,13 +137,6 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
}
protected ResultRow collectMinMax() {
// Agg table value columns has no zone map.
// For these columns, skip collecting min and max value to avoid scan whole table.
if (((OlapTable) tbl).getKeysType().equals(KeysType.AGG_KEYS) && !col.isKey()) {
LOG.info("Aggregation table {} column {} is not a key column, skip collecting min and max.",
tbl.getName(), col.getName());
return null;
}
long startTime = System.currentTimeMillis();
Map<String, String> params = buildSqlParams();
StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
@ -265,6 +258,13 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
params.put("type", col.getType().toString());
params.put("limit", "");
// For agg table and mor unique table, set PREAGGOPEN preAggHint.
if (((OlapTable) tbl).getKeysType().equals(KeysType.AGG_KEYS)
|| ((OlapTable) tbl).getKeysType().equals(KeysType.UNIQUE_KEYS)
&& !((OlapTable) tbl).isUniqKeyMergeOnWrite()) {
params.put("preAggHint", "/*+PREAGGOPEN*/");
}
// If table row count is less than the target sample row count, simple scan the full table.
if (tableRowCount <= targetSampleRows) {
scanFullTable = true;
@ -334,6 +334,7 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
params.put("colName", StatisticsUtil.escapeColumnName(String.valueOf(info.colName)));
params.put("tblName", String.valueOf(tbl.getName()));
params.put("index", getIndex());
params.put("preAggHint", "");
return params;
}

View File

@ -145,13 +145,21 @@ public class StatisticsAutoCollector extends MasterDaemon {
}
return;
}
boolean isSampleAnalyze = analysisMethod.equals(AnalysisMethod.SAMPLE);
OlapTable olapTable = table instanceof OlapTable ? (OlapTable) table : null;
List<Pair<String, String>> needRunColumns = table.getColumnIndexPairs(
table.getSchemaAllIndexes(false)
.stream()
.filter(c -> !StatisticsUtil.isUnsupportedType(c.getType()))
.map(Column::getName)
.collect(Collectors.toSet()));
if (needRunColumns == null || needRunColumns.isEmpty()) {
.collect(Collectors.toSet()))
.stream()
.filter(c -> olapTable == null || StatisticsUtil.canCollectColumn(
olapTable.getIndexMetaByIndexId(olapTable.getIndexIdByName(c.first)).getColumnByName(c.second),
table, isSampleAnalyze, olapTable.getIndexIdByName(c.first)))
.collect(Collectors.toList());
if (needRunColumns.isEmpty()) {
return;
}
StringJoiner stringJoiner = new StringJoiner(",", "[", "]");
@ -176,7 +184,7 @@ public class StatisticsAutoCollector extends MasterDaemon {
protected AnalysisInfo createAnalysisInfo(TableIf table, AnalysisMethod analysisMethod, long rowCount,
String colNames, List<Pair<String, String>> needRunColumns, JobPriority priority) {
AnalysisInfo jobInfo = new AnalysisInfoBuilder()
return new AnalysisInfoBuilder()
.setJobId(Env.getCurrentEnv().getNextId())
.setCatalogId(table.getDatabase().getCatalog().getId())
.setDBId(table.getDatabase().getId())
@ -200,7 +208,6 @@ public class StatisticsAutoCollector extends MasterDaemon {
.setPriority(priority)
.setTableVersion(table instanceof OlapTable ? ((OlapTable) table).getVisibleVersion() : 0)
.build();
return jobInfo;
}
// Analysis job created by the system

View File

@ -180,17 +180,24 @@ public class TableStatsMeta implements Writable, GsonPostProcessable {
if (tableIf != null) {
if (tableIf instanceof OlapTable) {
rowCount = analyzedJob.rowCount;
OlapTable olapTable = (OlapTable) tableIf;
indexesRowCount.putAll(analyzedJob.indexesRowCount);
clearStaleIndexRowCount((OlapTable) tableIf);
}
if (analyzedJob.jobColumns.containsAll(
tableIf.getColumnIndexPairs(
tableIf.getSchemaAllIndexes(false).stream()
.filter(c -> !StatisticsUtil.isUnsupportedType(c.getType()))
.map(Column::getName).collect(Collectors.toSet())))) {
updatedRows.set(0);
newPartitionLoaded.set(false);
clearStaleIndexRowCount(olapTable);
if (analyzedJob.jobColumns.containsAll(
olapTable.getColumnIndexPairs(olapTable.getSchemaAllIndexes(false)
.stream()
.filter(c -> !StatisticsUtil.isUnsupportedType(c.getType()))
.map(Column::getName).collect(Collectors.toSet()))
.stream()
.filter(c -> StatisticsUtil.canCollectColumn(olapTable.getIndexMetaByIndexId(
olapTable.getIndexIdByName(c.first)).getColumnByName(c.second),
olapTable, true, olapTable.getIndexIdByName(c.first)))
.collect(Collectors.toSet()))) {
updatedRows.set(0);
newPartitionLoaded.set(false);
}
}
rowCount = analyzedJob.rowCount;
// Set userInject back to false after manual analyze.
if (JobType.MANUAL.equals(jobType) && !analyzedJob.userInject) {
userInjected = false;

View File

@ -37,6 +37,7 @@ import org.apache.doris.catalog.ArrayType;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.DatabaseIf;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.KeysType;
import org.apache.doris.catalog.MapType;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.Partition;
@ -668,6 +669,25 @@ public class StatisticsUtil {
|| type instanceof AggStateType;
}
public static boolean canCollectColumn(Column c, TableIf table, boolean isSampleAnalyze, long indexId) {
// Full analyze can collect all columns.
if (!isSampleAnalyze) {
return true;
}
// External table can collect all columns.
if (!(table instanceof OlapTable)) {
return true;
}
OlapTable olapTable = (OlapTable) table;
// Skip agg table value columns
KeysType keysType = olapTable.getIndexMetaByIndexId(indexId).getKeysType();
if (KeysType.AGG_KEYS.equals(keysType) && !c.isKey()) {
return false;
}
// Skip mor unique table value columns
return !KeysType.UNIQUE_KEYS.equals(keysType) || olapTable.isUniqKeyMergeOnWrite() || c.isKey();
}
public static void sleep(long millis) {
try {
Thread.sleep(millis);

View File

@ -22,6 +22,7 @@ import org.apache.doris.analysis.TableSample;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.DataProperty;
import org.apache.doris.catalog.DatabaseIf;
import org.apache.doris.catalog.KeysType;
import org.apache.doris.catalog.MaterializedIndex;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.Partition;
@ -168,7 +169,7 @@ public class OlapAnalysisTaskTest {
+ "${ndvFunction} as `ndv`, ROUND(SUM(CASE WHEN `null` IS NULL THEN 1 ELSE 0 END) * ${scaleFactor}) AS `null_count`, "
+ "SUBSTRING(CAST('1' AS STRING), 1, 1024) AS `min`, SUBSTRING(CAST('2' AS STRING), 1, 1024) AS `max`, "
+ "COUNT(1) * 4 * ${scaleFactor} AS `data_size`, NOW() FROM "
+ "( SELECT * FROM `catalogName`.`${dbName}`.`null` ${sampleHints} ${limit}) as t", sql);
+ "( SELECT * FROM `catalogName`.`${dbName}`.`null` ${sampleHints} ${limit}) as t ", sql);
return;
}
};
@ -196,7 +197,8 @@ public class OlapAnalysisTaskTest {
+ "COUNT(1) * 4 * ${scaleFactor} AS `data_size`, NOW() "
+ "FROM ( SELECT t0.`colValue` as `column_key`, COUNT(1) as `count`, SUM(`len`) as `column_length` "
+ "FROM (SELECT ${subStringColName} AS `colValue`, LENGTH(`null`) as `len` "
+ "FROM `catalogName`.`${dbName}`.`null` ${sampleHints} ${limit}) as `t0` GROUP BY `t0`.`colValue` ) as `t1` ", sql);
+ "FROM `catalogName`.`${dbName}`.`null` ${sampleHints} ${limit}) as `t0` "
+ " GROUP BY `t0`.`colValue` ) as `t1` ", sql);
return;
}
@ -365,22 +367,70 @@ public class OlapAnalysisTaskTest {
return false;
}
};
task.col = new Column("test", PrimitiveType.INT);
new MockUp<OlapTable>() {
@Mock
public KeysType getKeysType() {
return KeysType.DUP_KEYS;
}
};
task.col = new Column("testColumn", Type.INT, true, null, null, "");
task.setTable(new OlapTable());
task.getSampleParams(params, 10);
Assertions.assertTrue(task.scanFullTable());
Assertions.assertEquals("1.0", params.get("scaleFactor"));
Assertions.assertEquals("", params.get("sampleHints"));
Assertions.assertEquals("SUM(`t1`.`count`) * COUNT(`t1`.`column_key`) / (SUM(`t1`.`count`) - SUM(IF(`t1`.`count` = 1 and `t1`.`column_key` is not null, 1, 0)) + SUM(IF(`t1`.`count` = 1 and `t1`.`column_key` is not null, 1, 0)) * SUM(`t1`.`count`) / 10)", params.get("ndvFunction"));
Assertions.assertNull(params.get("preAggHint"));
params.clear();
new MockUp<OlapTable>() {
@Mock
public KeysType getKeysType() {
return KeysType.AGG_KEYS;
}
};
task = new OlapAnalysisTask();
task.col = new Column("test", PrimitiveType.INT);
task.col = new Column("testColumn", Type.INT, false, null, null, "");
task.setTable(new OlapTable());
task.getSampleParams(params, 1000);
Assertions.assertEquals("10.0", params.get("scaleFactor"));
Assertions.assertEquals("TABLET(1, 2)", params.get("sampleHints"));
Assertions.assertEquals("SUM(`t1`.`count`) * COUNT(`t1`.`column_key`) / (SUM(`t1`.`count`) - SUM(IF(`t1`.`count` = 1 and `t1`.`column_key` is not null, 1, 0)) + SUM(IF(`t1`.`count` = 1 and `t1`.`column_key` is not null, 1, 0)) * SUM(`t1`.`count`) / 1000)", params.get("ndvFunction"));
Assertions.assertEquals("SUM(t1.count) * 4", params.get("dataSizeFunction"));
Assertions.assertEquals("`${colName}`", params.get("subStringColName"));
Assertions.assertEquals("/*+PREAGGOPEN*/", params.get("preAggHint"));
params.clear();
new MockUp<OlapTable>() {
@Mock
public KeysType getKeysType() {
return KeysType.UNIQUE_KEYS;
}
@Mock
public boolean isUniqKeyMergeOnWrite() {
return false;
}
};
task = new OlapAnalysisTask();
task.col = new Column("testColumn", Type.INT, false, null, null, "");
task.setTable(new OlapTable());
task.getSampleParams(params, 1000);
Assertions.assertEquals("/*+PREAGGOPEN*/", params.get("preAggHint"));
params.clear();
new MockUp<OlapTable>() {
@Mock
public boolean isUniqKeyMergeOnWrite() {
return true;
}
};
task = new OlapAnalysisTask();
task.col = new Column("testColumn", Type.INT, false, null, null, "");
task.setTable(new OlapTable());
task.getSampleParams(params, 1000);
Assertions.assertNull(params.get("preAggHint"));
params.clear();
new MockUp<OlapAnalysisTask>() {
@ -397,6 +447,7 @@ public class OlapAnalysisTaskTest {
task = new OlapAnalysisTask();
task.col = new Column("test", PrimitiveType.INT);
task.setTable(new OlapTable());
task.getSampleParams(params, 1000);
Assertions.assertEquals("10.0", params.get("scaleFactor"));
Assertions.assertEquals("TABLET(1, 2)", params.get("sampleHints"));
@ -411,6 +462,7 @@ public class OlapAnalysisTaskTest {
};
task = new OlapAnalysisTask();
task.col = new Column("test", PrimitiveType.INT);
task.setTable(new OlapTable());
task.getSampleParams(params, 1000);
Assertions.assertEquals("10.0", params.get("scaleFactor"));
Assertions.assertEquals("TABLET(1, 2)", params.get("sampleHints"));
@ -430,6 +482,7 @@ public class OlapAnalysisTaskTest {
};
task = new OlapAnalysisTask();
task.col = new Column("test", PrimitiveType.INT);
task.setTable(new OlapTable());
task.getSampleParams(params, 1000);
Assertions.assertEquals("20.0", params.get("scaleFactor"));
Assertions.assertEquals("TABLET(1, 2)", params.get("sampleHints"));
@ -441,6 +494,7 @@ public class OlapAnalysisTaskTest {
task.col = new Column("test", Type.fromPrimitiveType(PrimitiveType.INT),
true, null, null, null);
task.setKeyColumnSampleTooManyRows(true);
task.setTable(new OlapTable());
task.getSampleParams(params, 2000000000);
Assertions.assertEquals("2.0", params.get("scaleFactor"));
Assertions.assertEquals("TABLET(1, 2)", params.get("sampleHints"));

View File

@ -17,6 +17,9 @@
package org.apache.doris.statistics.util;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.KeysType;
import org.apache.doris.catalog.MaterializedIndexMeta;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.Table;
import org.apache.doris.catalog.Type;
@ -24,6 +27,9 @@ import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.Config;
import org.apache.doris.common.Pair;
import org.apache.doris.datasource.ExternalTable;
import org.apache.doris.datasource.hive.HMSExternalCatalog;
import org.apache.doris.datasource.hive.HMSExternalDatabase;
import org.apache.doris.datasource.hive.HMSExternalTable;
import org.apache.doris.qe.SessionVariable;
import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod;
import org.apache.doris.statistics.AnalysisInfo.AnalysisType;
@ -31,6 +37,7 @@ import org.apache.doris.statistics.AnalysisInfo.JobType;
import org.apache.doris.statistics.ColStatsMeta;
import org.apache.doris.statistics.ResultRow;
import org.apache.doris.statistics.TableStatsMeta;
import org.apache.doris.thrift.TStorageType;
import com.google.common.collect.Lists;
import mockit.Mock;
@ -43,6 +50,7 @@ import java.time.LocalTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Base64;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
@ -235,4 +243,58 @@ class StatisticsUtilTest {
tableMeta.setColToColStatsMeta(colToColStatsMeta);
Assertions.assertFalse(StatisticsUtil.tableNotAnalyzedForTooLong(olapTable, tableMeta));
}
@Test
void testCanCollectColumn() {
Column column = new Column("testColumn", Type.INT, true, null, null, "");
List<Column> schema = new ArrayList<>();
schema.add(column);
OlapTable table = new OlapTable(200, "testTable", schema, KeysType.AGG_KEYS, null, null);
// Test full analyze always return true;
Assertions.assertTrue(StatisticsUtil.canCollectColumn(column, table, false, 1));
// Test null table return true;
Assertions.assertTrue(StatisticsUtil.canCollectColumn(column, null, true, 1));
// Test external table always return true;
HMSExternalCatalog externalCatalog = new HMSExternalCatalog();
HMSExternalDatabase externalDatabase = new HMSExternalDatabase(externalCatalog, 1L, "dbName", "dbName");
HMSExternalTable hmsTable = new HMSExternalTable(1, "name", "name", externalCatalog, externalDatabase);
Assertions.assertTrue(StatisticsUtil.canCollectColumn(column, hmsTable, true, 1));
// Test agg key return true;
MaterializedIndexMeta meta = new MaterializedIndexMeta(1L, schema, 1, 1, (short) 1, TStorageType.COLUMN, KeysType.AGG_KEYS, null);
new MockUp<OlapTable>() {
@Mock
public MaterializedIndexMeta getIndexMetaByIndexId(long indexId) {
return meta;
}
};
Assertions.assertTrue(StatisticsUtil.canCollectColumn(column, table, true, 1));
// Test agg value return false
column = new Column("testColumn", Type.INT, false, null, null, "");
Assertions.assertFalse(StatisticsUtil.canCollectColumn(column, table, true, 1));
// Test unique mor value column return false
MaterializedIndexMeta meta1 = new MaterializedIndexMeta(1L, schema, 1, 1, (short) 1, TStorageType.COLUMN, KeysType.UNIQUE_KEYS, null);
new MockUp<OlapTable>() {
@Mock
public MaterializedIndexMeta getIndexMetaByIndexId(long indexId) {
return meta1;
}
@Mock
public boolean isUniqKeyMergeOnWrite() {
return false;
}
};
Assertions.assertFalse(StatisticsUtil.canCollectColumn(column, table, true, 1));
// Test unique mor key column return true
column = new Column("testColumn", Type.INT, true, null, null, "");
Assertions.assertTrue(StatisticsUtil.canCollectColumn(column, table, true, 1));
}
}