From 8734c3fb3b28d97cf79551bf8a95c02e4b22adad Mon Sep 17 00:00:00 2001 From: James Date: Mon, 16 Jun 2025 09:50:16 +0800 Subject: [PATCH] branch-2.1: [improvement](statistics)Agg table set preagg on when doing sample analyzing. (#49918) (#51675) backport: https://github.com/apache/doris/pull/49918 --- .../doris/nereids/StatementContext.java | 9 + .../doris/nereids/jobs/executor/Analyzer.java | 4 +- .../nereids/parser/LogicalPlanBuilder.java | 172 +++++----- .../apache/doris/nereids/rules/RuleType.java | 1 + .../nereids/rules/analysis/BindRelation.java | 11 +- .../EliminateLogicalPreAggOnHint.java | 39 +++ .../doris/nereids/trees/plans/PlanType.java | 1 + .../plans/commands/info/BaseViewInfo.java | 4 +- .../plans/logical/LogicalPreAggOnHint.java | 88 ++++++ .../trees/plans/visitor/PlanVisitor.java | 5 + .../doris/statistics/AnalysisManager.java | 8 +- .../doris/statistics/BaseAnalysisTask.java | 6 +- .../statistics/ExternalAnalysisTask.java | 1 + .../doris/statistics/OlapAnalysisTask.java | 15 +- .../statistics/StatisticsAutoCollector.java | 15 +- .../doris/statistics/TableStatsMeta.java | 25 +- .../doris/statistics/util/StatisticsUtil.java | 20 ++ .../statistics/OlapAnalysisTaskTest.java | 62 +++- .../statistics/util/StatisticsUtilTest.java | 62 ++++ .../suites/statistics/analyze_stats.groovy | 3 +- .../suites/statistics/test_analyze_mv.groovy | 56 +--- .../test_skip_agg_table_value_column.groovy | 293 ++++++++++++++++++ 22 files changed, 743 insertions(+), 157 deletions(-) create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/EliminateLogicalPreAggOnHint.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalPreAggOnHint.java create mode 100644 regression-test/suites/statistics/test_skip_agg_table_value_column.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java index c2a0cace6b..009c51595b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java @@ -161,6 +161,7 @@ public class StatementContext implements Closeable { private final List joinFilters = new ArrayList<>(); private final List hints = new ArrayList<>(); + private boolean hintForcePreAggOn = false; // Map slot to its relation, currently used in SlotReference to find its original // Relation for example LogicalOlapScan @@ -268,6 +269,14 @@ public class StatementContext implements Closeable { this.needLockTables = needLockTables; } + public void setHintForcePreAggOn(boolean preAggOn) { + this.hintForcePreAggOn = preAggOn; + } + + public boolean isHintForcePreAggOn() { + return hintForcePreAggOn; + } + /** * cache view info to avoid view's def and sql mode changed before lock it. * diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java index 24129a1083..1a83f1391e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java @@ -30,6 +30,7 @@ import org.apache.doris.nereids.rules.analysis.CheckPolicy; import org.apache.doris.nereids.rules.analysis.CollectJoinConstraint; import org.apache.doris.nereids.rules.analysis.CollectSubQueryAlias; import org.apache.doris.nereids.rules.analysis.EliminateDistinctConstant; +import org.apache.doris.nereids.rules.analysis.EliminateLogicalPreAggOnHint; import org.apache.doris.nereids.rules.analysis.EliminateLogicalSelectHint; import org.apache.doris.nereids.rules.analysis.FillUpMissingSlots; import org.apache.doris.nereids.rules.analysis.HavingToFilter; @@ -92,7 +93,8 @@ public class Analyzer extends AbstractBatchJobExecutor { return jobs( // we should eliminate hint before "Subquery unnesting". topDown(new AnalyzeCTE()), - topDown(new EliminateLogicalSelectHint()), + topDown(new EliminateLogicalSelectHint(), + new EliminateLogicalPreAggOnHint()), bottomUp( new BindRelation(), new CheckPolicy() diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java index 127d73d24d..836984bee8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java @@ -445,6 +445,7 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalIntersect; import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; import org.apache.doris.nereids.trees.plans.logical.LogicalLimit; import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; +import org.apache.doris.nereids.trees.plans.logical.LogicalPreAggOnHint; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; import org.apache.doris.nereids.trees.plans.logical.LogicalRepeat; import org.apache.doris.nereids.trees.plans.logical.LogicalSelectHint; @@ -1412,12 +1413,15 @@ public class LogicalPlanBuilder extends DorisParserBaseVisitor { return selectPlan; } List selectHintContexts = Lists.newArrayList(); + List preAggOnHintContexts = Lists.newArrayList(); for (Integer key : selectHintMap.keySet()) { if (key > selectCtx.getStart().getStopIndex() && key < selectCtx.getStop().getStartIndex()) { selectHintContexts.add(selectHintMap.get(key)); + } else { + preAggOnHintContexts.add(selectHintMap.get(key)); } } - return withSelectHint(selectPlan, selectHintContexts); + return withHints(selectPlan, selectHintContexts, preAggOnHintContexts); }); } @@ -3195,91 +3199,111 @@ public class LogicalPlanBuilder extends DorisParserBaseVisitor { return last; } - private LogicalPlan withSelectHint(LogicalPlan logicalPlan, List hintContexts) { - if (hintContexts.isEmpty()) { + private LogicalPlan withHints(LogicalPlan logicalPlan, List selectHintContexts, + List preAggOnHintContexts) { + if (selectHintContexts.isEmpty() && preAggOnHintContexts.isEmpty()) { return logicalPlan; } - ImmutableList.Builder hints = ImmutableList.builder(); - for (ParserRuleContext hintContext : hintContexts) { - SelectHintContext selectHintContext = (SelectHintContext) hintContext; - for (HintStatementContext hintStatement : selectHintContext.hintStatements) { - String hintName = hintStatement.hintName.getText().toLowerCase(Locale.ROOT); - switch (hintName) { - case "set_var": - Map> parameters = Maps.newLinkedHashMap(); - for (HintAssignmentContext kv : hintStatement.parameters) { - if (kv.key != null) { - String parameterName = visitIdentifierOrText(kv.key); - Optional value = Optional.empty(); - if (kv.constantValue != null) { - Literal literal = (Literal) visit(kv.constantValue); - value = Optional.ofNullable(literal.toLegacyLiteral().getStringValue()); - } else if (kv.identifierValue != null) { - // maybe we should throw exception when the identifierValue is quoted identifier - value = Optional.ofNullable(kv.identifierValue.getText()); + LogicalPlan newPlan = logicalPlan; + if (!selectHintContexts.isEmpty()) { + ImmutableList.Builder hints = ImmutableList.builder(); + for (ParserRuleContext hintContext : selectHintContexts) { + SelectHintContext selectHintContext = (SelectHintContext) hintContext; + for (HintStatementContext hintStatement : selectHintContext.hintStatements) { + String hintName = hintStatement.hintName.getText().toLowerCase(Locale.ROOT); + switch (hintName) { + case "set_var": + Map> parameters = Maps.newLinkedHashMap(); + for (HintAssignmentContext kv : hintStatement.parameters) { + if (kv.key != null) { + String parameterName = visitIdentifierOrText(kv.key); + Optional value = Optional.empty(); + if (kv.constantValue != null) { + Literal literal = (Literal) visit(kv.constantValue); + value = Optional.ofNullable(literal.toLegacyLiteral().getStringValue()); + } else if (kv.identifierValue != null) { + // maybe we should throw exception when the identifierValue is quoted identifier + value = Optional.ofNullable(kv.identifierValue.getText()); + } + parameters.put(parameterName, value); } - parameters.put(parameterName, value); } - } - SelectHintSetVar setVar = new SelectHintSetVar(hintName, parameters); - setVar.setVarOnceInSql(ConnectContext.get().getStatementContext()); - hints.add(setVar); - break; - case "leading": - List leadingParameters = new ArrayList(); - for (HintAssignmentContext kv : hintStatement.parameters) { - if (kv.key != null) { + SelectHintSetVar setVar = new SelectHintSetVar(hintName, parameters); + setVar.setVarOnceInSql(ConnectContext.get().getStatementContext()); + hints.add(setVar); + break; + case "leading": + List leadingParameters = new ArrayList(); + for (HintAssignmentContext kv : hintStatement.parameters) { + if (kv.key != null) { + String parameterName = visitIdentifierOrText(kv.key); + leadingParameters.add(parameterName); + } + } + hints.add(new SelectHintLeading(hintName, leadingParameters)); + break; + case "ordered": + hints.add(new SelectHintOrdered(hintName)); + break; + case "use_cbo_rule": + List useRuleParameters = new ArrayList(); + for (HintAssignmentContext kv : hintStatement.parameters) { String parameterName = visitIdentifierOrText(kv.key); - leadingParameters.add(parameterName); + useRuleParameters.add(parameterName); } - } - hints.add(new SelectHintLeading(hintName, leadingParameters)); - break; - case "ordered": - hints.add(new SelectHintOrdered(hintName)); - break; - case "use_cbo_rule": - List useRuleParameters = new ArrayList(); - for (HintAssignmentContext kv : hintStatement.parameters) { - String parameterName = visitIdentifierOrText(kv.key); - useRuleParameters.add(parameterName); - } - hints.add(new SelectHintUseCboRule(hintName, useRuleParameters, false)); - break; - case "no_use_cbo_rule": - List noUseRuleParameters = new ArrayList(); - for (HintAssignmentContext kv : hintStatement.parameters) { - String parameterName = visitIdentifierOrText(kv.key); - noUseRuleParameters.add(parameterName); - } - hints.add(new SelectHintUseCboRule(hintName, noUseRuleParameters, true)); - break; - case "use_mv": - List useIndexParameters = new ArrayList(); - for (HintAssignmentContext kv : hintStatement.parameters) { - String parameterName = visitIdentifierOrText(kv.key); - if (kv.key != null) { - useIndexParameters.add(parameterName); + hints.add(new SelectHintUseCboRule(hintName, useRuleParameters, false)); + break; + case "no_use_cbo_rule": + List noUseRuleParameters = new ArrayList(); + for (HintAssignmentContext kv : hintStatement.parameters) { + String parameterName = visitIdentifierOrText(kv.key); + noUseRuleParameters.add(parameterName); } - } - hints.add(new SelectHintUseMv(hintName, useIndexParameters, true)); - break; - case "no_use_mv": - List noUseIndexParameters = new ArrayList(); - for (HintAssignmentContext kv : hintStatement.parameters) { - String parameterName = visitIdentifierOrText(kv.key); - if (kv.key != null) { - noUseIndexParameters.add(parameterName); + hints.add(new SelectHintUseCboRule(hintName, noUseRuleParameters, true)); + break; + case "use_mv": + List useIndexParameters = new ArrayList(); + for (HintAssignmentContext kv : hintStatement.parameters) { + String parameterName = visitIdentifierOrText(kv.key); + if (kv.key != null) { + useIndexParameters.add(parameterName); + } } + hints.add(new SelectHintUseMv(hintName, useIndexParameters, true)); + break; + case "no_use_mv": + List noUseIndexParameters = new ArrayList(); + for (HintAssignmentContext kv : hintStatement.parameters) { + String parameterName = visitIdentifierOrText(kv.key); + if (kv.key != null) { + noUseIndexParameters.add(parameterName); + } + } + hints.add(new SelectHintUseMv(hintName, noUseIndexParameters, false)); + break; + default: + break; + } + } + } + newPlan = new LogicalSelectHint<>(hints.build(), newPlan); + } + if (!preAggOnHintContexts.isEmpty()) { + for (ParserRuleContext hintContext : preAggOnHintContexts) { + if (hintContext instanceof SelectHintContext) { + SelectHintContext preAggOnHintContext = (SelectHintContext) hintContext; + if (preAggOnHintContext.hintStatement != null + && preAggOnHintContext.hintStatement.hintName != null) { + String text = preAggOnHintContext.hintStatement.hintName.getText(); + if (text.equalsIgnoreCase("PREAGGOPEN")) { + newPlan = new LogicalPreAggOnHint<>(newPlan); + break; } - hints.add(new SelectHintUseMv(hintName, noUseIndexParameters, false)); - break; - default: - break; + } } } } - return new LogicalSelectHint<>(hints.build(), logicalPlan); + return newPlan; } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java index 7db8ac570d..7f4b5cb3fd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java @@ -120,6 +120,7 @@ public enum RuleType { ELIMINATE_GROUP_BY_CONSTANT(RuleTypeClass.REWRITE), ELIMINATE_LOGICAL_SELECT_HINT(RuleTypeClass.REWRITE), + ELIMINATE_LOGICAL_PRE_AGG_ON_HINT(RuleTypeClass.REWRITE), ELIMINATE_ORDER_BY_CONSTANT(RuleTypeClass.REWRITE), ELIMINATE_ORDER_BY_UNDER_SUBQUERY(RuleTypeClass.REWRITE), ELIMINATE_ORDER_BY_UNDER_VIEW(RuleTypeClass.REWRITE), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java index 9b78082d10..cbc2a93f18 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java @@ -95,6 +95,8 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; import com.google.common.collect.Sets; import org.apache.commons.collections.CollectionUtils; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import java.util.ArrayList; import java.util.List; @@ -104,6 +106,7 @@ import java.util.Optional; * Rule to bind relations in query plan. */ public class BindRelation extends OneAnalysisRuleFactory { + private static final Logger LOG = LogManager.getLogger(StatementContext.class); public BindRelation() {} @@ -179,7 +182,8 @@ public class BindRelation extends OneAnalysisRuleFactory { return getLogicalPlan(table, unboundRelation, tableQualifier, cascadesContext); } - private LogicalPlan makeOlapScan(TableIf table, UnboundRelation unboundRelation, List qualifier) { + private LogicalPlan makeOlapScan(TableIf table, UnboundRelation unboundRelation, List qualifier, + CascadesContext cascadesContext) { LogicalOlapScan scan; List partIds = getPartitionIds(table, unboundRelation, qualifier); List tabletIds = unboundRelation.getTabletIds(); @@ -215,6 +219,9 @@ public class BindRelation extends OneAnalysisRuleFactory { // This tabletIds is set manually, so need to set specifiedTabletIds scan = scan.withManuallySpecifiedTabletIds(tabletIds); } + if (cascadesContext.getStatementContext().isHintForcePreAggOn()) { + return scan.withPreAggStatus(PreAggStatus.on()); + } if (needGenerateLogicalAggForRandomDistAggTable(scan)) { // it's a random distribution agg table // add agg on olap scan @@ -381,7 +388,7 @@ public class BindRelation extends OneAnalysisRuleFactory { switch (table.getType()) { case OLAP: case MATERIALIZED_VIEW: - return makeOlapScan(table, unboundRelation, qualifierWithoutTableName); + return makeOlapScan(table, unboundRelation, qualifierWithoutTableName, cascadesContext); case VIEW: View view = (View) table; isView = true; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/EliminateLogicalPreAggOnHint.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/EliminateLogicalPreAggOnHint.java new file mode 100644 index 0000000000..9f40f8e97b --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/EliminateLogicalPreAggOnHint.java @@ -0,0 +1,39 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.analysis; + +import org.apache.doris.nereids.rules.Rule; +import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.rules.rewrite.OneRewriteRuleFactory; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalPreAggOnHint; + +/** + * eliminate logical common hint and set them to cascade context + */ +public class EliminateLogicalPreAggOnHint extends OneRewriteRuleFactory { + + @Override + public Rule build() { + return logicalPreAggOnHint().thenApply(ctx -> { + LogicalPreAggOnHint preAggHintPlan = ctx.root; + ctx.statementContext.setHintForcePreAggOn(true); + return preAggHintPlan.child(); + }).toRule(RuleType.ELIMINATE_LOGICAL_PRE_AGG_ON_HINT); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java index 7a985bf928..c08487ef88 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java @@ -62,6 +62,7 @@ public enum PlanType { LOGICAL_APPLY, LOGICAL_ASSERT_NUM_ROWS, LOGICAL_CHECK_POLICY, + LOGICAL_COMMON_HINT, LOGICAL_CTE, LOGICAL_CTE_ANCHOR, LOGICAL_CTE_PRODUCER, diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/BaseViewInfo.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/BaseViewInfo.java index 56a97ac5f6..40076fc8ea 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/BaseViewInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/BaseViewInfo.java @@ -41,6 +41,7 @@ import org.apache.doris.nereids.rules.analysis.AnalyzeCTE; import org.apache.doris.nereids.rules.analysis.BindExpression; import org.apache.doris.nereids.rules.analysis.BindRelation; import org.apache.doris.nereids.rules.analysis.CheckPolicy; +import org.apache.doris.nereids.rules.analysis.EliminateLogicalPreAggOnHint; import org.apache.doris.nereids.rules.analysis.EliminateLogicalSelectHint; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.NamedExpression; @@ -310,7 +311,8 @@ public class BaseViewInfo { private static List buildAnalyzeViewJobsForStar() { return jobs( - topDown(new EliminateLogicalSelectHint()), + topDown(new EliminateLogicalSelectHint(), + new EliminateLogicalPreAggOnHint()), topDown(new AnalyzeCTE()), bottomUp( new BindRelation(), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalPreAggOnHint.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalPreAggOnHint.java new file mode 100644 index 0000000000..f139308948 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalPreAggOnHint.java @@ -0,0 +1,88 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.plans.logical; + +import org.apache.doris.nereids.memo.GroupExpression; +import org.apache.doris.nereids.properties.LogicalProperties; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.plans.BlockFuncDepsPropagation; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.PlanType; +import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; + +import java.util.List; +import java.util.Optional; + +/** + * PreAggOn hint. + */ +public class LogicalPreAggOnHint extends LogicalUnary + implements BlockFuncDepsPropagation { + + public LogicalPreAggOnHint(CHILD_TYPE child) { + this(Optional.empty(), Optional.empty(), child); + } + + /** + * LogicalCommonHint's full parameter constructor. + * @param groupExpression groupExpression exists when this plan is copy out from memo. + * @param logicalProperties logicalProperties is use for compute output + * @param child child plan + */ + public LogicalPreAggOnHint(Optional groupExpression, + Optional logicalProperties, CHILD_TYPE child) { + super(PlanType.LOGICAL_COMMON_HINT, groupExpression, logicalProperties, child); + } + + @Override + public LogicalPreAggOnHint withChildren(List children) { + Preconditions.checkArgument(children.size() == 1); + return new LogicalPreAggOnHint<>(children.get(0)); + } + + @Override + public R accept(PlanVisitor visitor, C context) { + return visitor.visitLogicalPreAggOnHint(this, context); + } + + @Override + public List getExpressions() { + return ImmutableList.of(); + } + + @Override + public Plan withGroupExpression(Optional groupExpression) { + return new LogicalPreAggOnHint<>(groupExpression, Optional.of(getLogicalProperties()), child()); + } + + @Override + public Plan withGroupExprLogicalPropChildren(Optional groupExpression, + Optional logicalProperties, List children) { + Preconditions.checkArgument(children.size() == 1); + return new LogicalPreAggOnHint<>(groupExpression, logicalProperties, children.get(0)); + } + + @Override + public List computeOutput() { + return child().getOutput(); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/PlanVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/PlanVisitor.java index 8db1407220..b6d3e24ba9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/PlanVisitor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/PlanVisitor.java @@ -38,6 +38,7 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalIntersect; import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; import org.apache.doris.nereids.trees.plans.logical.LogicalLimit; import org.apache.doris.nereids.trees.plans.logical.LogicalPartitionTopN; +import org.apache.doris.nereids.trees.plans.logical.LogicalPreAggOnHint; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; import org.apache.doris.nereids.trees.plans.logical.LogicalRelation; import org.apache.doris.nereids.trees.plans.logical.LogicalRepeat; @@ -209,6 +210,10 @@ public abstract class PlanVisitor implements CommandVisitor, Relatio return visit(hint, context); } + public R visitLogicalPreAggOnHint(LogicalPreAggOnHint hint, C context) { + return visit(hint, context); + } + public R visitLogicalSetOperation(LogicalSetOperation setOperation, C context) { return visit(setOperation, context); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index 9c43aed6c9..4b637ae9ef 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -354,7 +354,13 @@ public class AnalysisManager implements Writable { long periodTimeInMs = stmt.getPeriodTimeInMs(); infoBuilder.setPeriodTimeInMs(periodTimeInMs); - List> jobColumns = table.getColumnIndexPairs(columnNames); + OlapTable olapTable = table instanceof OlapTable ? (OlapTable) table : null; + boolean isSampleAnalyze = analysisMethod.equals(AnalysisMethod.SAMPLE); + List> jobColumns = table.getColumnIndexPairs(columnNames).stream() + .filter(c -> olapTable == null || StatisticsUtil.canCollectColumn( + olapTable.getIndexMetaByIndexId(olapTable.getIndexIdByName(c.first)).getColumnByName(c.second), + table, isSampleAnalyze, olapTable.getIndexIdByName(c.first))) + .collect(Collectors.toList()); infoBuilder.setJobColumns(jobColumns); StringJoiner stringJoiner = new StringJoiner(",", "[", "]"); for (Pair pair : jobColumns) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java index 926b7499f0..1d4c69fbf9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java @@ -80,7 +80,8 @@ public abstract class BaseAnalysisTask { + "SUBSTRING(CAST(${max} AS STRING), 1, 1024) AS `max`, " + "${dataSizeFunction} * ${scaleFactor} AS `data_size`, " + "NOW() FROM ( " - + "SELECT * FROM `${catalogName}`.`${dbName}`.`${tblName}` ${index} ${sampleHints} ${limit}) as t"; + + "SELECT * FROM `${catalogName}`.`${dbName}`.`${tblName}` ${index} ${sampleHints} ${limit}) " + + "as t ${preAggHint}"; protected static final String DUJ1_ANALYZE_TEMPLATE = "SELECT " + "CONCAT('${tblId}', '-', '${idxId}', '-', '${colId}') AS `id`, " @@ -101,7 +102,8 @@ public abstract class BaseAnalysisTask { + " SELECT t0.`colValue` as `column_key`, COUNT(1) as `count`, SUM(`len`) as `column_length` " + " FROM " + " (SELECT ${subStringColName} AS `colValue`, LENGTH(`${colName}`) as `len` " - + " FROM `${catalogName}`.`${dbName}`.`${tblName}` ${index} ${sampleHints} ${limit}) as `t0` " + + " FROM `${catalogName}`.`${dbName}`.`${tblName}` ${index} ${sampleHints} ${limit}) as `t0`" + + " ${preAggHint}" + " GROUP BY `t0`.`colValue` " + ") as `t1` "; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java index 979ff04f79..d5d0458d74 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java @@ -173,6 +173,7 @@ public class ExternalAnalysisTask extends BaseAnalysisTask { commonParams.put("limit", ""); commonParams.put("scaleFactor", "1"); commonParams.put("index", ""); + commonParams.put("preAggHint", ""); if (col != null) { commonParams.put("type", col.getType().toString()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java index 3025904139..badf120af7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java @@ -137,13 +137,6 @@ public class OlapAnalysisTask extends BaseAnalysisTask { } protected ResultRow collectMinMax() { - // Agg table value columns has no zone map. - // For these columns, skip collecting min and max value to avoid scan whole table. - if (((OlapTable) tbl).getKeysType().equals(KeysType.AGG_KEYS) && !col.isKey()) { - LOG.info("Aggregation table {} column {} is not a key column, skip collecting min and max.", - tbl.getName(), col.getName()); - return null; - } long startTime = System.currentTimeMillis(); Map params = buildSqlParams(); StringSubstitutor stringSubstitutor = new StringSubstitutor(params); @@ -265,6 +258,13 @@ public class OlapAnalysisTask extends BaseAnalysisTask { params.put("type", col.getType().toString()); params.put("limit", ""); + // For agg table and mor unique table, set PREAGGOPEN preAggHint. + if (((OlapTable) tbl).getKeysType().equals(KeysType.AGG_KEYS) + || ((OlapTable) tbl).getKeysType().equals(KeysType.UNIQUE_KEYS) + && !((OlapTable) tbl).isUniqKeyMergeOnWrite()) { + params.put("preAggHint", "/*+PREAGGOPEN*/"); + } + // If table row count is less than the target sample row count, simple scan the full table. if (tableRowCount <= targetSampleRows) { scanFullTable = true; @@ -334,6 +334,7 @@ public class OlapAnalysisTask extends BaseAnalysisTask { params.put("colName", StatisticsUtil.escapeColumnName(String.valueOf(info.colName))); params.put("tblName", String.valueOf(tbl.getName())); params.put("index", getIndex()); + params.put("preAggHint", ""); return params; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java index 80da38392d..993f6e3fb9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java @@ -145,13 +145,21 @@ public class StatisticsAutoCollector extends MasterDaemon { } return; } + boolean isSampleAnalyze = analysisMethod.equals(AnalysisMethod.SAMPLE); + OlapTable olapTable = table instanceof OlapTable ? (OlapTable) table : null; List> needRunColumns = table.getColumnIndexPairs( table.getSchemaAllIndexes(false) .stream() .filter(c -> !StatisticsUtil.isUnsupportedType(c.getType())) .map(Column::getName) - .collect(Collectors.toSet())); - if (needRunColumns == null || needRunColumns.isEmpty()) { + .collect(Collectors.toSet())) + .stream() + .filter(c -> olapTable == null || StatisticsUtil.canCollectColumn( + olapTable.getIndexMetaByIndexId(olapTable.getIndexIdByName(c.first)).getColumnByName(c.second), + table, isSampleAnalyze, olapTable.getIndexIdByName(c.first))) + .collect(Collectors.toList()); + + if (needRunColumns.isEmpty()) { return; } StringJoiner stringJoiner = new StringJoiner(",", "[", "]"); @@ -176,7 +184,7 @@ public class StatisticsAutoCollector extends MasterDaemon { protected AnalysisInfo createAnalysisInfo(TableIf table, AnalysisMethod analysisMethod, long rowCount, String colNames, List> needRunColumns, JobPriority priority) { - AnalysisInfo jobInfo = new AnalysisInfoBuilder() + return new AnalysisInfoBuilder() .setJobId(Env.getCurrentEnv().getNextId()) .setCatalogId(table.getDatabase().getCatalog().getId()) .setDBId(table.getDatabase().getId()) @@ -200,7 +208,6 @@ public class StatisticsAutoCollector extends MasterDaemon { .setPriority(priority) .setTableVersion(table instanceof OlapTable ? ((OlapTable) table).getVisibleVersion() : 0) .build(); - return jobInfo; } // Analysis job created by the system diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java index 33f099e2b0..95d2b81cbf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java @@ -180,17 +180,24 @@ public class TableStatsMeta implements Writable, GsonPostProcessable { if (tableIf != null) { if (tableIf instanceof OlapTable) { rowCount = analyzedJob.rowCount; + OlapTable olapTable = (OlapTable) tableIf; indexesRowCount.putAll(analyzedJob.indexesRowCount); - clearStaleIndexRowCount((OlapTable) tableIf); - } - if (analyzedJob.jobColumns.containsAll( - tableIf.getColumnIndexPairs( - tableIf.getSchemaAllIndexes(false).stream() - .filter(c -> !StatisticsUtil.isUnsupportedType(c.getType())) - .map(Column::getName).collect(Collectors.toSet())))) { - updatedRows.set(0); - newPartitionLoaded.set(false); + clearStaleIndexRowCount(olapTable); + if (analyzedJob.jobColumns.containsAll( + olapTable.getColumnIndexPairs(olapTable.getSchemaAllIndexes(false) + .stream() + .filter(c -> !StatisticsUtil.isUnsupportedType(c.getType())) + .map(Column::getName).collect(Collectors.toSet())) + .stream() + .filter(c -> StatisticsUtil.canCollectColumn(olapTable.getIndexMetaByIndexId( + olapTable.getIndexIdByName(c.first)).getColumnByName(c.second), + olapTable, true, olapTable.getIndexIdByName(c.first))) + .collect(Collectors.toSet()))) { + updatedRows.set(0); + newPartitionLoaded.set(false); + } } + rowCount = analyzedJob.rowCount; // Set userInject back to false after manual analyze. if (JobType.MANUAL.equals(jobType) && !analyzedJob.userInject) { userInjected = false; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java index d42210dfdc..f57fbb7beb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java @@ -37,6 +37,7 @@ import org.apache.doris.catalog.ArrayType; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.DatabaseIf; import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.KeysType; import org.apache.doris.catalog.MapType; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; @@ -668,6 +669,25 @@ public class StatisticsUtil { || type instanceof AggStateType; } + public static boolean canCollectColumn(Column c, TableIf table, boolean isSampleAnalyze, long indexId) { + // Full analyze can collect all columns. + if (!isSampleAnalyze) { + return true; + } + // External table can collect all columns. + if (!(table instanceof OlapTable)) { + return true; + } + OlapTable olapTable = (OlapTable) table; + // Skip agg table value columns + KeysType keysType = olapTable.getIndexMetaByIndexId(indexId).getKeysType(); + if (KeysType.AGG_KEYS.equals(keysType) && !c.isKey()) { + return false; + } + // Skip mor unique table value columns + return !KeysType.UNIQUE_KEYS.equals(keysType) || olapTable.isUniqKeyMergeOnWrite() || c.isKey(); + } + public static void sleep(long millis) { try { Thread.sleep(millis); diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java index 3e0e309a48..886ae2055d 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java @@ -22,6 +22,7 @@ import org.apache.doris.analysis.TableSample; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.DataProperty; import org.apache.doris.catalog.DatabaseIf; +import org.apache.doris.catalog.KeysType; import org.apache.doris.catalog.MaterializedIndex; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; @@ -168,7 +169,7 @@ public class OlapAnalysisTaskTest { + "${ndvFunction} as `ndv`, ROUND(SUM(CASE WHEN `null` IS NULL THEN 1 ELSE 0 END) * ${scaleFactor}) AS `null_count`, " + "SUBSTRING(CAST('1' AS STRING), 1, 1024) AS `min`, SUBSTRING(CAST('2' AS STRING), 1, 1024) AS `max`, " + "COUNT(1) * 4 * ${scaleFactor} AS `data_size`, NOW() FROM " - + "( SELECT * FROM `catalogName`.`${dbName}`.`null` ${sampleHints} ${limit}) as t", sql); + + "( SELECT * FROM `catalogName`.`${dbName}`.`null` ${sampleHints} ${limit}) as t ", sql); return; } }; @@ -196,7 +197,8 @@ public class OlapAnalysisTaskTest { + "COUNT(1) * 4 * ${scaleFactor} AS `data_size`, NOW() " + "FROM ( SELECT t0.`colValue` as `column_key`, COUNT(1) as `count`, SUM(`len`) as `column_length` " + "FROM (SELECT ${subStringColName} AS `colValue`, LENGTH(`null`) as `len` " - + "FROM `catalogName`.`${dbName}`.`null` ${sampleHints} ${limit}) as `t0` GROUP BY `t0`.`colValue` ) as `t1` ", sql); + + "FROM `catalogName`.`${dbName}`.`null` ${sampleHints} ${limit}) as `t0` " + + " GROUP BY `t0`.`colValue` ) as `t1` ", sql); return; } @@ -365,22 +367,70 @@ public class OlapAnalysisTaskTest { return false; } }; - task.col = new Column("test", PrimitiveType.INT); + + new MockUp() { + @Mock + public KeysType getKeysType() { + return KeysType.DUP_KEYS; + } + }; + task.col = new Column("testColumn", Type.INT, true, null, null, ""); + task.setTable(new OlapTable()); task.getSampleParams(params, 10); Assertions.assertTrue(task.scanFullTable()); Assertions.assertEquals("1.0", params.get("scaleFactor")); Assertions.assertEquals("", params.get("sampleHints")); Assertions.assertEquals("SUM(`t1`.`count`) * COUNT(`t1`.`column_key`) / (SUM(`t1`.`count`) - SUM(IF(`t1`.`count` = 1 and `t1`.`column_key` is not null, 1, 0)) + SUM(IF(`t1`.`count` = 1 and `t1`.`column_key` is not null, 1, 0)) * SUM(`t1`.`count`) / 10)", params.get("ndvFunction")); + Assertions.assertNull(params.get("preAggHint")); params.clear(); + new MockUp() { + @Mock + public KeysType getKeysType() { + return KeysType.AGG_KEYS; + } + }; task = new OlapAnalysisTask(); - task.col = new Column("test", PrimitiveType.INT); + task.col = new Column("testColumn", Type.INT, false, null, null, ""); + task.setTable(new OlapTable()); task.getSampleParams(params, 1000); Assertions.assertEquals("10.0", params.get("scaleFactor")); Assertions.assertEquals("TABLET(1, 2)", params.get("sampleHints")); Assertions.assertEquals("SUM(`t1`.`count`) * COUNT(`t1`.`column_key`) / (SUM(`t1`.`count`) - SUM(IF(`t1`.`count` = 1 and `t1`.`column_key` is not null, 1, 0)) + SUM(IF(`t1`.`count` = 1 and `t1`.`column_key` is not null, 1, 0)) * SUM(`t1`.`count`) / 1000)", params.get("ndvFunction")); Assertions.assertEquals("SUM(t1.count) * 4", params.get("dataSizeFunction")); Assertions.assertEquals("`${colName}`", params.get("subStringColName")); + Assertions.assertEquals("/*+PREAGGOPEN*/", params.get("preAggHint")); + params.clear(); + + new MockUp() { + @Mock + public KeysType getKeysType() { + return KeysType.UNIQUE_KEYS; + } + + @Mock + public boolean isUniqKeyMergeOnWrite() { + return false; + } + }; + task = new OlapAnalysisTask(); + task.col = new Column("testColumn", Type.INT, false, null, null, ""); + task.setTable(new OlapTable()); + task.getSampleParams(params, 1000); + Assertions.assertEquals("/*+PREAGGOPEN*/", params.get("preAggHint")); + params.clear(); + + new MockUp() { + @Mock + public boolean isUniqKeyMergeOnWrite() { + return true; + } + }; + task = new OlapAnalysisTask(); + task.col = new Column("testColumn", Type.INT, false, null, null, ""); + task.setTable(new OlapTable()); + task.getSampleParams(params, 1000); + Assertions.assertNull(params.get("preAggHint")); params.clear(); new MockUp() { @@ -397,6 +447,7 @@ public class OlapAnalysisTaskTest { task = new OlapAnalysisTask(); task.col = new Column("test", PrimitiveType.INT); + task.setTable(new OlapTable()); task.getSampleParams(params, 1000); Assertions.assertEquals("10.0", params.get("scaleFactor")); Assertions.assertEquals("TABLET(1, 2)", params.get("sampleHints")); @@ -411,6 +462,7 @@ public class OlapAnalysisTaskTest { }; task = new OlapAnalysisTask(); task.col = new Column("test", PrimitiveType.INT); + task.setTable(new OlapTable()); task.getSampleParams(params, 1000); Assertions.assertEquals("10.0", params.get("scaleFactor")); Assertions.assertEquals("TABLET(1, 2)", params.get("sampleHints")); @@ -430,6 +482,7 @@ public class OlapAnalysisTaskTest { }; task = new OlapAnalysisTask(); task.col = new Column("test", PrimitiveType.INT); + task.setTable(new OlapTable()); task.getSampleParams(params, 1000); Assertions.assertEquals("20.0", params.get("scaleFactor")); Assertions.assertEquals("TABLET(1, 2)", params.get("sampleHints")); @@ -441,6 +494,7 @@ public class OlapAnalysisTaskTest { task.col = new Column("test", Type.fromPrimitiveType(PrimitiveType.INT), true, null, null, null); task.setKeyColumnSampleTooManyRows(true); + task.setTable(new OlapTable()); task.getSampleParams(params, 2000000000); Assertions.assertEquals("2.0", params.get("scaleFactor")); Assertions.assertEquals("TABLET(1, 2)", params.get("sampleHints")); diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java index 0e221673e9..88b4c45915 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java @@ -17,6 +17,9 @@ package org.apache.doris.statistics.util; +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.KeysType; +import org.apache.doris.catalog.MaterializedIndexMeta; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Table; import org.apache.doris.catalog.Type; @@ -24,6 +27,9 @@ import org.apache.doris.common.AnalysisException; import org.apache.doris.common.Config; import org.apache.doris.common.Pair; import org.apache.doris.datasource.ExternalTable; +import org.apache.doris.datasource.hive.HMSExternalCatalog; +import org.apache.doris.datasource.hive.HMSExternalDatabase; +import org.apache.doris.datasource.hive.HMSExternalTable; import org.apache.doris.qe.SessionVariable; import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod; import org.apache.doris.statistics.AnalysisInfo.AnalysisType; @@ -31,6 +37,7 @@ import org.apache.doris.statistics.AnalysisInfo.JobType; import org.apache.doris.statistics.ColStatsMeta; import org.apache.doris.statistics.ResultRow; import org.apache.doris.statistics.TableStatsMeta; +import org.apache.doris.thrift.TStorageType; import com.google.common.collect.Lists; import mockit.Mock; @@ -43,6 +50,7 @@ import java.time.LocalTime; import java.time.format.DateTimeFormatter; import java.util.ArrayList; import java.util.Base64; +import java.util.List; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; @@ -235,4 +243,58 @@ class StatisticsUtilTest { tableMeta.setColToColStatsMeta(colToColStatsMeta); Assertions.assertFalse(StatisticsUtil.tableNotAnalyzedForTooLong(olapTable, tableMeta)); } + + @Test + void testCanCollectColumn() { + Column column = new Column("testColumn", Type.INT, true, null, null, ""); + List schema = new ArrayList<>(); + schema.add(column); + OlapTable table = new OlapTable(200, "testTable", schema, KeysType.AGG_KEYS, null, null); + + // Test full analyze always return true; + Assertions.assertTrue(StatisticsUtil.canCollectColumn(column, table, false, 1)); + + // Test null table return true; + Assertions.assertTrue(StatisticsUtil.canCollectColumn(column, null, true, 1)); + + // Test external table always return true; + HMSExternalCatalog externalCatalog = new HMSExternalCatalog(); + HMSExternalDatabase externalDatabase = new HMSExternalDatabase(externalCatalog, 1L, "dbName", "dbName"); + HMSExternalTable hmsTable = new HMSExternalTable(1, "name", "name", externalCatalog, externalDatabase); + Assertions.assertTrue(StatisticsUtil.canCollectColumn(column, hmsTable, true, 1)); + + // Test agg key return true; + MaterializedIndexMeta meta = new MaterializedIndexMeta(1L, schema, 1, 1, (short) 1, TStorageType.COLUMN, KeysType.AGG_KEYS, null); + new MockUp() { + @Mock + public MaterializedIndexMeta getIndexMetaByIndexId(long indexId) { + return meta; + } + }; + Assertions.assertTrue(StatisticsUtil.canCollectColumn(column, table, true, 1)); + + // Test agg value return false + column = new Column("testColumn", Type.INT, false, null, null, ""); + Assertions.assertFalse(StatisticsUtil.canCollectColumn(column, table, true, 1)); + + // Test unique mor value column return false + MaterializedIndexMeta meta1 = new MaterializedIndexMeta(1L, schema, 1, 1, (short) 1, TStorageType.COLUMN, KeysType.UNIQUE_KEYS, null); + new MockUp() { + @Mock + public MaterializedIndexMeta getIndexMetaByIndexId(long indexId) { + return meta1; + } + + @Mock + public boolean isUniqKeyMergeOnWrite() { + return false; + } + }; + Assertions.assertFalse(StatisticsUtil.canCollectColumn(column, table, true, 1)); + + // Test unique mor key column return true + column = new Column("testColumn", Type.INT, true, null, null, ""); + Assertions.assertTrue(StatisticsUtil.canCollectColumn(column, table, true, 1)); + + } } diff --git a/regression-test/suites/statistics/analyze_stats.groovy b/regression-test/suites/statistics/analyze_stats.groovy index 6d845a11da..38c429d1a4 100644 --- a/regression-test/suites/statistics/analyze_stats.groovy +++ b/regression-test/suites/statistics/analyze_stats.groovy @@ -263,8 +263,7 @@ suite("test_analyze") { sql """analyze table agg_table_test with sample rows 100 with sync""" def agg_result = sql """show column stats agg_table_test (name)""" logger.info("show column agg_table_test(name) stats: " + agg_result) - assertEquals(agg_result[0][7], "N/A") - assertEquals(agg_result[0][8], "N/A") + assertEquals(0, agg_result.size()) // Continue test partition load data for the first time. def reported = false; diff --git a/regression-test/suites/statistics/test_analyze_mv.groovy b/regression-test/suites/statistics/test_analyze_mv.groovy index 829a20d6ed..83de63935e 100644 --- a/regression-test/suites/statistics/test_analyze_mv.groovy +++ b/regression-test/suites/statistics/test_analyze_mv.groovy @@ -559,6 +559,7 @@ suite("test_analyze_mv") { assertEquals("4", result_row[0][4]) } + sql """drop stats mvTestDup""" sql """analyze table mvTestDup with sample rows 4000000""" wait_analyze_finish("mvTestDup") result_sample = sql """SHOW ANALYZE mvTestDup;""" @@ -567,7 +568,7 @@ suite("test_analyze_mv") { logger.info("Analyze job id is " + jobId) result_sample = sql """show column stats mvTestDup""" - assertEquals(12, result_sample.size()) + assertEquals(9, result_sample.size()) result_sample = sql """show column stats mvTestDup(key1)""" logger.info("result " + result_sample) @@ -629,60 +630,18 @@ suite("test_analyze_mv") { result_sample = sql """show column stats mvTestDup(`mva_SUM__CAST(``value1`` AS bigint)`)""" logger.info("result " + result_sample) - if ("MANUAL" != result_sample[0][11]) { - logger.info("Overwrite by auto analyze, analyze it again.") - sql """analyze table mvTestDup with sync with sample rows 4000000""" - result_sample = sql """show column stats mvTestDup(`mva_SUM__CAST(``value1`` AS bigint)`)""" - logger.info("result after reanalyze " + result_sample) - } - assertEquals(1, result_sample.size()) - assertEquals("mva_SUM__CAST(`value1` AS bigint)", result_sample[0][0]) - assertEquals("mv3", result_sample[0][1]) - assertEquals("4.0", result_sample[0][2]) - assertEquals("4.0", result_sample[0][3]) - assertEquals("6", result_sample[0][7]) - assertEquals("3001", result_sample[0][8]) - assertEquals("SAMPLE", result_sample[0][9]) - assertEquals("MANUAL", result_sample[0][11]) + assertEquals(0, result_sample.size()) result_sample = sql """show column stats mvTestDup(`mva_MAX__``value2```)""" logger.info("result " + result_sample) - if ("MANUAL" != result_sample[0][11]) { - logger.info("Overwrite by auto analyze, analyze it again.") - sql """analyze table mvTestDup with sync with sample rows 4000000""" - result_sample = sql """show column stats mvTestDup(`mva_MAX__``value2```)""" - logger.info("result after reanalyze " + result_sample) - } - assertEquals(1, result_sample.size()) - assertEquals("mva_MAX__`value2`", result_sample[0][0]) - assertEquals("mv3", result_sample[0][1]) - assertEquals("4.0", result_sample[0][2]) - assertEquals("4.0", result_sample[0][3]) - assertEquals("4", result_sample[0][7]) - assertEquals("4001", result_sample[0][8]) - assertEquals("SAMPLE", result_sample[0][9]) - assertEquals("MANUAL", result_sample[0][11]) + assertEquals(0, result_sample.size()) result_sample = sql """show column stats mvTestDup(`mva_MIN__``value3```)""" logger.info("result " + result_sample) - if ("MANUAL" != result_sample[0][11]) { - logger.info("Overwrite by auto analyze, analyze it again.") - sql """analyze table mvTestDup with sync with sample rows 4000000""" - result_sample = sql """show column stats mvTestDup(`mva_MIN__``value3```)""" - logger.info("result after reanalyze " + result_sample) - } - assertEquals(1, result_sample.size()) - assertEquals("mva_MIN__`value3`", result_sample[0][0]) - assertEquals("mv3", result_sample[0][1]) - assertEquals("4.0", result_sample[0][2]) - assertEquals("4.0", result_sample[0][3]) - assertEquals("5", result_sample[0][7]) - assertEquals("5001", result_sample[0][8]) - assertEquals("SAMPLE", result_sample[0][9]) - assertEquals("MANUAL", result_sample[0][11]) + assertEquals(0, result_sample.size()) result_sample = sql """show analyze task status ${jobId}""" - assertEquals(12, result_sample.size()) + assertEquals(9, result_sample.size()) def verifyTaskStatus = { result, colName, indexName -> def found = false; for (int i = 0; i < result.size(); i++) { @@ -702,9 +661,6 @@ suite("test_analyze_mv") { verifyTaskStatus(result_sample, "mv_key1", "mv3") verifyTaskStatus(result_sample, "mv_key2", "mv2") verifyTaskStatus(result_sample, "mv_key2", "mv3") - verifyTaskStatus(result_sample, "mva_MAX__`value2`", "mv3") - verifyTaskStatus(result_sample, "mva_MIN__`value3`", "mv3") - verifyTaskStatus(result_sample, "mva_SUM__CAST(`value1` AS bigint)", "mv3") if (!isCloudMode()) { // Test row count report and report for nereids diff --git a/regression-test/suites/statistics/test_skip_agg_table_value_column.groovy b/regression-test/suites/statistics/test_skip_agg_table_value_column.groovy new file mode 100644 index 0000000000..2bb902d592 --- /dev/null +++ b/regression-test/suites/statistics/test_skip_agg_table_value_column.groovy @@ -0,0 +1,293 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_skip_agg_table_value_column") { + + def wait_row_count_reported = { db, table, row, column, expected -> + def result = sql """show frontends;""" + logger.info("show frontends result origin: " + result) + def host + def port + for (int i = 0; i < result.size(); i++) { + if (result[i][8] == "true") { + host = result[i][1] + port = result[i][4] + } + } + def tokens = context.config.jdbcUrl.split('/') + def url=tokens[0] + "//" + host + ":" + port + logger.info("Master url is " + url) + connect(context.config.jdbcUser, context.config.jdbcPassword, url) { + sql """use ${db}""" + result = sql """show frontends;""" + logger.info("show frontends result master: " + result) + for (int i = 0; i < 120; i++) { + Thread.sleep(5000) + result = sql """SHOW DATA FROM ${table};""" + logger.info("result " + result) + if (result[row][column] == expected) { + return; + } + } + throw new Exception("Row count report timeout.") + } + + } + + sql """drop database if exists test_skip_agg_table_value_column""" + sql """create database test_skip_agg_table_value_column""" + sql """use test_skip_agg_table_value_column""" + sql """set global enable_auto_analyze=false""" + + // Test duplicate table + sql """CREATE TABLE dup ( + key1 bigint NOT NULL, + key2 bigint NOT NULL, + value1 int NOT NULL, + value2 int NOT NULL, + value3 int NOT NULL + )ENGINE=OLAP + DUPLICATE KEY(`key1`, `key2`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`key1`) BUCKETS 2 + PROPERTIES ( + "replication_num" = "1" + ) + """ + createMV("create materialized view mv1 as select key1 from dup;") + createMV("create materialized view mv2 as select key2 from dup;") + createMV("create materialized view mv3 as select key1, key2, sum(value1), max(value2), min(value3) from dup group by key1, key2;") + sql """insert into dup values (1, 2, 3, 4, 5), (1, 2, 3, 4, 5), (10, 20, 30, 40, 50), (10, 20, 30, 40, 50), (100, 200, 300, 400, 500), (1001, 2001, 3001, 4001, 5001);""" + + // Test aggregate table. + sql """CREATE TABLE agg ( + key1 bigint NOT NULL, + key2 bigint NOT NULL, + value1 int SUM NOT NULL, + value2 int MAX NOT NULL, + value3 int MIN NOT NULL + )ENGINE=OLAP + AGGREGATE KEY(`key1`, `key2`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`key1`) BUCKETS 2 + PROPERTIES ( + "replication_num" = "1" + ); + """ + createMV("create materialized view mv1 as select key2 from agg group by key2;") + createMV("create materialized view mv3 as select key1, key2, sum(value1), max(value2), min(value3) from agg group by key1, key2;") + createMV("create materialized view mv6 as select key1, sum(value1) from agg group by key1;") + sql """insert into agg values (1, 2, 3, 4, 5), (1, 11, 22, 33, 44), (10, 20, 30, 40, 50), (100, 200, 300, 400, 500), (1001, 2001, 3001, 4001, 5001);""" + + sql """CREATE TABLE agg_partition ( + key1 bigint NOT NULL, + key2 bigint NOT NULL, + value1 int SUM NOT NULL, + value2 int MAX NOT NULL, + value3 int MIN NOT NULL + )ENGINE=OLAP + AGGREGATE KEY(`key1`, `key2`) + COMMENT "OLAP" + PARTITION BY RANGE(`key1`) + (PARTITION p1 VALUES [("0"), ("100")), + PARTITION p2 VALUES [("100"), ("10000"))) + DISTRIBUTED BY HASH(`key1`) BUCKETS 2 + PROPERTIES ( + "replication_num" = "1" + ); + """ + createMV("create materialized view mv1 as select key2 from agg_partition group by key2;") + createMV("create materialized view mv3 as select key1, key2, sum(value1), max(value2), min(value3) from agg_partition group by key1, key2;") + createMV("create materialized view mv6 as select key1, sum(value1) from agg_partition group by key1;") + sql """analyze table agg_partition(key1) with sync""" + sql """insert into agg_partition values (1, 2, 3, 4, 5), (1, 11, 22, 33, 44), (10, 20, 30, 40, 50), (100, 200, 300, 400, 500), (1001, 2001, 3001, 4001, 5001);""" + + // Test unique table + sql """ + CREATE TABLE uni_mor ( + key1 bigint NOT NULL, + key2 bigint NOT NULL, + value1 int NOT NULL, + value2 int NOT NULL, + value3 int NOT NULL + )ENGINE=OLAP + UNIQUE KEY(`key1`, `key2`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`key1`) BUCKETS 2 + PROPERTIES ( + "enable_unique_key_merge_on_write" = false, + "replication_num" = "1" + ); + """ + createMV("create materialized view mv1 as select key1, key2 from uni_mor;") + createMV("create materialized view mv6 as select key1, key2, value2, value3 from uni_mor;") + sql """insert into uni_mor values (1, 2, 3, 4, 5), (1, 2, 3, 7, 8), (1, 11, 22, 33, 44), (10, 20, 30, 40, 50), (10, 20, 30, 40, 50), (100, 200, 300, 400, 500), (1001, 2001, 3001, 4001, 5001);""" + + sql """ + CREATE TABLE uni_mor_partition ( + key1 bigint NOT NULL, + key2 bigint NOT NULL, + value1 int NOT NULL, + value2 int NOT NULL, + value3 int NOT NULL + )ENGINE=OLAP + UNIQUE KEY(`key1`, `key2`) + COMMENT "OLAP" + PARTITION BY RANGE(`key1`) + (PARTITION p1 VALUES [("0"), ("100")), + PARTITION p2 VALUES [("100"), ("10000"))) + DISTRIBUTED BY HASH(`key1`) BUCKETS 2 + PROPERTIES ( + "enable_unique_key_merge_on_write" = false, + "replication_num" = "1" + ); + """ + createMV("create materialized view mv1 as select key1, key2 from uni_mor_partition;") + createMV("create materialized view mv6 as select key1, key2, value2, value3 from uni_mor_partition;") + sql """analyze table uni_mor_partition(key1) with sync""" + sql """insert into uni_mor_partition values (1, 2, 3, 4, 5), (1, 2, 3, 7, 8), (1, 11, 22, 33, 44), (10, 20, 30, 40, 50), (10, 20, 30, 40, 50), (100, 200, 300, 400, 500), (1001, 2001, 3001, 4001, 5001);""" + + sql """ + CREATE TABLE uni_mow ( + key1 bigint NOT NULL, + key2 bigint NOT NULL, + value1 int NOT NULL, + value2 int NOT NULL, + value3 int NOT NULL + )ENGINE=OLAP + UNIQUE KEY(`key1`, `key2`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`key1`) BUCKETS 2 + PROPERTIES ( + "enable_unique_key_merge_on_write" = true, + "replication_num" = "1" + ); + """ + sql """insert into uni_mow values (1, 2, 3, 4, 5), (1, 2, 3, 7, 8), (1, 11, 22, 33, 44), (10, 20, 30, 40, 50), (10, 20, 30, 40, 50), (100, 200, 300, 400, 500), (1001, 2001, 3001, 4001, 5001);""" + + wait_row_count_reported("test_skip_agg_table_value_column", "dup", 0, 4, "6") + wait_row_count_reported("test_skip_agg_table_value_column", "dup", 1, 4, "6") + wait_row_count_reported("test_skip_agg_table_value_column", "dup", 2, 4, "6") + wait_row_count_reported("test_skip_agg_table_value_column", "dup", 3, 4, "4") + wait_row_count_reported("test_skip_agg_table_value_column", "agg", 0, 4, "5") + wait_row_count_reported("test_skip_agg_table_value_column", "agg", 1, 4, "5") + wait_row_count_reported("test_skip_agg_table_value_column", "agg", 2, 4, "5") + wait_row_count_reported("test_skip_agg_table_value_column", "agg", 3, 4, "4") + wait_row_count_reported("test_skip_agg_table_value_column", "agg_partition", 0, 4, "5") + wait_row_count_reported("test_skip_agg_table_value_column", "agg_partition", 1, 4, "5") + wait_row_count_reported("test_skip_agg_table_value_column", "agg_partition", 2, 4, "5") + wait_row_count_reported("test_skip_agg_table_value_column", "agg_partition", 3, 4, "4") + wait_row_count_reported("test_skip_agg_table_value_column", "uni_mor", 0, 4, "5") + wait_row_count_reported("test_skip_agg_table_value_column", "uni_mor", 1, 4, "5") + wait_row_count_reported("test_skip_agg_table_value_column", "uni_mor", 2, 4, "5") + wait_row_count_reported("test_skip_agg_table_value_column", "uni_mor_partition", 0, 4, "5") + wait_row_count_reported("test_skip_agg_table_value_column", "uni_mor_partition", 1, 4, "5") + wait_row_count_reported("test_skip_agg_table_value_column", "uni_mor_partition", 2, 4, "5") + wait_row_count_reported("test_skip_agg_table_value_column", "uni_mow", 0, 4, "5") + + sql """analyze table dup with sync""" + def result = sql """show column stats dup""" + assertEquals(12, result.size()) + sql """drop stats dup""" + result = sql """show column stats dup""" + assertEquals(0, result.size()) + sql """analyze table dup with sync with sample rows 400000""" + result = sql """show column stats dup""" + assertEquals(9, result.size()) + sql """drop stats dup""" + result = sql """show column stats dup""" + assertEquals(0, result.size()) + + sql """analyze table agg with sync""" + result = sql """show column stats agg""" + assertEquals(13, result.size()) + sql """drop stats agg""" + result = sql """show column stats agg""" + assertEquals(0, result.size()) + sql """analyze table agg with sync with sample rows 400000""" + result = sql """show column stats agg""" + assertEquals(6, result.size()) + sql """drop stats agg""" + result = sql """show column stats agg""" + assertEquals(0, result.size()) + + result = sql """show table stats agg_partition""" + assertEquals("true", result[0][6]) + sql """analyze table agg_partition with sync with sample rows 400000""" + result = sql """show column stats agg_partition""" + assertEquals(6, result.size()) + result = sql """show table stats agg_partition""" + assertEquals("false", result[0][6]) + + sql """analyze table uni_mor with sync""" + result = sql """show column stats uni_mor""" + assertEquals(11, result.size()) + sql """drop stats uni_mor""" + result = sql """show column stats uni_mor""" + assertEquals(0, result.size()) + sql """analyze table uni_mor with sync with sample rows 400000""" + result = sql """show column stats uni_mor""" + assertEquals(6, result.size()) + sql """drop stats uni_mor""" + result = sql """show column stats uni_mor""" + assertEquals(0, result.size()) + + result = sql """show table stats uni_mor_partition""" + assertEquals("true", result[0][6]) + sql """analyze table uni_mor_partition with sync with sample rows 400000""" + result = sql """show column stats uni_mor_partition""" + assertEquals(6, result.size()) + result = sql """show table stats uni_mor_partition""" + assertEquals("false", result[0][6]) + + sql """analyze table uni_mow with sync""" + result = sql """show column stats uni_mow""" + assertEquals(5, result.size()) + sql """drop stats uni_mow""" + result = sql """show column stats uni_mow""" + assertEquals(0, result.size()) + sql """analyze table uni_mow with sync with sample rows 400000""" + result = sql """show column stats uni_mow""" + assertEquals(5, result.size()) + sql """drop stats uni_mow""" + result = sql """show column stats uni_mow""" + assertEquals(0, result.size()) + + // Test PREAGGOPEN hint. + explain { + sql("SELECT CONCAT(1744255158798, '-', 1744255158812, '-', 'mv_key2') AS `id`, 0 AS `catalog_id`, 1744255158742 AS `db_id`, 1744255158798 AS `tbl_id`, 1744255158812 AS `idx_id`, 'mv_key2' AS `col_id`, NULL AS `part_id`, 5 AS `row_count`, ROUND(NDV(`mv_key2`) * 1) as `ndv`, ROUND(SUM(CASE WHEN `mv_key2` IS NULL THEN 1 ELSE 0 END) * 1) AS `null_count`, SUBSTRING(CAST('2' AS STRING), 1, 1024) AS `min`, SUBSTRING(CAST('2001' AS STRING), 1, 1024) AS `max`, COUNT(1) * 8 * 1 AS `data_size`, NOW() FROM ( SELECT * FROM `internal`.`test_skip_agg_table_value_column`.`uni_mor` index `mv1` ) as t /*+PREAGGOPEN*/") + contains "PREAGGREGATION: ON" + notContains "PREAGGREGATION: OFF" + } + explain { + sql("SELECT CONCAT(1744255158798, '-', 1744255158812, '-', 'mv_key2') AS `id`, 0 AS `catalog_id`, 1744255158742 AS `db_id`, 1744255158798 AS `tbl_id`, 1744255158812 AS `idx_id`, 'mv_key2' AS `col_id`, NULL AS `part_id`, 5 AS `row_count`, ROUND(NDV(`mv_key2`) * 1) as `ndv`, ROUND(SUM(CASE WHEN `mv_key2` IS NULL THEN 1 ELSE 0 END) * 1) AS `null_count`, SUBSTRING(CAST('2' AS STRING), 1, 1024) AS `min`, SUBSTRING(CAST('2001' AS STRING), 1, 1024) AS `max`, COUNT(1) * 8 * 1 AS `data_size`, NOW() FROM ( SELECT * FROM `internal`.`test_skip_agg_table_value_column`.`uni_mor` index `mv1` ) as t") + contains "PREAGGREGATION: OFF" + notContains "PREAGGREGATION: ON" + } + explain { + sql("SELECT CONCAT(1744255159211, '-', 1744255159224, '-', 'mv_key1') AS `id`, 0 AS `catalog_id`, 1744255159182 AS `db_id`, 1744255159211 AS `tbl_id`, 1744255159224 AS `idx_id`, 'mv_key1' AS `col_id`, NULL AS `part_id`, 5 AS `row_count`, ROUND(NDV(`mv_key1`) * 1) as `ndv`, ROUND(SUM(CASE WHEN `mv_key1` IS NULL THEN 1 ELSE 0 END) * 1) AS `null_count`, SUBSTRING(CAST('1' AS STRING), 1, 1024) AS `min`, SUBSTRING(CAST('1001' AS STRING), 1, 1024) AS `max`, COUNT(1) * 8 * 1 AS `data_size`, NOW() FROM ( SELECT * FROM `internal`.`test_skip_agg_table_value_column`.`agg` index `mv3` ) as t /*+PREAGGOPEN*/") + contains "PREAGGREGATION: ON" + notContains "PREAGGREGATION: OFF" + } + explain { + sql("SELECT CONCAT(1744255159211, '-', 1744255159224, '-', 'mv_key1') AS `id`, 0 AS `catalog_id`, 1744255159182 AS `db_id`, 1744255159211 AS `tbl_id`, 1744255159224 AS `idx_id`, 'mv_key1' AS `col_id`, NULL AS `part_id`, 5 AS `row_count`, ROUND(NDV(`mv_key1`) * 1) as `ndv`, ROUND(SUM(CASE WHEN `mv_key1` IS NULL THEN 1 ELSE 0 END) * 1) AS `null_count`, SUBSTRING(CAST('1' AS STRING), 1, 1024) AS `min`, SUBSTRING(CAST('1001' AS STRING), 1, 1024) AS `max`, COUNT(1) * 8 * 1 AS `data_size`, NOW() FROM ( SELECT * FROM `internal`.`test_skip_agg_table_value_column`.`agg` index `mv3` ) as t") + contains "PREAGGREGATION: OFF" + notContains "PREAGGREGATION: ON" + } + + sql """drop database if exists test_skip_agg_table_value_column""" +}