diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java index 0acc46b7aa..9832221f8e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java @@ -126,6 +126,11 @@ import org.apache.doris.nereids.trees.plans.physical.PhysicalUnion; import org.apache.doris.nereids.trees.plans.physical.PhysicalWindow; import org.apache.doris.nereids.trees.plans.physical.RuntimeFilter; import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanVisitor; +import org.apache.doris.nereids.types.ArrayType; +import org.apache.doris.nereids.types.DataType; +import org.apache.doris.nereids.types.JsonType; +import org.apache.doris.nereids.types.MapType; +import org.apache.doris.nereids.types.StructType; import org.apache.doris.nereids.util.ExpressionUtils; import org.apache.doris.nereids.util.JoinUtils; import org.apache.doris.nereids.util.Utils; @@ -204,7 +209,6 @@ import java.util.stream.Stream; public class PhysicalPlanTranslator extends DefaultPlanVisitor { private static final Logger LOG = LogManager.getLogger(PhysicalPlanTranslator.class); - private final StatsErrorEstimator statsErrorEstimator; private final PlanTranslatorContext context; @@ -236,6 +240,14 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor scans = context.getScanNodeWithUnknownColumnStats(); + if (!scans.isEmpty()) { + StringBuilder builder = new StringBuilder(); + scans.forEach(scanNode -> builder.append(scanNode)); + throw new AnalysisException("tables with unknown column stats: " + builder); + } + } return rootFragment; } @@ -542,6 +554,15 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor tablePushAggOp = Maps.newHashMap(); + private final Map> statsUnknownColumnsMap = Maps.newHashMap(); + public PlanTranslatorContext(CascadesContext ctx) { this.translator = new RuntimeFilterTranslator(ctx.getRuntimeFilterContext()); } @@ -106,6 +110,34 @@ public class PlanTranslatorContext { translator = null; } + /** + * remember the unknown-stats column and its scan, used for forbid_unknown_col_stats check + */ + public void addUnknownStatsColumn(ScanNode scan, SlotId slotId) { + Set slots = statsUnknownColumnsMap.get(scan); + if (slots == null) { + statsUnknownColumnsMap.put(scan, Sets.newHashSet(slotId)); + } else { + statsUnknownColumnsMap.get(scan).add(slotId); + } + } + + public boolean isColumnStatsUnknown(ScanNode scan, SlotId slotId) { + Set unknownSlots = statsUnknownColumnsMap.get(scan); + if (unknownSlots == null) { + return false; + } + return unknownSlots.contains(slotId); + } + + public void removeScanFromStatsUnknownColumnsMap(ScanNode scan) { + statsUnknownColumnsMap.remove(scan); + } + + public Set getScanNodeWithUnknownColumnStats() { + return statsUnknownColumnsMap.keySet(); + } + public List getPlanFragments() { return planFragments; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java index c357932728..0566a2d3f6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -28,7 +28,6 @@ import org.apache.doris.common.Config; import org.apache.doris.common.FeConstants; import org.apache.doris.common.Pair; import org.apache.doris.nereids.CascadesContext; -import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.memo.Group; import org.apache.doris.nereids.memo.GroupExpression; import org.apache.doris.nereids.trees.expressions.Alias; @@ -126,7 +125,6 @@ import org.apache.doris.statistics.StatisticConstants; import org.apache.doris.statistics.StatisticRange; import org.apache.doris.statistics.Statistics; import org.apache.doris.statistics.StatisticsBuilder; -import org.apache.doris.statistics.util.StatisticsUtil; import com.google.common.base.Preconditions; import com.google.common.collect.Maps; @@ -639,36 +637,16 @@ public class StatsCalculator extends DefaultPlanVisitor { .setAvgSizeByte(slotReference.getColumn().get().getType().getSlotSize()) .build(); } - if (cache.isUnKnown) { - if (forbidUnknownColStats && !shouldIgnoreThisCol) { - if (StatisticsUtil.statsTblAvailable()) { - throw new AnalysisException(String.format("Found unknown stats for column:%s.%s.\n" - + "It may caused by:\n" - + "\n" - + "1. This column never got analyzed\n" - + "2. This table is empty\n" - + "3. Stats load failed caused by unstable of backends," - + "and FE cached the unknown stats by default in this scenario\n" - + "4. There is a bug, please report it to Doris community\n" - + "\n" - + "If an unknown stats for this column is tolerable," - + "you could set session variable `forbid_unknown_col_stats` to false to make planner" - + " ignore this error and keep planning.", table.getName(), colName)); - } else { - throw new AnalysisException("BE is not available!"); - } + if (!cache.isUnKnown) { + rowCount = Math.max(rowCount, cache.count); + cache = setOlapPartitionInfo(table, cache); + Histogram histogram = getColumnHistogram(table, colName); + if (histogram != null) { + ColumnStatisticBuilder columnStatisticBuilder = + new ColumnStatisticBuilder(cache).setHistogram(histogram); + columnStatisticMap.put(slotReference, columnStatisticBuilder.build()); + cache = columnStatisticBuilder.build(); } - columnStatisticMap.put(slotReference, cache); - continue; - } - rowCount = Math.max(rowCount, cache.count); - cache = setOlapPartitionInfo(table, cache); - Histogram histogram = getColumnHistogram(table, colName); - if (histogram != null) { - ColumnStatisticBuilder columnStatisticBuilder = - new ColumnStatisticBuilder(cache).setHistogram(histogram); - columnStatisticMap.put(slotReference, columnStatisticBuilder.build()); - cache = columnStatisticBuilder.build(); } columnStatisticMap.put(slotReference, cache); } diff --git a/regression-test/suites/nereids_p0/test_forbid_unknown_col_stats.groovy b/regression-test/suites/nereids_p0/test_forbid_unknown_col_stats.groovy new file mode 100644 index 0000000000..a0d9d75499 --- /dev/null +++ b/regression-test/suites/nereids_p0/test_forbid_unknown_col_stats.groovy @@ -0,0 +1,53 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_forbid_unknown_col_stats") { + + sql "drop table if exists test_forbid_unknown_col_stats_tbl" + sql """ + create table test_forbid_unknown_col_stats_tbl( + `r_regionkey` int(11) NOT NULL, + `r_name` Array, + `r_comment` int(11) NOT NULL + ) ENGINE=OLAP + DUPLICATE KEY(`r_regionkey`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`r_regionkey`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "is_being_synced" = "false", + "storage_format" = "V2", + "light_schema_change" = "true", + "disable_auto_compaction" = "false", + "enable_single_replica_compaction" = "false" + ); + """ + sql "SET enable_nereids_planner=true" + sql "SET enable_fallback_to_original_planner=false" + sql "set forbid_unknown_col_stats = true" + sql "alter table test_forbid_unknown_col_stats_tbl modify column r_regionkey set stats ('ndv'='5', 'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='5');" + sql "select r_regionkey from test_forbid_unknown_col_stats_tbl;" + + sql "select r_name from test_forbid_unknown_col_stats_tbl;" + + test{ + sql "select * from test_forbid_unknown_col_stats_tbl;" + exception "tables with unknown column stats: OlapScanNode{tid=0, tblName=test_forbid_unknown_col_stats_tbl, keyRanges=, preds= limit=-1}" + } + + +}