[refactor](nereids) make forbid_unknown_col_stats check more accurate (#24061)
Ignore the unknown col stats check if: (1) the column is not used in the query, or (2) the column is of Array/Json/Map/Struct type.
This commit is contained in:
@ -126,6 +126,11 @@ import org.apache.doris.nereids.trees.plans.physical.PhysicalUnion;
|
||||
import org.apache.doris.nereids.trees.plans.physical.PhysicalWindow;
|
||||
import org.apache.doris.nereids.trees.plans.physical.RuntimeFilter;
|
||||
import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanVisitor;
|
||||
import org.apache.doris.nereids.types.ArrayType;
|
||||
import org.apache.doris.nereids.types.DataType;
|
||||
import org.apache.doris.nereids.types.JsonType;
|
||||
import org.apache.doris.nereids.types.MapType;
|
||||
import org.apache.doris.nereids.types.StructType;
|
||||
import org.apache.doris.nereids.util.ExpressionUtils;
|
||||
import org.apache.doris.nereids.util.JoinUtils;
|
||||
import org.apache.doris.nereids.util.Utils;
|
||||
@ -204,7 +209,6 @@ import java.util.stream.Stream;
|
||||
public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, PlanTranslatorContext> {
|
||||
|
||||
private static final Logger LOG = LogManager.getLogger(PhysicalPlanTranslator.class);
|
||||
|
||||
private final StatsErrorEstimator statsErrorEstimator;
|
||||
private final PlanTranslatorContext context;
|
||||
|
||||
@ -236,6 +240,14 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla
|
||||
Collections.reverse(context.getPlanFragments());
|
||||
// TODO: maybe we need to trans nullable directly? and then we could remove call computeMemLayout
|
||||
context.getDescTable().computeMemLayout();
|
||||
if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().forbidUnknownColStats) {
|
||||
Set<ScanNode> scans = context.getScanNodeWithUnknownColumnStats();
|
||||
if (!scans.isEmpty()) {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
scans.forEach(scanNode -> builder.append(scanNode));
|
||||
throw new AnalysisException("tables with unknown column stats: " + builder);
|
||||
}
|
||||
}
|
||||
return rootFragment;
|
||||
}
|
||||
|
||||
@ -542,6 +554,15 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla
|
||||
// TODO: move all node set cardinality into one place
|
||||
if (olapScan.getStats() != null) {
|
||||
olapScanNode.setCardinality((long) olapScan.getStats().getRowCount());
|
||||
if (ConnectContext.get().getSessionVariable().forbidUnknownColStats) {
|
||||
for (int i = 0; i < slots.size(); i++) {
|
||||
Slot slot = slots.get(i);
|
||||
if (olapScan.getStats().findColumnStatistics(slot).isUnKnown()
|
||||
&& !isComplexDataType(slot.getDataType())) {
|
||||
context.addUnknownStatsColumn(olapScanNode, tupleDescriptor.getSlots().get(i).getId());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// TODO: Do we really need tableName here?
|
||||
TableName tableName = new TableName(null, "", "");
|
||||
@ -2000,6 +2021,14 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla
|
||||
scanNode.getTupleDesc().getSlots().add(smallest);
|
||||
}
|
||||
try {
|
||||
if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().forbidUnknownColStats) {
|
||||
for (SlotId slotId : requiredByProjectSlotIdSet) {
|
||||
if (context.isColumnStatsUnknown(scanNode, slotId)) {
|
||||
throw new AnalysisException("meet unknown column stats on table " + scanNode);
|
||||
}
|
||||
}
|
||||
context.removeScanFromStatsUnknownColumnsMap(scanNode);
|
||||
}
|
||||
scanNode.updateRequiredSlots(context, requiredByProjectSlotIdSet);
|
||||
} catch (UserException e) {
|
||||
Util.logAndThrowRuntimeException(LOG,
|
||||
@ -2262,4 +2291,9 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla
|
||||
}
|
||||
return outputExprs;
|
||||
}
|
||||
|
||||
private boolean isComplexDataType(DataType dataType) {
|
||||
return dataType instanceof ArrayType || dataType instanceof MapType || dataType instanceof JsonType
|
||||
|| dataType instanceof StructType;
|
||||
}
|
||||
}
|
||||
|
||||
@ -47,11 +47,13 @@ import org.apache.doris.thrift.TPushAggOp;
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Maps;
|
||||
import com.google.common.collect.Sets;
|
||||
|
||||
import java.util.IdentityHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import javax.annotation.Nullable;
|
||||
|
||||
@ -97,6 +99,8 @@ public class PlanTranslatorContext {
|
||||
|
||||
private final Map<RelationId, TPushAggOp> tablePushAggOp = Maps.newHashMap();
|
||||
|
||||
private final Map<ScanNode, Set<SlotId>> statsUnknownColumnsMap = Maps.newHashMap();
|
||||
|
||||
/**
 * Creates a translator context whose runtime filter translator is built from the
 * runtime filter context of {@code ctx}.
 *
 * @param ctx the cascades context supplying the runtime filter context
 */
public PlanTranslatorContext(CascadesContext ctx) {
    translator = new RuntimeFilterTranslator(ctx.getRuntimeFilterContext());
}
|
||||
@ -106,6 +110,34 @@ public class PlanTranslatorContext {
|
||||
translator = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* remember the unknown-stats column and its scan, used for forbid_unknown_col_stats check
|
||||
*/
|
||||
public void addUnknownStatsColumn(ScanNode scan, SlotId slotId) {
|
||||
Set<SlotId> slots = statsUnknownColumnsMap.get(scan);
|
||||
if (slots == null) {
|
||||
statsUnknownColumnsMap.put(scan, Sets.newHashSet(slotId));
|
||||
} else {
|
||||
statsUnknownColumnsMap.get(scan).add(slotId);
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isColumnStatsUnknown(ScanNode scan, SlotId slotId) {
|
||||
Set<SlotId> unknownSlots = statsUnknownColumnsMap.get(scan);
|
||||
if (unknownSlots == null) {
|
||||
return false;
|
||||
}
|
||||
return unknownSlots.contains(slotId);
|
||||
}
|
||||
|
||||
public void removeScanFromStatsUnknownColumnsMap(ScanNode scan) {
|
||||
statsUnknownColumnsMap.remove(scan);
|
||||
}
|
||||
|
||||
public Set<ScanNode> getScanNodeWithUnknownColumnStats() {
|
||||
return statsUnknownColumnsMap.keySet();
|
||||
}
|
||||
|
||||
public List<PlanFragment> getPlanFragments() {
|
||||
return planFragments;
|
||||
}
|
||||
|
||||
@ -28,7 +28,6 @@ import org.apache.doris.common.Config;
|
||||
import org.apache.doris.common.FeConstants;
|
||||
import org.apache.doris.common.Pair;
|
||||
import org.apache.doris.nereids.CascadesContext;
|
||||
import org.apache.doris.nereids.exceptions.AnalysisException;
|
||||
import org.apache.doris.nereids.memo.Group;
|
||||
import org.apache.doris.nereids.memo.GroupExpression;
|
||||
import org.apache.doris.nereids.trees.expressions.Alias;
|
||||
@ -126,7 +125,6 @@ import org.apache.doris.statistics.StatisticConstants;
|
||||
import org.apache.doris.statistics.StatisticRange;
|
||||
import org.apache.doris.statistics.Statistics;
|
||||
import org.apache.doris.statistics.StatisticsBuilder;
|
||||
import org.apache.doris.statistics.util.StatisticsUtil;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.collect.Maps;
|
||||
@ -639,36 +637,16 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> {
|
||||
.setAvgSizeByte(slotReference.getColumn().get().getType().getSlotSize())
|
||||
.build();
|
||||
}
|
||||
if (cache.isUnKnown) {
|
||||
if (forbidUnknownColStats && !shouldIgnoreThisCol) {
|
||||
if (StatisticsUtil.statsTblAvailable()) {
|
||||
throw new AnalysisException(String.format("Found unknown stats for column:%s.%s.\n"
|
||||
+ "It may caused by:\n"
|
||||
+ "\n"
|
||||
+ "1. This column never got analyzed\n"
|
||||
+ "2. This table is empty\n"
|
||||
+ "3. Stats load failed caused by unstable of backends,"
|
||||
+ "and FE cached the unknown stats by default in this scenario\n"
|
||||
+ "4. There is a bug, please report it to Doris community\n"
|
||||
+ "\n"
|
||||
+ "If an unknown stats for this column is tolerable,"
|
||||
+ "you could set session variable `forbid_unknown_col_stats` to false to make planner"
|
||||
+ " ignore this error and keep planning.", table.getName(), colName));
|
||||
} else {
|
||||
throw new AnalysisException("BE is not available!");
|
||||
}
|
||||
if (!cache.isUnKnown) {
|
||||
rowCount = Math.max(rowCount, cache.count);
|
||||
cache = setOlapPartitionInfo(table, cache);
|
||||
Histogram histogram = getColumnHistogram(table, colName);
|
||||
if (histogram != null) {
|
||||
ColumnStatisticBuilder columnStatisticBuilder =
|
||||
new ColumnStatisticBuilder(cache).setHistogram(histogram);
|
||||
columnStatisticMap.put(slotReference, columnStatisticBuilder.build());
|
||||
cache = columnStatisticBuilder.build();
|
||||
}
|
||||
columnStatisticMap.put(slotReference, cache);
|
||||
continue;
|
||||
}
|
||||
rowCount = Math.max(rowCount, cache.count);
|
||||
cache = setOlapPartitionInfo(table, cache);
|
||||
Histogram histogram = getColumnHistogram(table, colName);
|
||||
if (histogram != null) {
|
||||
ColumnStatisticBuilder columnStatisticBuilder =
|
||||
new ColumnStatisticBuilder(cache).setHistogram(histogram);
|
||||
columnStatisticMap.put(slotReference, columnStatisticBuilder.build());
|
||||
cache = columnStatisticBuilder.build();
|
||||
}
|
||||
columnStatisticMap.put(slotReference, cache);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user