diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotRef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotRef.java index c27eebeb66..7d9763ea69 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotRef.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotRef.java @@ -166,6 +166,10 @@ public class SlotRef extends Expr { this.desc = desc; } + public void setAnalyzed(boolean analyzed) { + isAnalyzed = analyzed; + } + public boolean columnEqual(Expr srcExpr) { Preconditions.checkState(srcExpr instanceof SlotRef); SlotRef srcSlotRef = (SlotRef) srcExpr; @@ -258,9 +262,12 @@ public class SlotRef extends Expr { } StringBuilder sb = new StringBuilder(); - + String subColumnPaths = ""; + if (subColPath != null && !subColPath.isEmpty()) { + subColumnPaths = "." + String.join(".", subColPath); + } if (tblName != null) { - return tblName.toSql() + "." + label; + return tblName.toSql() + "." + label + subColumnPaths; } else if (label != null) { if (ConnectContext.get() != null && ConnectContext.get().getState().isNereids() diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java index 4a19ab9254..d8fa683027 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java @@ -28,8 +28,10 @@ import org.apache.doris.nereids.trees.expressions.ExprId; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.NamedExpression; import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.SlotReference; import org.apache.doris.nereids.trees.plans.ObjectId; import org.apache.doris.nereids.trees.plans.RelationId; +import org.apache.doris.nereids.trees.plans.algebra.Relation; import org.apache.doris.nereids.trees.plans.logical.LogicalCTEConsumer; import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; import org.apache.doris.qe.ConnectContext; @@ -44,7 +46,9 @@ import com.google.common.collect.Sets; import java.util.ArrayList; import java.util.Collection; +import java.util.Comparator; import java.util.HashMap; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; @@ -99,6 +103,19 @@ public class StatementContext { private final List joinFilters = new ArrayList<>(); private final List hints = new ArrayList<>(); + // Root Slot -> Paths -> Sub-column Slots + private final Map, SlotReference>> subColumnSlotRefMap + = Maps.newHashMap(); + + // Map from rewritten slot to original expr + private final Map subColumnOriginalExprMap = Maps.newHashMap(); + + // Map from original expr to rewritten slot + private final Map originalExprToRewrittenSubColumn = Maps.newHashMap(); + + // Map slot to its relation, currently used in SlotReference to find its original + // Relation for example LogicalOlapScan + private final Map slotToRelation = Maps.newHashMap(); public StatementContext() { this.connectContext = ConnectContext.get(); @@ -149,6 +166,62 @@ public class StatementContext { return joinCount; } + public Set getAllPathsSlots() { + Set allSlotReferences = Sets.newHashSet(); + for (Map, SlotReference> slotReferenceMap : subColumnSlotRefMap.values()) { + allSlotReferences.addAll(slotReferenceMap.values()); + } + return allSlotReferences; + } + + public Expression getOriginalExpr(SlotReference rewriteSlot) { + return subColumnOriginalExprMap.getOrDefault(rewriteSlot, null); + } + + public Slot getRewrittenSlotRefByOriginalExpr(Expression originalExpr) { + return originalExprToRewrittenSubColumn.getOrDefault(originalExpr, null); + } + + /** + * Add a slot ref attached with paths in context to avoid duplicated slot + */ + public void addPathSlotRef(Slot root, List paths, SlotReference slotRef, Expression originalExpr) { + Comparator> pathsComparator = new Comparator>() { + @Override + public int compare(List lst1, List lst2) { + Iterator it1 = lst1.iterator(); + Iterator it2 = lst2.iterator(); + while (it1.hasNext() && it2.hasNext()) { + int result = it1.next().compareTo(it2.next()); + if (result != 0) { + return result; + } + } + return Integer.compare(lst1.size(), lst2.size()); + } + }; + subColumnSlotRefMap.computeIfAbsent(root, k -> Maps.newTreeMap(pathsComparator)); + subColumnSlotRefMap.get(root).put(paths, slotRef); + subColumnOriginalExprMap.put(slotRef, originalExpr); + originalExprToRewrittenSubColumn.put(originalExpr, slotRef); + } + + public SlotReference getPathSlot(Slot root, List paths) { + Map, SlotReference> pathsSlotsMap = subColumnSlotRefMap.getOrDefault(root, null); + if (pathsSlotsMap == null) { + return null; + } + return pathsSlotsMap.getOrDefault(paths, null); + } + + public void addSlotToRelation(Slot slot, Relation relation) { + slotToRelation.put(slot, relation); + } + + public Relation getRelationBySlot(Slot slot) { + return slotToRelation.getOrDefault(slot, null); + } + public boolean isDpHyp() { return isDpHyp; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/ExpressionTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/ExpressionTranslator.java index f63f9062dd..dbc083915f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/ExpressionTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/ExpressionTranslator.java @@ -88,8 +88,10 @@ import org.apache.doris.nereids.trees.expressions.functions.combinator.StateComb import org.apache.doris.nereids.trees.expressions.functions.combinator.UnionCombinator; import org.apache.doris.nereids.trees.expressions.functions.generator.TableGeneratingFunction; import org.apache.doris.nereids.trees.expressions.functions.scalar.ArrayMap; +import org.apache.doris.nereids.trees.expressions.functions.scalar.ElementAt; import org.apache.doris.nereids.trees.expressions.functions.scalar.HighOrderFunction; import org.apache.doris.nereids.trees.expressions.functions.scalar.Lambda; +import org.apache.doris.nereids.trees.expressions.functions.scalar.PushDownToProjectionFunction; import org.apache.doris.nereids.trees.expressions.functions.scalar.ScalarFunction; import org.apache.doris.nereids.trees.expressions.functions.udf.JavaUdaf; import org.apache.doris.nereids.trees.expressions.functions.udf.JavaUdf; @@ -98,6 +100,7 @@ import org.apache.doris.nereids.trees.expressions.literal.Literal; import org.apache.doris.nereids.trees.expressions.literal.NullLiteral; import org.apache.doris.nereids.trees.expressions.visitor.DefaultExpressionVisitor; import org.apache.doris.nereids.types.DataType; +import org.apache.doris.qe.ConnectContext; import org.apache.doris.thrift.TFunctionBinaryType; import com.google.common.base.Preconditions; @@ -194,12 +197,29 @@ public class ExpressionTranslator extends DefaultExpressionVisitor invertedIndexCharFilter = new HashMap<>(); - SlotRef left = (SlotRef) match.left().accept(this, context); + SlotRef left = (SlotRef) match.left().getInputSlots().stream().findFirst().get().accept(this, context); OlapTable olapTbl = Optional.ofNullable(getOlapTableFromSlotDesc(left.getDesc())) .orElse(getOlapTableDirectly(left)); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java index b20d073814..5acda0f298 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java @@ -81,6 +81,7 @@ import org.apache.doris.nereids.trees.expressions.VirtualSlotReference; import org.apache.doris.nereids.trees.expressions.WindowFrame; import org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunction; import org.apache.doris.nereids.trees.expressions.functions.agg.AggregateParam; +import org.apache.doris.nereids.trees.expressions.functions.scalar.PushDownToProjectionFunction; import org.apache.doris.nereids.trees.expressions.literal.BooleanLiteral; import org.apache.doris.nereids.trees.plans.AbstractPlan; import org.apache.doris.nereids.trees.plans.AggMode; @@ -1619,6 +1620,32 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor targetExpr = expression.collectFirst(PushDownToProjectionFunction.class::isInstance); + if (!targetExpr.isEmpty()) { + return targetExpr.get(0); + } + return null; + } + + // register rewritten slots from original PushDownToProjectionFunction + private void registerRewrittenSlot(PhysicalProject project, OlapScanNode olapScanNode) { + // register slots that are rewritten from element_at/etc.. + for (NamedExpression expr : project.getProjects()) { + if (context != null + && context.getConnectContext() != null + && context.getConnectContext().getStatementContext() != null) { + Slot rewrittenSlot = context.getConnectContext() + .getStatementContext().getRewrittenSlotRefByOriginalExpr(getOriginalFunctionForRewritten(expr)); + if (rewrittenSlot != null) { + TupleDescriptor tupleDescriptor = context.getTupleDesc(olapScanNode.getTupleId()); + context.createSlotDesc(tupleDescriptor, (SlotReference) rewrittenSlot); + } + } + } + } + // TODO: generate expression mapping when be project could do in ExecNode. @Override public PlanFragment visitPhysicalProject(PhysicalProject project, PlanTranslatorContext context) { @@ -1633,6 +1660,12 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor projectionExprs = project.getProjects() .stream() .map(e -> ExpressionTranslator.translate(e, context)) @@ -1706,7 +1739,7 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor(projectionExprs).size() != projectionExprs.size() || projectionExprs.stream().anyMatch(expr -> !(expr instanceof SlotRef))) { projectionTuple = generateTupleDesc(slots, - ((ScanNode) inputPlanNode).getTupleDesc().getTable(), context); + ((ScanNode) inputPlanNode).getTupleDesc().getTable(), context); inputPlanNode.setProjectList(projectionExprs); inputPlanNode.setOutputTupleDesc(projectionTuple); } else { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java index 77b386c79e..1aa98bb492 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java @@ -274,6 +274,11 @@ public class PlanTranslatorContext { slotDescriptor.setLabel(slotReference.getName()); } else { slotRef = new SlotRef(slotDescriptor); + if (slotReference.hasSubColPath()) { + slotDescriptor.setSubColLables(slotReference.getSubColPath()); + slotDescriptor.setMaterializedColumnName(slotRef.getColumnName() + + "." + String.join(".", slotReference.getSubColPath())); + } } slotRef.setTable(table); slotRef.setLabel(slotReference.getName()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java index 95fb019ad5..2ae15d08d4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java @@ -25,6 +25,7 @@ import org.apache.doris.nereids.rules.analysis.BindExpression; import org.apache.doris.nereids.rules.analysis.BindRelation; import org.apache.doris.nereids.rules.analysis.BindRelation.CustomTableResolver; import org.apache.doris.nereids.rules.analysis.BindSink; +import org.apache.doris.nereids.rules.analysis.BindSlotWithPaths; import org.apache.doris.nereids.rules.analysis.CheckAfterBind; import org.apache.doris.nereids.rules.analysis.CheckAnalysis; import org.apache.doris.nereids.rules.analysis.CheckPolicy; @@ -134,6 +135,7 @@ public class Analyzer extends AbstractBatchJobExecutor { new UserAuthentication() ), bottomUp(new BindExpression()), + bottomUp(new BindSlotWithPaths()), topDown(new BindSink()), bottomUp(new CheckAfterBind()), bottomUp( @@ -161,6 +163,7 @@ public class Analyzer extends AbstractBatchJobExecutor { // errCode = 2, detailMessage = GROUP BY expression must not contain aggregate functions: sum(lo_tax) bottomUp(new CheckAnalysis()), topDown(new EliminateGroupByConstant()), + topDown(new NormalizeAggregate()), topDown(new HavingToFilter()), bottomUp(new SemiJoinCommute()), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/PushDownFilterThroughProject.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/PushDownFilterThroughProject.java index 643a4ee509..821ac47e44 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/PushDownFilterThroughProject.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/PushDownFilterThroughProject.java @@ -42,6 +42,10 @@ public class PushDownFilterThroughProject extends PlanPostProcessor { } PhysicalProject project = (PhysicalProject) child; + if (project.isPulledUpProjectFromScan()) { + // ignore project which is pulled up from LogicalOlapScan + return filter; + } PhysicalFilter newFilter = filter.withConjunctsAndChild( ExpressionUtils.replace(filter.getConjuncts(), project.getAliasToProducer()), project.child()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/Validator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/Validator.java index a46b454f9a..392788575c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/Validator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/Validator.java @@ -59,7 +59,7 @@ public class Validator extends PlanPostProcessor { Plan child = filter.child(); // Forbidden filter-project, we must make filter-project -> project-filter. - if (child instanceof PhysicalProject) { + if (child instanceof PhysicalProject && !((PhysicalProject) child).isPulledUpProjectFromScan()) { throw new AnalysisException( "Nereids generate a filter-project plan, but backend not support:\n" + filter.treeString()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java index 29c053a8b3..5f4c784d7d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java @@ -49,8 +49,11 @@ public enum RuleType { BINDING_UNBOUND_TVF_RELATION_FUNCTION(RuleTypeClass.REWRITE), BINDING_SET_OPERATION_SLOT(RuleTypeClass.REWRITE), BINDING_INLINE_TABLE_SLOT(RuleTypeClass.REWRITE), - COUNT_LITERAL_REWRITE(RuleTypeClass.REWRITE), + BINDING_SLOT_WITH_PATHS_PROJECT(RuleTypeClass.REWRITE), + + BINDING_SLOT_WITH_PATHS_SCAN(RuleTypeClass.REWRITE), + REPLACE_SORT_EXPRESSION_BY_CHILD_OUTPUT(RuleTypeClass.REWRITE), FILL_UP_HAVING_AGGREGATE(RuleTypeClass.REWRITE), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindSlotWithPaths.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindSlotWithPaths.java new file mode 100644 index 0000000000..a904bcf919 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindSlotWithPaths.java @@ -0,0 +1,87 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.analysis; + +import org.apache.doris.nereids.StatementContext; +import org.apache.doris.nereids.rules.Rule; +import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.trees.expressions.Alias; +import org.apache.doris.nereids.trees.expressions.NamedExpression; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import org.apache.doris.qe.ConnectContext; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; + +import java.util.ArrayList; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * Rule to bind slot with path in query plan. + * Slots with paths do not exist in OlapTable so in order to materialize them, + * generate a LogicalProject on LogicalOlapScan which merges both slots from LogicalOlapScan + * and alias functions from original expressions before rewritten. + */ +public class BindSlotWithPaths implements AnalysisRuleFactory { + + @Override + public List buildRules() { + return ImmutableList.of( + // only scan + RuleType.BINDING_SLOT_WITH_PATHS_SCAN.build( + logicalOlapScan().whenNot(LogicalOlapScan::isProjectPulledUp).thenApply(ctx -> { + if (ConnectContext.get() != null + && ConnectContext.get().getSessionVariable() != null + && !ConnectContext.get().getSessionVariable().isEnableRewriteElementAtToSlot()) { + return ctx.root; + } + LogicalOlapScan logicalOlapScan = ctx.root; + List newProjectsExpr = new ArrayList<>(logicalOlapScan.getOutput()); + Set pathsSlots = ctx.statementContext.getAllPathsSlots(); + // With new logical properties that contains new slots with paths + StatementContext stmtCtx = ConnectContext.get().getStatementContext(); + List olapScanPathSlots = pathsSlots.stream().filter( + slot -> { + Preconditions.checkNotNull(stmtCtx.getRelationBySlot(slot), + "[Not implemented] Slot not found in relation map, slot ", slot); + return stmtCtx.getRelationBySlot(slot).getRelationId() + == logicalOlapScan.getRelationId(); + }).collect( + Collectors.toList()); + List newExprs = olapScanPathSlots.stream() + .map(SlotReference.class::cast) + .map(slotReference -> + new Alias(slotReference.getExprId(), + stmtCtx.getOriginalExpr(slotReference), slotReference.getName())) + .collect( + Collectors.toList()); + if (newExprs.isEmpty()) { + return ctx.root; + } + newProjectsExpr.addAll(newExprs); + return new LogicalProject(newProjectsExpr, logicalOlapScan.withProjectPulledUp()); + })) + ); + } +} + diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CheckAfterRewrite.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CheckAfterRewrite.java index 56d5273e62..402ac833d6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CheckAfterRewrite.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CheckAfterRewrite.java @@ -41,6 +41,7 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; import org.apache.doris.nereids.trees.plans.logical.LogicalDeferMaterializeOlapScan; import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; import org.apache.doris.nereids.trees.plans.logical.LogicalSort; import org.apache.doris.nereids.trees.plans.logical.LogicalTopN; import org.apache.doris.nereids.trees.plans.logical.LogicalWindow; @@ -181,7 +182,9 @@ public class CheckAfterRewrite extends OneAnalysisRuleFactory { if (plan.getExpressions().stream().anyMatch( expression -> expression instanceof Match)) { if (plan instanceof LogicalFilter && (plan.child(0) instanceof LogicalOlapScan - || plan.child(0) instanceof LogicalDeferMaterializeOlapScan)) { + || plan.child(0) instanceof LogicalDeferMaterializeOlapScan + || plan.child(0) instanceof LogicalProject + && ((LogicalProject) plan.child(0)).isPulledUpProjectFromScan())) { return; } else { throw new AnalysisException(String.format( diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SlotBinder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SlotBinder.java index e25aa20262..5c96ff8bd2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SlotBinder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SlotBinder.java @@ -183,7 +183,8 @@ public class SlotBinder extends SubExprAnalyzer { List slots = getScope().getSlots() .stream() .filter(slot -> !(slot instanceof SlotReference) - || (((SlotReference) slot).isVisible()) || showHidden) + || (((SlotReference) slot).isVisible()) || showHidden) + .filter(slot -> !(((SlotReference) slot).hasSubColPath())) .collect(Collectors.toList()); switch (qualifier.size()) { case 0: // select * @@ -264,6 +265,11 @@ public class SlotBinder extends SubExprAnalyzer { private List bindSlot(UnboundSlot unboundSlot, List boundSlots) { return boundSlots.stream().distinct().filter(boundSlot -> { + if (boundSlot instanceof SlotReference + && ((SlotReference) boundSlot).hasSubColPath()) { + // already bounded + return false; + } List nameParts = unboundSlot.getNameParts(); int qualifierSize = boundSlot.getQualifier().size(); int namePartsSize = nameParts.size(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionOptimization.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionOptimization.java index e7b3a308f0..6064a8d210 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionOptimization.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionOptimization.java @@ -21,6 +21,7 @@ import org.apache.doris.nereids.rules.expression.rules.ArrayContainToArrayOverla import org.apache.doris.nereids.rules.expression.rules.CaseWhenToIf; import org.apache.doris.nereids.rules.expression.rules.DateFunctionRewrite; import org.apache.doris.nereids.rules.expression.rules.DistinctPredicatesRule; +import org.apache.doris.nereids.rules.expression.rules.ElementAtToSlot; import org.apache.doris.nereids.rules.expression.rules.ExtractCommonFactorRule; import org.apache.doris.nereids.rules.expression.rules.OrToIn; import org.apache.doris.nereids.rules.expression.rules.SimplifyComparisonPredicate; @@ -48,7 +49,8 @@ public class ExpressionOptimization extends ExpressionRewrite { OrToIn.INSTANCE, ArrayContainToArrayOverlap.INSTANCE, CaseWhenToIf.INSTANCE, - TopnToMax.INSTANCE + TopnToMax.INSTANCE, + ElementAtToSlot.INSTANCE ); private static final ExpressionRuleExecutor EXECUTOR = new ExpressionRuleExecutor(OPTIMIZE_REWRITE_RULES); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/check/CheckCast.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/check/CheckCast.java index be939ba2f6..d7a6085dca 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/check/CheckCast.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/check/CheckCast.java @@ -52,6 +52,10 @@ public class CheckCast extends AbstractExpressionRewriteRule { } private boolean check(DataType originalType, DataType targetType) { + if (originalType.isVariantType() && (targetType instanceof PrimitiveType || targetType.isArrayType())) { + // variant could cast to primitive types and array + return true; + } if (originalType.isNullType()) { return true; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/ElementAtToSlot.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/ElementAtToSlot.java new file mode 100644 index 0000000000..050c2bb396 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/ElementAtToSlot.java @@ -0,0 +1,89 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.expression.rules; + +import org.apache.doris.nereids.StatementContext; +import org.apache.doris.nereids.rules.expression.ExpressionRewriteContext; +import org.apache.doris.nereids.rules.expression.ExpressionRewriteRule; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.expressions.StatementScopeIdGenerator; +import org.apache.doris.nereids.trees.expressions.functions.scalar.ElementAt; +import org.apache.doris.nereids.trees.expressions.literal.VarcharLiteral; +import org.apache.doris.nereids.trees.expressions.visitor.DefaultExpressionRewriter; +import org.apache.doris.qe.ConnectContext; + +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; + +/** + * Transform element_at function to SlotReference for variant sub-column access. + * This optimization will help query engine to prune as many sub columns as possible + * to speed up query. + * eg: element_at(element_at(v, "a"), "b") -> SlotReference(column=v, subColLabels=["a", "b"]) + */ +public class ElementAtToSlot extends DefaultExpressionRewriter implements + ExpressionRewriteRule { + + public static final ElementAtToSlot INSTANCE = new ElementAtToSlot(); + + @Override + public Expression rewrite(Expression expr, ExpressionRewriteContext ctx) { + return expr.accept(this, ctx); + } + + /** + * Rewrites an {@link ElementAt} instance to a {@link SlotReference}. + * This method is used to transform an ElementAt expr into a SlotReference, + * based on the provided topColumnSlot and the context of the statement. + * + * @param elementAt The {@link ElementAt} instance to be rewritten. + * @param topColumnSlot The {@link SlotReference} that represents the top column slot. + * @return A {@link SlotReference} that represents the rewritten element. + * If a target column slot is found in the context, it is returned to avoid duplicates. + * Otherwise, a new SlotReference is created and added to the context. + */ + public static Expression rewriteToSlot(ElementAt elementAt, SlotReference topColumnSlot) { + // rewrite to slotRef + StatementContext ctx = ConnectContext.get().getStatementContext(); + List fullPaths = elementAt.collectToList(node -> node instanceof VarcharLiteral).stream() + .map(node -> ((VarcharLiteral) node).getValue()) + .collect(Collectors.toList()); + SlotReference targetColumnSlot = ctx.getPathSlot(topColumnSlot, fullPaths); + if (targetColumnSlot != null) { + // avoid duplicated slots + return targetColumnSlot; + } + SlotReference slotRef = new SlotReference(StatementScopeIdGenerator.newExprId(), + topColumnSlot.getName(), topColumnSlot.getDataType(), + topColumnSlot.nullable(), topColumnSlot.getQualifier(), + topColumnSlot.getColumn().get(), Optional.of(topColumnSlot.getInternalName()), + fullPaths); + ctx.addPathSlotRef(topColumnSlot, fullPaths, slotRef, elementAt); + ctx.addSlotToRelation(slotRef, ctx.getRelationBySlot(topColumnSlot)); + + return slotRef; + } + + @Override + public Expression visitElementAt(ElementAt elementAt, ExpressionRewriteContext context) { + // todo + return elementAt; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FunctionBinder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FunctionBinder.java index ba39266da9..76d3b6748e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FunctionBinder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FunctionBinder.java @@ -45,13 +45,16 @@ import org.apache.doris.nereids.trees.expressions.ListQuery; import org.apache.doris.nereids.trees.expressions.Match; import org.apache.doris.nereids.trees.expressions.Not; import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.SlotReference; import org.apache.doris.nereids.trees.expressions.TimestampArithmetic; import org.apache.doris.nereids.trees.expressions.WhenClause; import org.apache.doris.nereids.trees.expressions.functions.BoundFunction; import org.apache.doris.nereids.trees.expressions.functions.FunctionBuilder; import org.apache.doris.nereids.trees.expressions.functions.agg.Count; +import org.apache.doris.nereids.trees.expressions.functions.scalar.ElementAt; import org.apache.doris.nereids.trees.expressions.functions.scalar.Lambda; import org.apache.doris.nereids.trees.expressions.functions.scalar.Nvl; +import org.apache.doris.nereids.trees.expressions.functions.scalar.PushDownToProjectionFunction; import org.apache.doris.nereids.trees.expressions.functions.udf.AliasUdfBuilder; import org.apache.doris.nereids.trees.expressions.literal.BigIntLiteral; import org.apache.doris.nereids.trees.expressions.typecoercion.ImplicitCastInputTypes; @@ -60,6 +63,7 @@ import org.apache.doris.nereids.types.BigIntType; import org.apache.doris.nereids.types.BooleanType; import org.apache.doris.nereids.types.DataType; import org.apache.doris.nereids.util.TypeCoercionUtils; +import org.apache.doris.qe.ConnectContext; import com.google.common.collect.ImmutableList; import org.apache.commons.lang3.StringUtils; @@ -194,6 +198,25 @@ public class FunctionBinder extends AbstractExpressionRewriteRule { return TypeCoercionUtils.processBoundFunction(boundFunction); } + @Override + public Expression visitElementAt(ElementAt elementAt, ExpressionRewriteContext context) { + if (PushDownToProjectionFunction.validToPushDown(elementAt)) { + if (ConnectContext.get() != null + && ConnectContext.get().getSessionVariable() != null + && !ConnectContext.get().getSessionVariable().isEnableRewriteElementAtToSlot()) { + throw new AnalysisException( + "set enable_rewrite_element_at_to_slot=true when using element_at function for variant type"); + } + Slot slot = elementAt.getInputSlots().stream().findFirst().get(); + if (slot.hasUnbound()) { + slot = (Slot) super.visit(slot, context); + } + // rewrite to slot and bound this slot + return ElementAtToSlot.rewriteToSlot(elementAt, (SlotReference) slot); + } + return visitBoundFunction(elementAt, context); + } + /** * gets the method for calculating the time. * e.g. YEARS_ADD、YEARS_SUB、DAYS_ADD 、DAYS_SUB @@ -331,9 +354,11 @@ public class FunctionBinder extends AbstractExpressionRewriteRule { // check child type if (!left.getDataType().isStringLikeType() && !(left.getDataType() instanceof ArrayType - && ((ArrayType) left.getDataType()).getItemType().isStringLikeType())) { + && ((ArrayType) left.getDataType()).getItemType().isStringLikeType()) + && !left.getDataType().isVariantType()) { throw new AnalysisException(String.format( - "left operand '%s' part of predicate " + "'%s' should return type 'STRING' or 'ARRAY' but " + "left operand '%s' part of predicate " + + "'%s' should return type 'STRING', 'ARRAY or VARIANT' but " + "returns type '%s'.", left.toSql(), match.toSql(), left.getDataType())); } @@ -344,6 +369,10 @@ public class FunctionBinder extends AbstractExpressionRewriteRule { + "returns type '%s'.", right.toSql(), match.toSql(), right.getDataType())); } + + if (left.getDataType().isVariantType()) { + left = new Cast(left, right.getDataType()); + } return match.withChildren(left, right); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/CheckMatchExpression.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/CheckMatchExpression.java index dbfe480baf..907d34c07c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/CheckMatchExpression.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/CheckMatchExpression.java @@ -20,6 +20,7 @@ package org.apache.doris.nereids.rules.rewrite; import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.rules.Rule; import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.trees.expressions.Cast; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.Match; import org.apache.doris.nereids.trees.expressions.SlotReference; @@ -47,7 +48,10 @@ public class CheckMatchExpression extends OneRewriteRuleFactory { for (Expression expr : expressions) { if (expr instanceof Match) { Match matchExpression = (Match) expr; - if (!(matchExpression.left() instanceof SlotReference) + boolean isSlotReference = matchExpression.left() instanceof SlotReference; + boolean isCastChildWithSlotReference = (matchExpression.left() instanceof Cast + && matchExpression.left().child(0) instanceof SlotReference); + if (!(isSlotReference || isCastChildWithSlotReference) || !(matchExpression.right() instanceof Literal)) { throw new AnalysisException(String.format("Only support match left operand is SlotRef," + " right operand is Literal. But meet expression %s", matchExpression)); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DeferMaterializeTopNResult.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DeferMaterializeTopNResult.java index 15516e0501..c91945e029 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DeferMaterializeTopNResult.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DeferMaterializeTopNResult.java @@ -83,7 +83,8 @@ public class DeferMaterializeTopNResult implements RewriteRuleFactory { LogicalTopN logicalTopN, Optional> logicalFilter, LogicalOlapScan logicalOlapScan) { Column rowId = new Column(Column.ROWID_COL, Type.STRING, false, null, false, "", "rowid column"); - SlotReference columnId = SlotReference.fromColumn(rowId, logicalOlapScan.getQualifier()); + SlotReference columnId = SlotReference.fromColumn(rowId, + logicalOlapScan.getQualifier(), logicalOlapScan); Set deferredMaterializedExprIds = Sets.newHashSet(logicalOlapScan.getOutputExprIdSet()); logicalFilter.ifPresent(filter -> filter.getConjuncts() .forEach(e -> deferredMaterializedExprIds.removeAll(e.getInputSlotExprIds()))); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughProject.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughProject.java index 65106ff8f8..34514a3b34 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughProject.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughProject.java @@ -44,6 +44,7 @@ public class PushDownFilterThroughProject implements RewriteRuleFactory { logicalFilter(logicalProject()) .whenNot(filter -> filter.child().getProjects().stream().anyMatch( expr -> expr.anyMatch(WindowExpression.class::isInstance))) + .whenNot(filter -> filter.child().isPulledUpProjectFromScan()) .then(PushDownFilterThroughProject::pushdownFilterThroughProject) .toRule(RuleType.PUSH_DOWN_FILTER_THROUGH_PROJECT), // filter(project(limit)) will change to filter(limit(project)) by PushdownProjectThroughLimit, @@ -51,6 +52,7 @@ public class PushDownFilterThroughProject implements RewriteRuleFactory { logicalFilter(logicalLimit(logicalProject())) .whenNot(filter -> filter.child().child().getProjects().stream() .anyMatch(expr -> expr.anyMatch(WindowExpression.class::isInstance))) + .whenNot(filter -> filter.child().child().isPulledUpProjectFromScan()) .then(filter -> { LogicalLimit> limit = filter.child(); LogicalProject project = limit.child(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Alias.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Alias.java index 877f792e50..e8574b36e8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Alias.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Alias.java @@ -71,11 +71,15 @@ public class Alias extends NamedExpression implements UnaryExpression { @Override public Slot toSlot() throws UnboundException { + SlotReference slotReference = child() instanceof SlotReference + ? (SlotReference) child() : null; return new SlotReference(exprId, name, child().getDataType(), child().nullable(), qualifier, - child() instanceof SlotReference - ? ((SlotReference) child()).getColumn().orElse(null) + slotReference != null + ? slotReference.getColumn().orElse(null) : null, - nameFromChild ? Optional.of(child().toString()) : Optional.of(name)); + nameFromChild ? Optional.of(child().toString()) : Optional.of(name), slotReference != null + ? slotReference.getSubColPath() + : null); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/ArrayItemReference.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/ArrayItemReference.java index 226e8d1f9b..522779b508 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/ArrayItemReference.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/ArrayItemReference.java @@ -142,7 +142,7 @@ public class ArrayItemReference extends NamedExpression implements ExpectsInputT * @param nullable true if nullable */ public ArrayItemSlot(ExprId exprId, String name, DataType dataType, boolean nullable) { - super(exprId, name, dataType, nullable, ImmutableList.of(), null, Optional.empty()); + super(exprId, name, dataType, nullable, ImmutableList.of(), null, Optional.empty(), null); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/SlotReference.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/SlotReference.java index db0f4e6635..afb31f7162 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/SlotReference.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/SlotReference.java @@ -19,7 +19,9 @@ package org.apache.doris.nereids.trees.expressions; import org.apache.doris.catalog.Column; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; +import org.apache.doris.nereids.trees.plans.algebra.Relation; import org.apache.doris.nereids.types.DataType; +import org.apache.doris.qe.ConnectContext; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; @@ -39,6 +41,10 @@ public class SlotReference extends Slot { protected final boolean nullable; protected final List qualifier; + // The sub column path to access type like struct or variant + // e.g. For accessing variant["a"]["b"], the parsed paths is ["a", "b"] + protected final List subColPath; + // the unique string representation of a SlotReference // different SlotReference will have different internalName // TODO: remove this member variable after mv selection is refactored @@ -47,25 +53,31 @@ public class SlotReference extends Slot { private final Column column; public SlotReference(String name, DataType dataType) { - this(StatementScopeIdGenerator.newExprId(), name, dataType, true, ImmutableList.of(), null, Optional.empty()); + this(StatementScopeIdGenerator.newExprId(), name, dataType, true, ImmutableList.of(), + null, Optional.empty(), null); } public SlotReference(String name, DataType dataType, boolean nullable) { this(StatementScopeIdGenerator.newExprId(), name, dataType, nullable, ImmutableList.of(), - null, Optional.empty()); + null, Optional.empty(), null); } public SlotReference(String name, DataType dataType, boolean nullable, List qualifier) { - this(StatementScopeIdGenerator.newExprId(), name, dataType, nullable, qualifier, null, Optional.empty()); + this(StatementScopeIdGenerator.newExprId(), name, dataType, nullable, qualifier, null, Optional.empty(), null); } public SlotReference(ExprId exprId, String name, DataType dataType, boolean nullable, List qualifier) { - this(exprId, name, dataType, nullable, qualifier, null, Optional.empty()); + this(exprId, name, dataType, nullable, qualifier, null, Optional.empty(), null); } public SlotReference(ExprId exprId, String name, DataType dataType, boolean nullable, List qualifier, @Nullable Column column) { - this(exprId, name, dataType, nullable, qualifier, column, Optional.empty()); + this(exprId, name, dataType, nullable, qualifier, column, Optional.empty(), null); + } + + public SlotReference(ExprId exprId, String name, DataType dataType, boolean nullable, + List qualifier, @Nullable Column column, Optional internalName) { + this(exprId, name, dataType, nullable, qualifier, column, internalName, null); } /** @@ -78,32 +90,59 @@ public class SlotReference extends Slot { * @param qualifier slot reference qualifier * @param column the column which this slot come from * @param internalName the internalName of this slot + * @param subColLabels subColumn access labels */ public SlotReference(ExprId exprId, String name, DataType dataType, boolean nullable, - List qualifier, @Nullable Column column, Optional internalName) { + List qualifier, @Nullable Column column, Optional internalName, + List subColLabels) { this.exprId = exprId; this.name = name; this.dataType = dataType; this.qualifier = ImmutableList.copyOf(Objects.requireNonNull(qualifier, "qualifier can not be null")); this.nullable = nullable; this.column = column; - this.internalName = internalName.isPresent() ? internalName : Optional.of(name); + this.subColPath = subColLabels; + if (subColLabels != null && !this.subColPath.isEmpty()) { + // Modify internal name to distinguish from different sub-columns of same top level column, + // using the `.` to connect each part of paths + String fullName = internalName.orElse(name) + String.join(".", this.subColPath); + this.internalName = Optional.of(fullName); + } else { + this.internalName = internalName.isPresent() ? internalName : Optional.of(name); + } } public static SlotReference of(String name, DataType type) { return new SlotReference(name, type); } - public static SlotReference fromColumn(Column column, List qualifier) { + /** + * get SlotReference from a column + * @param column the column which contains type info + * @param qualifier the qualifier of SlotReference + * @param relation the relation which column is from + */ + public static SlotReference fromColumn(Column column, List qualifier, Relation relation) { DataType dataType = DataType.fromCatalogType(column.getType()); - return new SlotReference(StatementScopeIdGenerator.newExprId(), column.getName(), dataType, - column.isAllowNull(), qualifier, column, Optional.empty()); + SlotReference slot = new SlotReference(StatementScopeIdGenerator.newExprId(), column.getName(), dataType, + column.isAllowNull(), qualifier, column, Optional.empty(), null); + if (relation != null && ConnectContext.get() != null + && ConnectContext.get().getStatementContext() != null) { + ConnectContext.get().getStatementContext().addSlotToRelation(slot, relation); + } + return slot; } public static SlotReference fromColumn(Column column, String name, List qualifier) { DataType dataType = DataType.fromCatalogType(column.getType()); return new SlotReference(StatementScopeIdGenerator.newExprId(), name, dataType, - column.isAllowNull(), qualifier, column, Optional.empty()); + column.isAllowNull(), qualifier, column, Optional.empty(), null); + } + + public static boolean containsPathsSlotReference(Expression expression) { + return expression.collectToList(SlotReference.class::isInstance) + .stream().anyMatch(expr -> { + return ((SlotReference) expr).hasSubColPath(); }); } @Override @@ -204,25 +243,33 @@ public class SlotReference extends Slot { if (this.nullable == newNullable) { return this; } - return new SlotReference(exprId, name, dataType, newNullable, qualifier, column, internalName); + return new SlotReference(exprId, name, dataType, newNullable, qualifier, column, internalName, subColPath); } @Override public SlotReference withQualifier(List qualifier) { - return new SlotReference(exprId, name, dataType, nullable, qualifier, column, internalName); + return new SlotReference(exprId, name, dataType, nullable, qualifier, column, internalName, subColPath); } @Override public SlotReference withName(String name) { - return new SlotReference(exprId, name, dataType, nullable, qualifier, column, internalName); + return new SlotReference(exprId, name, dataType, nullable, qualifier, column, internalName, subColPath); } @Override public SlotReference withExprId(ExprId exprId) { - return new SlotReference(exprId, name, dataType, nullable, qualifier, column, internalName); + return new SlotReference(exprId, name, dataType, nullable, qualifier, column, internalName, subColPath); } public boolean isVisible() { return column == null || column.isVisible(); } + + public List getSubColPath() { + return subColPath; + } + + public boolean hasSubColPath() { + return subColPath != null && !subColPath.isEmpty(); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ElementAt.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ElementAt.java index 1d47e9f368..6bd5f1bd8e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ElementAt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ElementAt.java @@ -26,6 +26,8 @@ import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.ArrayType; import org.apache.doris.nereids.types.BigIntType; import org.apache.doris.nereids.types.MapType; +import org.apache.doris.nereids.types.VarcharType; +import org.apache.doris.nereids.types.VariantType; import org.apache.doris.nereids.types.coercion.AnyDataType; import org.apache.doris.nereids.types.coercion.FollowToAnyDataType; @@ -38,11 +40,13 @@ import java.util.List; * ScalarFunction 'element_at'. This class is generated by GenerateFunction. */ public class ElementAt extends ScalarFunction - implements BinaryExpression, ExplicitlyCastableSignature, AlwaysNullable { + implements BinaryExpression, ExplicitlyCastableSignature, AlwaysNullable, PushDownToProjectionFunction { public static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(new FollowToAnyDataType(0)) .args(ArrayType.of(new AnyDataType(0)), BigIntType.INSTANCE), + FunctionSignature.ret(new VariantType()) + .args(new VariantType(), VarcharType.SYSTEM_DEFAULT), FunctionSignature.ret(new FollowToAnyDataType(1)) .args(MapType.of(new AnyDataType(0), new AnyDataType(1)), new FollowToAnyDataType(0)) ); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/PushDownToProjectionFunction.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/PushDownToProjectionFunction.java new file mode 100644 index 0000000000..321e08f881 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/PushDownToProjectionFunction.java @@ -0,0 +1,31 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.expressions.functions.scalar; + +import org.apache.doris.nereids.trees.expressions.Expression; + +/** + * Function that could be rewritten and pushed down to projection + */ +public interface PushDownToProjectionFunction { + // check if specified function could be pushed down to project + static boolean validToPushDown(Expression pushDownExpr) { + // Currently only Variant type could be pushed down + return pushDownExpr instanceof PushDownToProjectionFunction && pushDownExpr.getDataType().isVariantType(); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Project.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Project.java index 9238f86954..4561353c84 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Project.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Project.java @@ -18,12 +18,16 @@ package org.apache.doris.nereids.trees.plans.algebra; import org.apache.doris.nereids.exceptions.AnalysisException; +import org.apache.doris.nereids.trees.expressions.Alias; import org.apache.doris.nereids.trees.expressions.ExprId; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.NamedExpression; import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.expressions.functions.scalar.PushDownToProjectionFunction; import org.apache.doris.nereids.util.ExpressionUtils; import org.apache.doris.nereids.util.PlanUtils; +import org.apache.doris.qe.ConnectContext; import com.google.common.collect.ImmutableMap; @@ -64,6 +68,22 @@ public interface Project { return PlanUtils.mergeProjections(childProject.getProjects(), getProjects()); } + /** + * Check if it is a project that is pull up from scan in analyze rule + * e.g. BindSlotWithPaths + */ + default boolean isPulledUpProjectFromScan() { + return ConnectContext.get() != null + && ConnectContext.get().getSessionVariable() != null + && ConnectContext.get().getSessionVariable().isEnableRewriteElementAtToSlot() + && getProjects().stream().allMatch(namedExpr -> + namedExpr instanceof SlotReference + || (namedExpr instanceof Alias + && PushDownToProjectionFunction.validToPushDown(((Alias) namedExpr).child()))) + && getProjects().stream().anyMatch((namedExpr -> namedExpr instanceof Alias + && PushDownToProjectionFunction.validToPushDown(((Alias) namedExpr).child()))); + } + /** * find projects, if not found the slot, then throw AnalysisException */ diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/UpdateCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/UpdateCommand.java index 0023b4bd59..e0512715c5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/UpdateCommand.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/UpdateCommand.java @@ -143,7 +143,8 @@ public class UpdateCommand extends Command implements ForwardWithSync, Explainab } boolean isPartialUpdate = targetTable.getEnableUniqueKeyMergeOnWrite() - && selectItems.size() < targetTable.getColumns().size(); + && selectItems.size() < targetTable.getColumns().size() + && !targetTable.hasVariantColumns(); // make UnboundTableSink return new UnboundTableSink<>(nameParts, ImmutableList.of(), ImmutableList.of(), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalCatalogRelation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalCatalogRelation.java index e50a049e0f..743365f782 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalCatalogRelation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalCatalogRelation.java @@ -93,7 +93,7 @@ public abstract class LogicalCatalogRelation extends LogicalRelation implements public List computeOutput() { return table.getBaseSchema() .stream() - .map(col -> SlotReference.fromColumn(col, qualified())) + .map(col -> SlotReference.fromColumn(col, qualified(), this)) .collect(ImmutableList.toImmutableList()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java index f496eaa47f..4e9174097c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java @@ -67,6 +67,11 @@ public class LogicalOlapScan extends LogicalCatalogRelation implements OlapScan */ private final boolean indexSelected; + /* + * Status to indicate a new project pulled up from this logicalOlapScan + */ + private final boolean projectPulledUp; + /** * Status to indicate using pre-aggregation or not. */ @@ -119,7 +124,7 @@ public class LogicalOlapScan extends LogicalCatalogRelation implements OlapScan table.getPartitionIds(), false, ImmutableList.of(), -1, false, PreAggStatus.on(), ImmutableList.of(), ImmutableList.of(), - Maps.newHashMap(), Optional.empty(), false); + Maps.newHashMap(), Optional.empty(), false, false); } public LogicalOlapScan(RelationId id, OlapTable table, List qualifier, List tabletIds, @@ -127,7 +132,7 @@ public class LogicalOlapScan extends LogicalCatalogRelation implements OlapScan this(id, table, qualifier, Optional.empty(), Optional.empty(), table.getPartitionIds(), false, tabletIds, -1, false, PreAggStatus.on(), ImmutableList.of(), hints, Maps.newHashMap(), - tableSample, false); + tableSample, false, false); } public LogicalOlapScan(RelationId id, OlapTable table, List qualifier, List specifiedPartitions, @@ -136,7 +141,7 @@ public class LogicalOlapScan extends LogicalCatalogRelation implements OlapScan // must use specifiedPartitions here for prune partition by sql like 'select * from t partition p1' specifiedPartitions, false, tabletIds, -1, false, PreAggStatus.on(), specifiedPartitions, hints, Maps.newHashMap(), - tableSample, false); + tableSample, false, false); } public LogicalOlapScan(RelationId id, OlapTable table, List qualifier, List tabletIds, @@ -144,7 +149,7 @@ public class LogicalOlapScan extends LogicalCatalogRelation implements OlapScan this(id, table, qualifier, Optional.empty(), Optional.empty(), table.getPartitionIds(), false, tabletIds, selectedIndexId, true, PreAggStatus.off("For direct index scan."), - ImmutableList.of(), hints, Maps.newHashMap(), tableSample, true); + ImmutableList.of(), hints, Maps.newHashMap(), tableSample, true, false); } /** @@ -156,7 +161,7 @@ public class LogicalOlapScan extends LogicalCatalogRelation implements OlapScan List selectedTabletIds, long selectedIndexId, boolean indexSelected, PreAggStatus preAggStatus, List specifiedPartitions, List hints, Map, Slot> cacheSlotWithSlotName, - Optional tableSample, boolean directMvScan) { + Optional tableSample, boolean directMvScan, boolean projectPulledUp) { super(id, PlanType.LOGICAL_OLAP_SCAN, table, qualifier, groupExpression, logicalProperties); Preconditions.checkArgument(selectedPartitionIds != null, @@ -175,6 +180,7 @@ public class LogicalOlapScan extends LogicalCatalogRelation implements OlapScan "mvNameToSlot can not be null"); this.tableSample = tableSample; this.directMvScan = directMvScan; + this.projectPulledUp = projectPulledUp; } public List getSelectedPartitionIds() { @@ -231,7 +237,7 @@ public class LogicalOlapScan extends LogicalCatalogRelation implements OlapScan groupExpression, Optional.of(getLogicalProperties()), selectedPartitionIds, partitionPruned, selectedTabletIds, selectedIndexId, indexSelected, preAggStatus, manuallySpecifiedPartitions, - hints, cacheSlotWithSlotName, tableSample, directMvScan); + hints, cacheSlotWithSlotName, tableSample, directMvScan, projectPulledUp); } @Override @@ -240,7 +246,7 @@ public class LogicalOlapScan extends LogicalCatalogRelation implements OlapScan return new LogicalOlapScan(relationId, (Table) table, qualifier, groupExpression, logicalProperties, selectedPartitionIds, partitionPruned, selectedTabletIds, selectedIndexId, indexSelected, preAggStatus, manuallySpecifiedPartitions, - hints, cacheSlotWithSlotName, tableSample, directMvScan); + hints, cacheSlotWithSlotName, tableSample, directMvScan, projectPulledUp); } public LogicalOlapScan withSelectedPartitionIds(List selectedPartitionIds) { @@ -248,7 +254,7 @@ public class LogicalOlapScan extends LogicalCatalogRelation implements OlapScan Optional.empty(), Optional.of(getLogicalProperties()), selectedPartitionIds, true, selectedTabletIds, selectedIndexId, indexSelected, preAggStatus, manuallySpecifiedPartitions, - hints, cacheSlotWithSlotName, tableSample, directMvScan); + hints, cacheSlotWithSlotName, tableSample, directMvScan, projectPulledUp); } public LogicalOlapScan withMaterializedIndexSelected(PreAggStatus preAgg, long indexId) { @@ -256,7 +262,19 @@ public class LogicalOlapScan extends LogicalCatalogRelation implements OlapScan Optional.empty(), Optional.of(getLogicalProperties()), selectedPartitionIds, partitionPruned, selectedTabletIds, indexId, true, preAgg, manuallySpecifiedPartitions, hints, cacheSlotWithSlotName, - tableSample, directMvScan); + tableSample, directMvScan, projectPulledUp); + } + + public boolean isProjectPulledUp() { + return projectPulledUp; + } + + public LogicalOlapScan withProjectPulledUp() { + return new LogicalOlapScan(relationId, (Table) table, qualifier, + Optional.empty(), Optional.of(getLogicalProperties()), + selectedPartitionIds, partitionPruned, selectedTabletIds, + selectedIndexId, indexSelected, preAggStatus, manuallySpecifiedPartitions, hints, cacheSlotWithSlotName, + tableSample, directMvScan, true); } public LogicalOlapScan withSelectedTabletIds(List selectedTabletIds) { @@ -264,7 +282,7 @@ public class LogicalOlapScan extends LogicalCatalogRelation implements OlapScan Optional.empty(), Optional.of(getLogicalProperties()), selectedPartitionIds, partitionPruned, selectedTabletIds, selectedIndexId, indexSelected, preAggStatus, manuallySpecifiedPartitions, - hints, cacheSlotWithSlotName, tableSample, directMvScan); + hints, cacheSlotWithSlotName, tableSample, directMvScan, projectPulledUp); } public LogicalOlapScan withPreAggStatus(PreAggStatus preAggStatus) { @@ -272,7 +290,7 @@ public class LogicalOlapScan extends LogicalCatalogRelation implements OlapScan Optional.empty(), Optional.of(getLogicalProperties()), selectedPartitionIds, partitionPruned, selectedTabletIds, selectedIndexId, indexSelected, preAggStatus, manuallySpecifiedPartitions, - hints, cacheSlotWithSlotName, tableSample, directMvScan); + hints, cacheSlotWithSlotName, tableSample, directMvScan, projectPulledUp); } @Override @@ -316,7 +334,7 @@ public class LogicalOlapScan extends LogicalCatalogRelation implements OlapScan if (cacheSlotWithSlotName.containsKey(Pair.of(selectedIndexId, col.getName()))) { return cacheSlotWithSlotName.get(Pair.of(selectedIndexId, col.getName())); } - Slot slot = SlotReference.fromColumn(col, qualified()); + Slot slot = SlotReference.fromColumn(col, qualified(), this); cacheSlotWithSlotName.put(Pair.of(selectedIndexId, col.getName()), slot); return slot; }).collect(ImmutableList.toImmutableList()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalTVFRelation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalTVFRelation.java index f5842c30dc..fa48a27d4d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalTVFRelation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalTVFRelation.java @@ -99,7 +99,7 @@ public class LogicalTVFRelation extends LogicalRelation implements TVFRelation, public List computeOutput() { return function.getTable().getBaseSchema() .stream() - .map(col -> SlotReference.fromColumn(col, qualifier)) + .map(col -> SlotReference.fromColumn(col, qualifier, this)) .collect(ImmutableList.toImmutableList()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalCatalogRelation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalCatalogRelation.java index 5537b4dd7c..c1c98b6139 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalCatalogRelation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalCatalogRelation.java @@ -101,7 +101,7 @@ public abstract class PhysicalCatalogRelation extends PhysicalRelation implement public List computeOutput() { return table.getBaseSchema() .stream() - .map(col -> SlotReference.fromColumn(col, qualified())) + .map(col -> SlotReference.fromColumn(col, qualified(), this)) .collect(ImmutableList.toImmutableList()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalTVFRelation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalTVFRelation.java index 955ea2f45d..07cb6c44d2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalTVFRelation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalTVFRelation.java @@ -107,7 +107,7 @@ public class PhysicalTVFRelation extends PhysicalRelation implements TVFRelation public List computeOutput() { return function.getTable().getBaseSchema() .stream() - .map(col -> SlotReference.fromColumn(col, ImmutableList.of())) + .map(col -> SlotReference.fromColumn(col, ImmutableList.of(), this)) .collect(ImmutableList.toImmutableList()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java index 999e888156..b568c556c5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java @@ -379,6 +379,8 @@ public abstract class DataType { return DecimalV2Type.createDecimalV2Type(precision, scale); } else if (type.isJsonbType()) { return JsonType.INSTANCE; + } else if (type.isVariantType()) { + return VariantType.INSTANCE; } else if (type.isStructType()) { List structFields = ((org.apache.doris.catalog.StructType) (type)).getFields().stream() .map(cf -> new StructField(cf.getName(), fromCatalogType(cf.getType()), @@ -620,6 +622,10 @@ public abstract class DataType { return this instanceof MapType; } + public boolean isVariantType() { + return this instanceof VariantType; + } + public boolean isStructType() { return this instanceof StructType; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java new file mode 100644 index 0000000000..14818f39bb --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java @@ -0,0 +1,83 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.types; + +import org.apache.doris.catalog.Type; +import org.apache.doris.nereids.annotation.Developing; +import org.apache.doris.nereids.types.coercion.PrimitiveType; + +import java.util.Objects; + +/** + * Variant type in Nereids. + * Why Variant is not complex type? Since it's nested structure is not pre-defined, then using + * primitive type will be easy to handle meta info in FE. + */ +@Developing +public class VariantType extends PrimitiveType { + + public static final VariantType INSTANCE = new VariantType(); + + public static final int WIDTH = 24; + + @Override + public Type toCatalogDataType() { + return Type.VARIANT; + } + + @Override + public boolean acceptsType(DataType other) { + return other instanceof VariantType; + } + + @Override + public String simpleString() { + return "map"; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + return super.equals(o); + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode()); + } + + @Override + public int width() { + return WIDTH; + } + + @Override + public String toSql() { + return "VARIANT"; + } + + @Override + public String toString() { + return toSql(); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/TypeCoercionUtils.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/TypeCoercionUtils.java index ffa41b1c63..310f8ca636 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/TypeCoercionUtils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/TypeCoercionUtils.java @@ -97,6 +97,7 @@ import org.apache.doris.nereids.types.TimeType; import org.apache.doris.nereids.types.TimeV2Type; import org.apache.doris.nereids.types.TinyIntType; import org.apache.doris.nereids.types.VarcharType; +import org.apache.doris.nereids.types.VariantType; import org.apache.doris.nereids.types.coercion.AnyDataType; import org.apache.doris.nereids.types.coercion.CharacterType; import org.apache.doris.nereids.types.coercion.FollowToAnyDataType; @@ -177,6 +178,9 @@ public class TypeCoercionUtils { } } return Optional.of(new StructType(newFields)); + } else if (input instanceof VariantType && (expected.isNumericType() || expected.isStringLikeType())) { + // variant could implicit cast to numric types and string like types + return Optional.of(expected); } else { return implicitCastPrimitive(input, expected); } @@ -1271,6 +1275,13 @@ public class TypeCoercionUtils { return Optional.of(IPv6Type.INSTANCE); } + // variant type + if ((leftType.isVariantType() && (rightType.isStringLikeType() || rightType.isNumericType()))) { + return Optional.of(rightType); + } + if ((rightType.isVariantType() && (leftType.isStringLikeType() || leftType.isNumericType()))) { + return Optional.of(leftType); + } return Optional.of(DoubleType.INSTANCE); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java index e28d9d5c82..db7971b672 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java @@ -94,6 +94,7 @@ import com.google.common.collect.ArrayListMultimap; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; +import org.apache.commons.collections.CollectionUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -200,6 +201,8 @@ public class OlapScanNode extends ScanNode { private boolean shouldColoScan = false; + protected List rewrittenProjectList; + // cached for prepared statement to quickly prune partition // only used in short circuit plan at present private final PartitionPruneV2ForShortCircuitPlan cachedPartitionPruner = @@ -255,6 +258,10 @@ public class OlapScanNode extends ScanNode { } } + public void setRewrittenProjectList(List rewrittenProjectList) { + this.rewrittenProjectList = rewrittenProjectList; + } + public void setTableSample(TableSample tSample) { this.tableSample = tSample; } @@ -1303,6 +1310,11 @@ public class OlapScanNode extends ScanNode { output.append(prefix).append("SHORT-CIRCUIT"); } + if (!CollectionUtils.isEmpty(rewrittenProjectList)) { + output.append(prefix).append("rewrittenProjectList: ").append( + getExplainString(rewrittenProjectList)).append("\n"); + } + return output.toString(); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/PredicatesSplitterTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/PredicatesSplitterTest.java index 8bef8afe7a..5f24770ca1 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/PredicatesSplitterTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/PredicatesSplitterTest.java @@ -102,7 +102,7 @@ public class PredicatesSplitterTest extends ExpressionRewriteTestHelper { String name = ((UnboundSlot) expression).getName(); mem.putIfAbsent(name, SlotReference.fromColumn( new Column(name, getType(name.charAt(0)).toCatalogDataType()), - Lists.newArrayList("table"))); + Lists.newArrayList("table"), null)); return mem.get(name); } return hasNewChildren ? expression.withChildren(children) : expression; diff --git a/regression-test/data/variant_p0/load.out b/regression-test/data/variant_p0/load.out index 3a701a5db7..dd77b508b0 100644 --- a/regression-test/data/variant_p0/load.out +++ b/regression-test/data/variant_p0/load.out @@ -143,7 +143,7 @@ [123] -- !sql_25 -- -50000 54999.99999995274 6150000 +50000 55000.00000001167 6150000 -- !sql_26 -- 5000 @@ -242,8 +242,8 @@ 2 {"updated_value":10} -- !sql_37 -- -1 {"a":""} 1 {"a":"1"} +1 {"a":"2"} 1 {"a":1} 1 {"a":1} diff --git a/regression-test/data/variant_p0/schema_change/schema_change.out b/regression-test/data/variant_p0/schema_change/schema_change.out index 0c7852ac72..4956e8311b 100644 --- a/regression-test/data/variant_p0/schema_change/schema_change.out +++ b/regression-test/data/variant_p0/schema_change/schema_change.out @@ -23,6 +23,18 @@ 1 1 +-- !sql -- +1 hello world +1 hello world +1 hello world +1 hello world +1 hello world +1 hello world +1 hello world +1 hello world +1 hello world +1 hello world + -- !sql -- 1 1 @@ -47,3 +59,15 @@ 1 hello world 1 hello world +-- !sql -- +1 hello world +1 hello world +1 hello world +1 hello world +1 hello world +1 hello world +1 hello world +1 hello world +1 hello world +1 hello world + diff --git a/regression-test/suites/variant_p0/column_name.groovy b/regression-test/suites/variant_p0/column_name.groovy index 04f939b689..69ec1a31d1 100644 --- a/regression-test/suites/variant_p0/column_name.groovy +++ b/regression-test/suites/variant_p0/column_name.groovy @@ -28,6 +28,8 @@ suite("regression_test_variant_column_name", "variant_type"){ properties("replication_num" = "1", "disable_auto_compaction" = "true"); """ + // sql "set experimental_enable_nereids_planner = false" + sql """insert into ${table_name} values (1, '{"中文" : "中文", "\\\u4E2C\\\u6587": "unicode"}')""" qt_sql """select v:中文, v:`\\\u4E2C\\\u6587` from ${table_name}""" // sql """insert into ${table_name} values (2, '{}')""" @@ -35,12 +37,12 @@ suite("regression_test_variant_column_name", "variant_type"){ sql """insert into ${table_name} values (3, '{"": ""}')""" qt_sql """select v:`` from ${table_name} order by k""" sql """insert into ${table_name} values (4, '{"!@#^&*()": "11111"}')""" - qt_sql """select cast(v:`!@#^&*()` as string) from ${table_name} order by k""" + qt_sql """select cast(v["!@#^&*()"] as string) from ${table_name} order by k""" sql """insert into ${table_name} values (5, '{"123": "456", "789": "012"}')""" - qt_sql """select cast(v:`123` as string) from ${table_name} order by k""" + qt_sql """select cast(v["123"] as string) from ${table_name} order by k""" // sql """insert into ${table_name} values (6, '{"\\n123": "t123", "\\\"123": "123"}')""" // qt_sql """select v:`\\n` from ${table_name} order by k""" sql """insert into ${table_name} values (7, '{"AA": "UPPER CASE", "aa": "lower case"}')""" - qt_sql """select cast(v:`AA` as string), cast(v:`aa` as string) from ${table_name} order by k""" + qt_sql """select cast(v["AA"] as string), cast(v["aa"] as string) from ${table_name} order by k""" } \ No newline at end of file diff --git a/regression-test/suites/variant_p0/complexjson.groovy b/regression-test/suites/variant_p0/complexjson.groovy index cceab301ba..67a4e86fd5 100644 --- a/regression-test/suites/variant_p0/complexjson.groovy +++ b/regression-test/suites/variant_p0/complexjson.groovy @@ -29,6 +29,7 @@ suite("regression_test_variant_complexjson", "variant_type_complex_json") { properties("replication_num" = "1", "disable_auto_compaction" = "true"); """ } + sql "set experimental_enable_nereids_planner = false" table_name = "complexjson" create_table table_name sql """insert into ${table_name} values (1, '{ @@ -63,7 +64,7 @@ suite("regression_test_variant_complexjson", "variant_type_complex_json") { }')""" // qt_sql """SELECT v:key_0.key_1.key_3.key_4, v:key_0.key_1.key_3.key_5, \ // v:key_0.key_1.key_3.key_6, v:key_0.key_1.key_3.key_7 FROM ${table_name} ORDER BY v:id""" - qt_sql """SELECT * from ${table_name} order by cast(v:id as int)""" + qt_sql """SELECT * from ${table_name} order by cast(v["id"] as int)""" table_name = "complexjson2" create_table table_name @@ -97,7 +98,7 @@ suite("regression_test_variant_complexjson", "variant_type_complex_json") { // v:key_1.key_2.key_3.key_4.key_6, \ // v:key_1.key_2.key_3.key_4.key_7 \ // FROM ${table_name} ORDER BY v:id""" - qt_sql """SELECT * from ${table_name} order by cast(v:id as int)""" + qt_sql """SELECT * from ${table_name} order by cast(v["id"] as int)""" table_name = "complexjson3" create_table table_name @@ -121,7 +122,7 @@ suite("regression_test_variant_complexjson", "variant_type_complex_json") { // v:key_0.key_10, \ // v:key_0.key_0 \ // FROM ${table_name} ORDER BY v:id""" - qt_sql """SELECT * from ${table_name} order by cast(v:id as int)""" + qt_sql """SELECT * from ${table_name} order by cast(v["id"] as int)""" table_name = "complexjson5" create_table table_name @@ -155,5 +156,5 @@ suite("regression_test_variant_complexjson", "variant_type_complex_json") { // v:key_0.key_1.key_2.key_5.key_6, \ // v:key_0.key_1.key_2.key_5.key_7 // FROM ${table_name} ORDER BY v:id""" - qt_sql """SELECT * from ${table_name} order by cast(v:id as int)""" + qt_sql """SELECT * from ${table_name} order by cast(v["id"] as int)""" } \ No newline at end of file diff --git a/regression-test/suites/variant_p0/insert_into_select.groovy b/regression-test/suites/variant_p0/insert_into_select.groovy index ffd0c3af0e..2c2b27065a 100644 --- a/regression-test/suites/variant_p0/insert_into_select.groovy +++ b/regression-test/suites/variant_p0/insert_into_select.groovy @@ -47,6 +47,6 @@ suite("regression_test_variant_insert_into_select", "variant_type"){ sql """insert into ${table_name}_str select * from ${table_name}_var""" sql """insert into ${table_name}_var select * from ${table_name}_str""" sql """insert into ${table_name}_var select * from ${table_name}_var""" - qt_sql "select v:a, v:b, v:c from ${table_name}_var order by k" + qt_sql """select v["a"], v["b"], v["c"] from ${table_name}_var order by k""" qt_sql "select v from ${table_name}_str order by k" } \ No newline at end of file diff --git a/regression-test/suites/variant_p0/load.groovy b/regression-test/suites/variant_p0/load.groovy index 580b15524a..29182573f5 100644 --- a/regression-test/suites/variant_p0/load.groovy +++ b/regression-test/suites/variant_p0/load.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -suite("regression_test_variant", "variant_type"){ +suite("regression_test_variant", "nonConcurrent"){ def load_json_data = {table_name, file_name -> // load the json data @@ -366,7 +366,7 @@ suite("regression_test_variant", "variant_type"){ table_name = "all_sparse_columns" create_table.call(table_name, "1") sql """insert into ${table_name} values (1, '{"a" : 1}'), (1, '{"a": "1"}')""" - sql """insert into ${table_name} values (1, '{"a" : 1}'), (1, '{"a": ""}')""" + sql """insert into ${table_name} values (1, '{"a" : 1}'), (1, '{"a": "2"}')""" qt_sql_37 "select * from ${table_name} order by k, cast(v as string)" set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "0.95") diff --git a/regression-test/suites/variant_p0/multi_var.groovy b/regression-test/suites/variant_p0/multi_var.groovy index 1a034b9be3..5f0eb22b4e 100644 --- a/regression-test/suites/variant_p0/multi_var.groovy +++ b/regression-test/suites/variant_p0/multi_var.groovy @@ -37,7 +37,7 @@ suite("regression_test_variant_multi_var", "variant_type"){ sql "alter table ${table_name} add column ss string default null" sql """INSERT INTO ${table_name} select k, v, v, v, v from ${table_name}""" sql """DELETE FROM ${table_name} where k = 1""" - qt_sql """select cast(v:k1 as tinyint), cast(v2:k2 as text), cast(v3:k3 as string), cast(v:k7 as tinyint), cast(v2:k8 as text), cast(v3:k9 as double) from ${table_name} order by k, 1, 2, 3, 4, 5, 6 limit 10""" - qt_sql """select cast(v:k1 as tinyint), cast(v2:k2 as text), cast(v3:k3 as string), cast(v:k7 as tinyint), cast(v2:k8 as text), cast(v3:k9 as double) from ${table_name} where k > 200 order by k, 1, 2, 3, 4, 5, 6 limit 10""" - qt_sql """select cast(v:k1 as tinyint), cast(v2:k2 as text), cast(v3:k3 as string), cast(v:k7 as tinyint), cast(v2:k8 as text), cast(v3:k9 as double) from ${table_name} where k > 300 order by k, 1, 2, 3, 4, 5, 6 limit 10""" + qt_sql """select cast(v["k1"] as tinyint), cast(v2["k2"] as text), cast(v3["k3"] as string), cast(v["k7"] as tinyint), cast(v2["k8"] as text), cast(v3["k9"] as double) from ${table_name} order by k, 1, 2, 3, 4, 5, 6 limit 10""" + qt_sql """select cast(v["k1"] as tinyint), cast(v2["k2"] as text), cast(v3["k3"] as string), cast(v["k7"] as tinyint), cast(v2["k8"] as text), cast(v3["k9"] as double) from ${table_name} where k > 200 order by k, 1, 2, 3, 4, 5, 6 limit 10""" + qt_sql """select cast(v["k1"] as tinyint), cast(v2["k2"] as text), cast(v3["k3"] as string), cast(v["k7"] as tinyint), cast(v2["k8"] as text), cast(v3["k9"] as double) from ${table_name} where k > 300 order by k, 1, 2, 3, 4, 5, 6 limit 10""" } \ No newline at end of file diff --git a/regression-test/suites/variant_p0/schema_change/schema_change.groovy b/regression-test/suites/variant_p0/schema_change/schema_change.groovy index 9a9048ba5d..ff4ed1b56a 100644 --- a/regression-test/suites/variant_p0/schema_change/schema_change.groovy +++ b/regression-test/suites/variant_p0/schema_change/schema_change.groovy @@ -44,6 +44,8 @@ suite("regression_test_variant_schema_change", "variant_type"){ } assertTrue(useTime <= OpTimeout, "wait_for_latest_op_on_table_finish timeout") } + + sql "set experimental_enable_nereids_planner = true" // add, drop columns sql """INSERT INTO ${table_name} SELECT *, '{"k1":1, "k2": "hello world", "k3" : [1234], "k4" : 1.10000, "k5" : [[123]]}' FROM numbers("number" = "4096")""" sql "alter table ${table_name} add column v2 variant default null" @@ -54,7 +56,9 @@ suite("regression_test_variant_schema_change", "variant_type"){ sql "alter table ${table_name} add column vs string default null" sql """INSERT INTO ${table_name} SELECT k, v, v from ${table_name}""" qt_sql """select v:k1 from ${table_name} order by k desc limit 10""" + qt_sql """select v:k1, cast(v:k2 as string) from ${table_name} order by k desc limit 10""" + // sql "set experimental_enable_nereids_planner = true" // add, drop index sql "alter table ${table_name} add index btm_idxk (k) using bitmap ;" sql """INSERT INTO ${table_name} SELECT k, v, v from ${table_name}""" @@ -64,6 +68,9 @@ suite("regression_test_variant_schema_change", "variant_type"){ sql """INSERT INTO ${table_name} SELECT k, v, v from ${table_name} limit 1024""" wait_for_latest_op_on_table_finish(table_name, timeout) qt_sql """select v:k1 from ${table_name} order by k desc limit 10""" + qt_sql """select v:k1, cast(v:k2 as string) from ${table_name} order by k desc limit 10""" + + sql "set experimental_enable_nereids_planner = false" // add, drop materialized view createMV("""create materialized view var_order as select vs, k, v from ${table_name} order by vs""") diff --git a/regression-test/suites/variant_p0/sql/gh_data.sql b/regression-test/suites/variant_p0/sql/gh_data.sql index 2fc0ccac14..9daf28fdce 100644 --- a/regression-test/suites/variant_p0/sql/gh_data.sql +++ b/regression-test/suites/variant_p0/sql/gh_data.sql @@ -2,15 +2,15 @@ set exec_mem_limit=8G; set enable_two_phase_read_opt = true; set topn_opt_limit_threshold = 1024; SELECT count() from ghdata; -SELECT cast(v:repo.name as string), count() AS stars FROM ghdata WHERE cast(v:type as string) = 'WatchEvent' GROUP BY cast(v:repo.name as string) ORDER BY stars DESC, cast(v:repo.name as string) LIMIT 5; -SELECT max(cast(cast(v:`id` as string) as bigint)) FROM ghdata; -SELECT sum(cast(cast(v:`id` as string) as bigint)) FROM ghdata; -SELECT sum(cast(v:payload.member.id as bigint)) FROM ghdata; -SELECT sum(cast(v:payload.pull_request.milestone.creator.site_admin as bigint)) FROM ghdata; -SELECT sum(length(v:payload.pull_request.base.repo.html_url)) FROM ghdata; -SELECT v:payload.member.id FROM ghdata where cast(v:payload.member.id as string) is not null ORDER BY k LIMIT 10; +SELECT cast(v["repo"]["name"] as string) as repo_name, count() AS stars FROM ghdata WHERE cast(v["type"] as string) = 'WatchEvent' GROUP BY repo_name ORDER BY stars DESC, repo_name LIMIT 5; +SELECT max(cast(cast(v["id"] as string) as bigint)) FROM ghdata; +SELECT sum(cast(cast(v["id"] as string) as bigint)) FROM ghdata; +SELECT sum(cast(v["payload"]["member"]["id"] as bigint)) FROM ghdata; +SELECT sum(cast(v["payload"]["pull_request"]["milestone"]["creator"]["site_admin"] as bigint)) FROM ghdata; +SELECT sum(length(v["payload"]["pull_request"]["base"]["repo"]["html_url"])) FROM ghdata; +SELECT v["payload"]["member"]["id"] as member_id FROM ghdata where cast(v["payload"]["member"]["id"] as string) is not null ORDER BY k LIMIT 10; -- select k, v:payload.commits.author.name AS name, e FROM ghdata as t lateral view explode(cast(v:payload.commits.author.name as array)) tm1 as e order by k limit 5; -select k, json_extract(v, '$.repo') from ghdata WHERE cast(v:type as string) = 'WatchEvent' order by k limit 10; -SELECT cast(v:payload.member.id as bigint), count() FROM ghdata where cast(v:payload.member.id as bigint) is not null group by cast(v:payload.member.id as bigint) order by 1, 2 desc LIMIT 10; -select k, cast(v:`id` as string), cast(v:type as string), cast(v:repo.name as string) from ghdata WHERE cast(v:type as string) = 'WatchEvent' order by k limit 10; -SELECT cast(v:payload.pusher_type as text) FROM ghdata where cast(v:payload.pusher_type as text) is not null ORDER BY k LIMIT 10; \ No newline at end of file +select k, json_extract(v, '$.repo') from ghdata WHERE cast(v["type"] as string) = 'WatchEvent' order by k limit 10; + SELECT cast(v["payload"]["member"]["id"] as bigint) as member_id, count() FROM ghdata where cast(v["payload"]["member"]["id"] as bigint) is not null group by member_id order by 1, 2 desc LIMIT 10; +select k, cast(v["id"] as string), cast(v["type"] as string) as type, cast(v["repo"]["name"] as string) from ghdata WHERE cast(v["type"] as string) = 'WatchEvent' order by k limit 10; +SELECT cast(v["payload"]["pusher_type"] as text) as pusher_type FROM ghdata where cast(v["payload"]["pusher_type"] as text) is not null ORDER BY k LIMIT 10; diff --git a/regression-test/suites/variant_p0/with_index/load.groovy b/regression-test/suites/variant_p0/with_index/load.groovy index f4bda1216e..75a0e146a1 100644 --- a/regression-test/suites/variant_p0/with_index/load.groovy +++ b/regression-test/suites/variant_p0/with_index/load.groovy @@ -59,13 +59,13 @@ suite("regression_test_variant_with_index", "nonConcurrent"){ properties("replication_num" = "1", "disable_auto_compaction" = "true"); """ sql """insert into var_with_index values(1, '{"a" : 0, "b": 3}', 'hello world'), (2, '{"a" : 123}', 'world'),(3, '{"a" : 123}', 'hello world')""" - qt_sql_inv_1 "select v:a from var_with_index where inv match 'hello' order by k" - qt_sql_inv_2 "select v:a from var_with_index where inv match 'hello' and cast(v:a as int) > 0 order by k" - qt_sql_inv_3 "select * from var_with_index where inv match 'hello' and cast(v:a as int) > 0 order by k" + qt_sql_inv_1 """select v["a"] from var_with_index where inv match 'hello' order by k""" + qt_sql_inv_2 """select v["a"] from var_with_index where inv match 'hello' and cast(v:a as int) > 0 order by k""" + qt_sql_inv_3 """select * from var_with_index where inv match 'hello' and cast(v["a"] as int) > 0 order by k""" sql "truncate table var_with_index" // set back configs set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "0.95") - set_be_config.call("variant_threshold_rows_to_estimate_sparse_column", "100") + set_be_config.call("variant_threshold_rows_to_estimate_sparse_column", "1000") // sql "truncate table ${table_name}" sql """insert into var_with_index values(1, '{"a1" : 0, "b1": 3}', 'hello world'), (2, '{"a2" : 123}', 'world'),(3, '{"a3" : 123}', 'hello world')""" sql """insert into var_with_index values(4, '{"b1" : 0, "b2": 3}', 'hello world'), (5, '{"b2" : 123}', 'world'),(6, '{"b3" : 123}', 'hello world')""" @@ -78,7 +78,7 @@ suite("regression_test_variant_with_index", "nonConcurrent"){ wait_for_latest_op_on_table_finish(table_name, timeout) show_result = sql "show index from ${table_name}" assertEquals(show_result.size(), 0) - qt_sql_inv4 """select v:a1 from ${table_name} where cast(v:a1 as int) = 0""" + qt_sql_inv4 """select v["a1"] from ${table_name} where cast(v:a1 as int) = 0""" qt_sql_inv5 """select * from ${table_name} order by k""" sql "create index inv_idx on ${table_name}(`inv`) using inverted" wait_for_latest_op_on_table_finish(table_name, timeout) diff --git a/regression-test/suites/variant_p0/with_index/var_index.groovy b/regression-test/suites/variant_p0/with_index/var_index.groovy index 844b83e1a2..bea90f7403 100644 --- a/regression-test/suites/variant_p0/with_index/var_index.groovy +++ b/regression-test/suites/variant_p0/with_index/var_index.groovy @@ -33,7 +33,7 @@ suite("regression_test_variant_var_index", "variant_type"){ sql """insert into var_index values(2, '{"a" : 18811, "b" : "hello world", "c" : 1181111}')""" sql """insert into var_index values(3, '{"a" : 18811, "b" : "hello wworld", "c" : 11111}')""" sql """insert into var_index values(4, '{"a" : 1234, "b" : "hello xxx world", "c" : 8181111}')""" - qt_sql """select * from var_index where cast(v:a as smallint) > 123 and cast(v:b as string) match 'hello' and cast(v:c as int) > 1024 order by k""" + qt_sql """select * from var_index where cast(v["a"] as smallint) > 123 and cast(v["b"] as string) match 'hello' and cast(v["c"] as int) > 1024 order by k""" sql """insert into var_index values(5, '{"a" : 123456789, "b" : 123456, "c" : 8181111}')""" - qt_sql """select * from var_index where cast(v:a as int) > 123 and cast(v:b as string) match 'hello' and cast(v:c as int) > 11111 order by k""" + qt_sql """select * from var_index where cast(v["a"] as int) > 123 and cast(v["b"] as string) match 'hello' and cast(v["c"] as int) > 11111 order by k""" } \ No newline at end of file