[Feature](Variant) Implement variant new sub column access method (#28484)
* [Feature](Variant) Implement variant new sub column access method. The query `SELECT v["a"]["b"] FROM simple_var WHERE cast(v["a"]["b"] AS int) = 1` encompasses three primary testing scenarios: (1) a basic test involving the variant data type; (2) a scenario dealing with GitHub event data in the context of a variant; (3) a case related to the TPC-H benchmark using a variant.
This commit is contained in:
@ -49,6 +49,7 @@ import org.apache.doris.qe.ConnectContext;
|
||||
import org.apache.doris.qe.SessionVariable;
|
||||
import org.apache.doris.rewrite.BetweenToCompoundRule;
|
||||
import org.apache.doris.rewrite.CompoundPredicateWriteRule;
|
||||
import org.apache.doris.rewrite.ElementAtToSlotRefRule;
|
||||
import org.apache.doris.rewrite.EliminateUnnecessaryFunctions;
|
||||
import org.apache.doris.rewrite.EraseRedundantCastExpr;
|
||||
import org.apache.doris.rewrite.ExprRewriteRule;
|
||||
@ -454,6 +455,7 @@ public class Analyzer {
|
||||
rules.add(RewriteIsNullIsNotNullRule.INSTANCE);
|
||||
rules.add(MatchPredicateRule.INSTANCE);
|
||||
rules.add(EliminateUnnecessaryFunctions.INSTANCE);
|
||||
rules.add(ElementAtToSlotRefRule.INSTANCE);
|
||||
List<ExprRewriteRule> onceRules = Lists.newArrayList();
|
||||
onceRules.add(ExtractCommonFactorsRule.INSTANCE);
|
||||
onceRules.add(InferFiltersRule.INSTANCE);
|
||||
|
||||
@ -1068,6 +1068,13 @@ public class NativeInsertStmt extends InsertStmt {
|
||||
queryStmt.foldConstant(rewriter, tQueryOptions);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void rewriteElementAtToSlot(ExprRewriter rewriter, TQueryOptions tQueryOptions) throws AnalysisException {
|
||||
Preconditions.checkState(isAnalyzed());
|
||||
queryStmt.rewriteElementAtToSlot(rewriter, tQueryOptions);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public List<Expr> getResultExprs() {
|
||||
return resultExprs;
|
||||
|
||||
@ -527,6 +527,10 @@ public abstract class QueryStmt extends StatementBase implements Queriable {
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void rewriteElementAtToSlot(ExprRewriter rewriter, TQueryOptions tQueryOptions) throws AnalysisException {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* register expr_id of expr and its children, if not set
|
||||
|
||||
@ -49,6 +49,7 @@ import org.apache.doris.qe.ConnectContext;
|
||||
import org.apache.doris.rewrite.ExprRewriter;
|
||||
import org.apache.doris.rewrite.mvrewrite.MVSelectFailedException;
|
||||
import org.apache.doris.thrift.TExprOpcode;
|
||||
import org.apache.doris.thrift.TQueryOptions;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.base.Predicates;
|
||||
@ -1940,6 +1941,103 @@ public class SelectStmt extends QueryStmt {
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void rewriteElementAtToSlot(ExprRewriter rewriter, TQueryOptions tQueryOptions) throws AnalysisException {
|
||||
// subquery
|
||||
List<Subquery> subqueryExprs = Lists.newArrayList();
|
||||
|
||||
// select clause
|
||||
for (SelectListItem item : selectList.getItems()) {
|
||||
if (item.isStar()) {
|
||||
continue;
|
||||
}
|
||||
// register expr id
|
||||
registerExprId(item.getExpr());
|
||||
Expr expr = rewriter.rewriteElementAtToSlot(item.getExpr(), analyzer);
|
||||
if (!expr.equals(item.getExpr())) {
|
||||
item.setExpr(expr);
|
||||
}
|
||||
// equal sub-query in select list
|
||||
if (item.getExpr().contains(Predicates.instanceOf(Subquery.class))) {
|
||||
item.getExpr().collect(Subquery.class, subqueryExprs);
|
||||
}
|
||||
}
|
||||
|
||||
// from clause
|
||||
for (TableRef ref : fromClause) {
|
||||
Preconditions.checkState(ref.isAnalyzed);
|
||||
if (ref.onClause != null) {
|
||||
registerExprId(ref.onClause);
|
||||
ref.onClause = rewriter.rewriteElementAtToSlot(ref.onClause, analyzer);
|
||||
}
|
||||
if (ref instanceof InlineViewRef) {
|
||||
((InlineViewRef) ref).getViewStmt().rewriteElementAtToSlot(rewriter, tQueryOptions);
|
||||
}
|
||||
}
|
||||
|
||||
if (whereClause != null) {
|
||||
registerExprId(whereClause);
|
||||
Expr expr = rewriter.rewriteElementAtToSlot(whereClause, analyzer);
|
||||
if (!expr.equals(whereClause)) {
|
||||
setWhereClause(expr);
|
||||
}
|
||||
whereClause.collect(Subquery.class, subqueryExprs);
|
||||
|
||||
}
|
||||
if (havingClause != null) {
|
||||
registerExprId(havingClauseAfterAnalyzed);
|
||||
Expr expr = rewriter.rewriteElementAtToSlot(havingClauseAfterAnalyzed, analyzer);
|
||||
if (!havingClauseAfterAnalyzed.equals(expr)) {
|
||||
havingClause = expr;
|
||||
havingClauseAfterAnalyzed = expr;
|
||||
}
|
||||
havingClauseAfterAnalyzed.collect(Subquery.class, subqueryExprs);
|
||||
}
|
||||
for (Subquery subquery : subqueryExprs) {
|
||||
registerExprId(subquery);
|
||||
subquery.getStatement().rewriteElementAtToSlot(rewriter, tQueryOptions);
|
||||
}
|
||||
if (groupByClause != null) {
|
||||
ArrayList<Expr> groupingExprs = groupByClause.getGroupingExprs();
|
||||
if (groupingExprs != null) {
|
||||
ArrayList<Expr> newGroupingExpr = new ArrayList<>();
|
||||
boolean rewrite = false;
|
||||
for (Expr expr : groupingExprs) {
|
||||
if (containAlias(expr)) {
|
||||
newGroupingExpr.add(expr);
|
||||
continue;
|
||||
}
|
||||
registerExprId(expr);
|
||||
Expr rewriteExpr = rewriter.rewriteElementAtToSlot(expr, analyzer);
|
||||
if (!expr.equals(rewriteExpr)) {
|
||||
rewrite = true;
|
||||
}
|
||||
newGroupingExpr.add(rewriteExpr);
|
||||
}
|
||||
if (rewrite) {
|
||||
groupByClause.setGroupingExpr(newGroupingExpr);
|
||||
groupByClause.setOriGroupingExprs(newGroupingExpr);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (orderByElements != null && orderByElementsAfterAnalyzed != null) {
|
||||
for (int i = 0; i < orderByElementsAfterAnalyzed.size(); ++i) {
|
||||
OrderByElement orderByElement = orderByElements.get(i);
|
||||
OrderByElement orderByElementAnalyzed = orderByElementsAfterAnalyzed.get(i);
|
||||
// same as above
|
||||
if (containAlias(orderByElementAnalyzed.getExpr())) {
|
||||
continue;
|
||||
}
|
||||
registerExprId(orderByElementAnalyzed.getExpr());
|
||||
Expr newExpr = rewriter.rewriteElementAtToSlot(orderByElementAnalyzed.getExpr(), analyzer);
|
||||
if (!orderByElementAnalyzed.getExpr().equals(newExpr)) {
|
||||
orderByElementAnalyzed.setExpr(newExpr);
|
||||
orderByElement.setExpr(newExpr);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collectExprs(Map<String, Expr> exprMap) {
|
||||
// subquery
|
||||
|
||||
@ -535,6 +535,10 @@ public class SlotRef extends Expr {
|
||||
this.label = label;
|
||||
}
|
||||
|
||||
public void setSubColPath(List<String> subColPath) {
|
||||
this.subColPath = subColPath;
|
||||
}
|
||||
|
||||
public boolean hasCol() {
|
||||
return this.col != null;
|
||||
}
|
||||
|
||||
@ -211,6 +211,16 @@ public abstract class StatementBase implements ParseNode {
|
||||
"foldConstant() not implemented for this stmt: " + getClass().getSimpleName());
|
||||
}
|
||||
|
||||
/**
|
||||
* rewrite element_at to slot in statement
|
||||
* @throws AnalysisException
|
||||
* @param rewriter
|
||||
*/
|
||||
public void rewriteElementAtToSlot(ExprRewriter rewriter, TQueryOptions tQueryOptions) throws AnalysisException {
|
||||
throw new IllegalStateException(
|
||||
"rewriteElementAtToSlot() not implemented for this stmt: " + getClass().getSimpleName());
|
||||
}
|
||||
|
||||
public void setOrigStmt(OriginStatement origStmt) {
|
||||
Preconditions.checkState(origStmt != null);
|
||||
this.origStmt = origStmt;
|
||||
|
||||
@ -125,6 +125,8 @@ public class SessionVariable implements Serializable, Writable {
|
||||
public static final String PREFER_JOIN_METHOD = "prefer_join_method";
|
||||
|
||||
public static final String ENABLE_FOLD_CONSTANT_BY_BE = "enable_fold_constant_by_be";
|
||||
|
||||
public static final String ENABLE_REWRITE_ELEMENT_AT_TO_SLOT = "enable_rewrite_element_at_to_slot";
|
||||
public static final String ENABLE_ODBC_TRANSCATION = "enable_odbc_transcation";
|
||||
public static final String ENABLE_SQL_CACHE = "enable_sql_cache";
|
||||
public static final String ENABLE_PARTITION_CACHE = "enable_partition_cache";
|
||||
@ -829,6 +831,8 @@ public class SessionVariable implements Serializable, Writable {
|
||||
@VariableMgr.VarAttr(name = ENABLE_FOLD_CONSTANT_BY_BE, fuzzy = true)
|
||||
private boolean enableFoldConstantByBe = false;
|
||||
|
||||
@VariableMgr.VarAttr(name = ENABLE_REWRITE_ELEMENT_AT_TO_SLOT, fuzzy = true)
|
||||
private boolean enableRewriteElementAtToSlot = true;
|
||||
@VariableMgr.VarAttr(name = RUNTIME_FILTER_MODE, needForward = true)
|
||||
private String runtimeFilterMode = "GLOBAL";
|
||||
|
||||
@ -1961,6 +1965,14 @@ public class SessionVariable implements Serializable, Writable {
|
||||
return enableFoldConstantByBe;
|
||||
}
|
||||
|
||||
public boolean isEnableRewriteElementAtToSlot() {
|
||||
return enableRewriteElementAtToSlot;
|
||||
}
|
||||
|
||||
public void setEnableRewriteElementAtToSlot(boolean rewriteElementAtToSlot) {
|
||||
enableRewriteElementAtToSlot = rewriteElementAtToSlot;
|
||||
}
|
||||
|
||||
public boolean isEnableNereidsDML() {
|
||||
return enableNereidsDML;
|
||||
}
|
||||
|
||||
@ -1147,7 +1147,9 @@ public class StmtExecutor {
|
||||
if (context.getSessionVariable().isEnableFoldConstantByBe()) {
|
||||
// fold constant expr
|
||||
parsedStmt.foldConstant(rewriter, tQueryOptions);
|
||||
|
||||
}
|
||||
if (context.getSessionVariable().isEnableRewriteElementAtToSlot()) {
|
||||
parsedStmt.rewriteElementAtToSlot(rewriter, tQueryOptions);
|
||||
}
|
||||
// Apply expr and subquery rewrites.
|
||||
ExplainOptions explainOptions = parsedStmt.getExplainOptions();
|
||||
|
||||
@ -0,0 +1,142 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package org.apache.doris.rewrite;
|
||||
|
||||
import org.apache.doris.analysis.Analyzer;
|
||||
import org.apache.doris.analysis.Expr;
|
||||
import org.apache.doris.analysis.FunctionCallExpr;
|
||||
import org.apache.doris.analysis.LiteralExpr;
|
||||
import org.apache.doris.analysis.SlotRef;
|
||||
import org.apache.doris.common.AnalysisException;
|
||||
import org.apache.doris.rewrite.ExprRewriter.ClauseType;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* Transform element_at function to SlotReference for variant sub-column access.
|
||||
* This optimization will help query engine to prune as many sub columns as possible
|
||||
* to speed up query.
|
||||
* eg: element_at(element_at(v, "a"), "b") -> SlotReference(column=v, subColLabels=["a", "b"])
|
||||
*/
|
||||
|
||||
public class ElementAtToSlotRefRule implements ExprRewriteRule {
|
||||
public static final ElementAtToSlotRefRule INSTANCE = new ElementAtToSlotRefRule();
|
||||
|
||||
@Override
|
||||
public Expr apply(Expr expr, Analyzer analyzer, ClauseType clauseType) throws AnalysisException {
|
||||
// Only check element at of variant all rewrited to slots
|
||||
List<Expr> elementAtFunctions = Lists.newArrayList();
|
||||
getElementAtFunction(expr, elementAtFunctions);
|
||||
if (!elementAtFunctions.isEmpty()) {
|
||||
throw new AnalysisException("element_at should not appear in common rewrite stage");
|
||||
}
|
||||
return expr;
|
||||
}
|
||||
|
||||
private Expr apply(Expr expr, Analyzer analyzer) throws AnalysisException {
|
||||
if (!(isElementAtOfVariantType(expr))) {
|
||||
return expr;
|
||||
}
|
||||
List<SlotRef> slotRefs = Lists.newArrayList();
|
||||
expr.collect(SlotRef.class, slotRefs);
|
||||
if (slotRefs.size() != 1) {
|
||||
throw new AnalysisException("only support syntax like v[\"a\"][\"b\"][\"c\"]");
|
||||
}
|
||||
SlotRef slot = slotRefs.get(0);
|
||||
List<Expr> pathsExpr = Lists.newArrayList();
|
||||
// Traverse the expression tree to gather literals.
|
||||
// For instance, consider the expression v["a"]["b"]["c"], where it's represented as
|
||||
// element_at(element_at(element_at(v, 'a'), 'b'), 'c').The pathsExpr will contain
|
||||
// literals ['a', 'b', 'c'] representing the sequence of keys in the structure.
|
||||
expr.collect(Expr::isLiteral, pathsExpr);
|
||||
List<String> fullPaths = pathsExpr.stream()
|
||||
.map(node -> ((LiteralExpr) node).getStringValue())
|
||||
.collect(Collectors.toList());
|
||||
slot.setSubColPath(fullPaths);
|
||||
slot.analyzeImpl(analyzer);
|
||||
return slot;
|
||||
}
|
||||
|
||||
// check if expr is element_at with variant slot type
|
||||
private static boolean isElementAtOfVariantType(Expr expr) {
|
||||
if (!(expr instanceof FunctionCallExpr)) {
|
||||
return false;
|
||||
}
|
||||
FunctionCallExpr functionCallExpr = (FunctionCallExpr) expr;
|
||||
List<SlotRef> slotRefs = Lists.newArrayList();
|
||||
expr.collect(SlotRef.class, slotRefs);
|
||||
if (slotRefs.size() != 1) {
|
||||
return false;
|
||||
}
|
||||
return functionCallExpr.getFnName().getFunction().equalsIgnoreCase("element_at")
|
||||
&& slotRefs.get(0).getType().isVariantType();
|
||||
}
|
||||
|
||||
public static boolean containsElementAtFunction(Expr expr) {
|
||||
List<Expr> result = Lists.newArrayList();
|
||||
getElementAtFunction(expr, result);
|
||||
return !result.isEmpty();
|
||||
}
|
||||
|
||||
private static void getElementAtFunction(Expr expr, List<Expr> result) {
|
||||
if (isElementAtOfVariantType(expr)) {
|
||||
result.add(expr);
|
||||
return;
|
||||
}
|
||||
for (Expr child : expr.getChildren()) {
|
||||
getElementAtFunction(child, result);
|
||||
}
|
||||
}
|
||||
|
||||
public Expr rewrite(Expr inputExpr, Analyzer analyzer)
|
||||
throws AnalysisException {
|
||||
List<Expr> originalFunctionElementAtExprs = Lists.newArrayList();
|
||||
boolean changed = false;
|
||||
Expr newExpr = inputExpr.clone();
|
||||
getElementAtFunction(inputExpr, originalFunctionElementAtExprs);
|
||||
for (Expr expr : originalFunctionElementAtExprs) {
|
||||
Expr rewriteExpr = apply(expr, analyzer);
|
||||
if (inputExpr.getId().equals(expr.getId())) {
|
||||
return rewriteExpr;
|
||||
}
|
||||
if (rewriteExpr != expr) {
|
||||
changed = true;
|
||||
replaceChildExpr(newExpr, expr.getId().toString(), rewriteExpr);
|
||||
}
|
||||
}
|
||||
return changed ? newExpr : inputExpr;
|
||||
}
|
||||
|
||||
// Find child expr which id matches key and replace this child expr
|
||||
// with replacExpr and set replacExpr with same expr id.
|
||||
private void replaceChildExpr(Expr expr, String key, Expr replacExpr) {
|
||||
// ATTN: make sure the child order of expr keep unchanged
|
||||
for (int i = 0; i < expr.getChildren().size(); i++) {
|
||||
Expr child = expr.getChild(i);
|
||||
if ((isElementAtOfVariantType(child)) && key.equals(child.getId().toString())) {
|
||||
replacExpr.setId(child.getId());
|
||||
expr.setChild(i, replacExpr);
|
||||
break;
|
||||
}
|
||||
replaceChildExpr(child, key, replacExpr);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -202,6 +202,24 @@ public class ExprRewriter {
|
||||
}
|
||||
}
|
||||
|
||||
public Expr rewriteElementAtToSlot(Expr inputExpr, Analyzer analyzer)
|
||||
throws AnalysisException {
|
||||
boolean changed = false;
|
||||
for (ExprRewriteRule rule : rules) {
|
||||
if (rule instanceof ElementAtToSlotRefRule) {
|
||||
Expr newExpr = ((ElementAtToSlotRefRule) rule).rewrite(inputExpr, analyzer);
|
||||
if (!newExpr.equals(inputExpr)) {
|
||||
inputExpr = newExpr;
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (changed) {
|
||||
++numChanges;
|
||||
}
|
||||
return inputExpr;
|
||||
}
|
||||
|
||||
/**
|
||||
* Applies 'rule' on the Expr tree rooted at 'expr' until there are no more
|
||||
* changes.
|
||||
|
||||
Reference in New Issue
Block a user