[Feature](Variant) Implement variant new sub column access method (#28484)

* [Feature](Variant) Implement variant new sub column access method

This change adds support for queries such as SELECT v["a"]["b"] FROM simple_var WHERE CAST(v["a"]["b"] AS INT) = 1. Tests cover three primary scenarios:

```
1. A basic test involving the variant data type.
2. A scenario dealing with GitHub event data in the context of a variant.
3. A case related to the TPC-H benchmark using a variant.
```
This commit is contained in:
lihangyu
2023-12-22 11:59:37 +08:00
committed by GitHub
parent 453e3c18f4
commit 13ccfa06a7
198 changed files with 7154 additions and 21 deletions

View File

@ -49,6 +49,7 @@ import org.apache.doris.qe.ConnectContext;
import org.apache.doris.qe.SessionVariable;
import org.apache.doris.rewrite.BetweenToCompoundRule;
import org.apache.doris.rewrite.CompoundPredicateWriteRule;
import org.apache.doris.rewrite.ElementAtToSlotRefRule;
import org.apache.doris.rewrite.EliminateUnnecessaryFunctions;
import org.apache.doris.rewrite.EraseRedundantCastExpr;
import org.apache.doris.rewrite.ExprRewriteRule;
@ -454,6 +455,7 @@ public class Analyzer {
rules.add(RewriteIsNullIsNotNullRule.INSTANCE);
rules.add(MatchPredicateRule.INSTANCE);
rules.add(EliminateUnnecessaryFunctions.INSTANCE);
rules.add(ElementAtToSlotRefRule.INSTANCE);
List<ExprRewriteRule> onceRules = Lists.newArrayList();
onceRules.add(ExtractCommonFactorsRule.INSTANCE);
onceRules.add(InferFiltersRule.INSTANCE);

View File

@ -1068,6 +1068,13 @@ public class NativeInsertStmt extends InsertStmt {
queryStmt.foldConstant(rewriter, tQueryOptions);
}
/**
 * Delegates the element_at-to-slot rewrite to the wrapped query statement.
 * The insert statement must already be analyzed when this is called.
 *
 * @param rewriter expression rewriter handed on to the query statement
 * @param tQueryOptions query options handed on to the query statement
 * @throws AnalysisException if the delegated rewrite fails
 */
@Override
public void rewriteElementAtToSlot(ExprRewriter rewriter, TQueryOptions tQueryOptions) throws AnalysisException {
    // Fail fast when invoked before analysis has finished.
    Preconditions.checkState(isAnalyzed());
    queryStmt.rewriteElementAtToSlot(rewriter, tQueryOptions);
}
@Override
public List<Expr> getResultExprs() {
return resultExprs;

View File

@ -527,6 +527,10 @@ public abstract class QueryStmt extends StatementBase implements Queriable {
}
/**
 * Default element_at-to-slot rewrite for query statements: intentionally does nothing.
 * Subclasses that hold rewritable expressions (e.g. SelectStmt) override this.
 *
 * @param rewriter unused here
 * @param tQueryOptions unused here
 * @throws AnalysisException never thrown by this default implementation
 */
@Override
public void rewriteElementAtToSlot(ExprRewriter rewriter, TQueryOptions tQueryOptions) throws AnalysisException {
    // no-op by default
}
/**
* register expr_id of expr and its children, if not set

View File

@ -49,6 +49,7 @@ import org.apache.doris.qe.ConnectContext;
import org.apache.doris.rewrite.ExprRewriter;
import org.apache.doris.rewrite.mvrewrite.MVSelectFailedException;
import org.apache.doris.thrift.TExprOpcode;
import org.apache.doris.thrift.TQueryOptions;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicates;
@ -1940,6 +1941,103 @@ public class SelectStmt extends QueryStmt {
}
}
/**
 * Rewrites variant element_at calls to slots in every clause of this SELECT statement:
 * select list, FROM (ON clauses and inline views), WHERE, HAVING, nested subqueries,
 * GROUP BY and ORDER BY. Expression ids are registered before each rewrite.
 *
 * @param rewriter rewriter whose rewriteElementAtToSlot() performs the per-expression rewrite
 * @param tQueryOptions query options, passed through to nested statements
 * @throws AnalysisException if rewriting any expression or nested statement fails
 */
@Override
public void rewriteElementAtToSlot(ExprRewriter rewriter, TQueryOptions tQueryOptions) throws AnalysisException {
    // Subqueries found in the select list, WHERE and HAVING; they are rewritten further below.
    List<Subquery> nestedQueries = Lists.newArrayList();

    // SELECT list: star items are skipped.
    for (SelectListItem selectItem : selectList.getItems()) {
        if (!selectItem.isStar()) {
            registerExprId(selectItem.getExpr());
            Expr rewrittenItem = rewriter.rewriteElementAtToSlot(selectItem.getExpr(), analyzer);
            if (!rewrittenItem.equals(selectItem.getExpr())) {
                selectItem.setExpr(rewrittenItem);
            }
            // remember subqueries appearing in the select list
            if (selectItem.getExpr().contains(Predicates.instanceOf(Subquery.class))) {
                selectItem.getExpr().collect(Subquery.class, nestedQueries);
            }
        }
    }

    // FROM clause: rewrite ON predicates and recurse into inline views.
    for (TableRef tableRef : fromClause) {
        Preconditions.checkState(tableRef.isAnalyzed);
        if (tableRef.onClause != null) {
            registerExprId(tableRef.onClause);
            tableRef.onClause = rewriter.rewriteElementAtToSlot(tableRef.onClause, analyzer);
        }
        if (tableRef instanceof InlineViewRef) {
            InlineViewRef inlineView = (InlineViewRef) tableRef;
            inlineView.getViewStmt().rewriteElementAtToSlot(rewriter, tQueryOptions);
        }
    }

    // WHERE clause
    if (whereClause != null) {
        registerExprId(whereClause);
        Expr rewrittenWhere = rewriter.rewriteElementAtToSlot(whereClause, analyzer);
        if (!rewrittenWhere.equals(whereClause)) {
            setWhereClause(rewrittenWhere);
        }
        whereClause.collect(Subquery.class, nestedQueries);
    }

    // HAVING clause: the analyzed form is rewritten and, on change, mirrored to both fields.
    if (havingClause != null) {
        registerExprId(havingClauseAfterAnalyzed);
        Expr rewrittenHaving = rewriter.rewriteElementAtToSlot(havingClauseAfterAnalyzed, analyzer);
        if (!havingClauseAfterAnalyzed.equals(rewrittenHaving)) {
            havingClause = rewrittenHaving;
            havingClauseAfterAnalyzed = rewrittenHaving;
        }
        havingClauseAfterAnalyzed.collect(Subquery.class, nestedQueries);
    }

    // Subqueries collected above
    for (Subquery nested : nestedQueries) {
        registerExprId(nested);
        nested.getStatement().rewriteElementAtToSlot(rewriter, tQueryOptions);
    }

    // GROUP BY: expressions containing an alias are carried over untouched.
    ArrayList<Expr> currentGroupingExprs = groupByClause == null ? null : groupByClause.getGroupingExprs();
    if (currentGroupingExprs != null) {
        ArrayList<Expr> rewrittenGroupingExprs = new ArrayList<>();
        boolean anyRewritten = false;
        for (Expr groupingExpr : currentGroupingExprs) {
            if (containAlias(groupingExpr)) {
                rewrittenGroupingExprs.add(groupingExpr);
            } else {
                registerExprId(groupingExpr);
                Expr rewrittenGrouping = rewriter.rewriteElementAtToSlot(groupingExpr, analyzer);
                anyRewritten |= !groupingExpr.equals(rewrittenGrouping);
                rewrittenGroupingExprs.add(rewrittenGrouping);
            }
        }
        if (anyRewritten) {
            groupByClause.setGroupingExpr(rewrittenGroupingExprs);
            groupByClause.setOriGroupingExprs(rewrittenGroupingExprs);
        }
    }

    // ORDER BY: same alias rule as GROUP BY; original and analyzed elements are updated together.
    if (orderByElements != null && orderByElementsAfterAnalyzed != null) {
        for (int idx = 0; idx < orderByElementsAfterAnalyzed.size(); ++idx) {
            OrderByElement original = orderByElements.get(idx);
            OrderByElement analyzed = orderByElementsAfterAnalyzed.get(idx);
            if (!containAlias(analyzed.getExpr())) {
                registerExprId(analyzed.getExpr());
                Expr rewrittenOrder = rewriter.rewriteElementAtToSlot(analyzed.getExpr(), analyzer);
                if (!analyzed.getExpr().equals(rewrittenOrder)) {
                    analyzed.setExpr(rewrittenOrder);
                    original.setExpr(rewrittenOrder);
                }
            }
        }
    }
}
@Override
public void collectExprs(Map<String, Expr> exprMap) {
// subquery

View File

@ -535,6 +535,10 @@ public class SlotRef extends Expr {
this.label = label;
}
/**
 * Sets the sub-column path of this slot reference.
 * The given list is stored as-is (no copy is made).
 *
 * @param subColPath path components to store
 */
public void setSubColPath(List<String> subColPath) {
    this.subColPath = subColPath;
}
/**
 * @return true when the {@code col} field of this slot reference is non-null
 */
public boolean hasCol() {
    return col != null;
}

View File

@ -211,6 +211,16 @@ public abstract class StatementBase implements ParseNode {
"foldConstant() not implemented for this stmt: " + getClass().getSimpleName());
}
/**
 * Rewrites element_at to slot in this statement.
 * This base implementation does not support the operation and always throws;
 * statement types that support it override this method.
 *
 * @param rewriter expression rewriter made available to overriding implementations
 * @param tQueryOptions query options made available to overriding implementations
 * @throws AnalysisException declared for overriding implementations
 * @throws IllegalStateException always, in this base implementation
 */
public void rewriteElementAtToSlot(ExprRewriter rewriter, TQueryOptions tQueryOptions) throws AnalysisException {
    final String stmtName = getClass().getSimpleName();
    throw new IllegalStateException(
            "rewriteElementAtToSlot() not implemented for this stmt: " + stmtName);
}
public void setOrigStmt(OriginStatement origStmt) {
Preconditions.checkState(origStmt != null);
this.origStmt = origStmt;

View File

@ -125,6 +125,8 @@ public class SessionVariable implements Serializable, Writable {
public static final String PREFER_JOIN_METHOD = "prefer_join_method";
public static final String ENABLE_FOLD_CONSTANT_BY_BE = "enable_fold_constant_by_be";
// Session variable name bound to the enableRewriteElementAtToSlot field via @VariableMgr.VarAttr.
public static final String ENABLE_REWRITE_ELEMENT_AT_TO_SLOT = "enable_rewrite_element_at_to_slot";
public static final String ENABLE_ODBC_TRANSCATION = "enable_odbc_transcation";
public static final String ENABLE_SQL_CACHE = "enable_sql_cache";
public static final String ENABLE_PARTITION_CACHE = "enable_partition_cache";
@ -829,6 +831,8 @@ public class SessionVariable implements Serializable, Writable {
@VariableMgr.VarAttr(name = ENABLE_FOLD_CONSTANT_BY_BE, fuzzy = true)
private boolean enableFoldConstantByBe = false;
// Enabled by default; exposed through isEnableRewriteElementAtToSlot() / setEnableRewriteElementAtToSlot().
@VariableMgr.VarAttr(name = ENABLE_REWRITE_ELEMENT_AT_TO_SLOT, fuzzy = true)
private boolean enableRewriteElementAtToSlot = true;
@VariableMgr.VarAttr(name = RUNTIME_FILTER_MODE, needForward = true)
private String runtimeFilterMode = "GLOBAL";
@ -1961,6 +1965,14 @@ public class SessionVariable implements Serializable, Writable {
return enableFoldConstantByBe;
}
/**
 * @return the current value of the enableRewriteElementAtToSlot session flag
 */
public boolean isEnableRewriteElementAtToSlot() {
    return this.enableRewriteElementAtToSlot;
}
/**
 * Sets the enableRewriteElementAtToSlot session flag.
 *
 * @param rewriteElementAtToSlot new value of the flag
 */
public void setEnableRewriteElementAtToSlot(boolean rewriteElementAtToSlot) {
    this.enableRewriteElementAtToSlot = rewriteElementAtToSlot;
}
/**
 * @return the current value of the enableNereidsDML session flag
 */
public boolean isEnableNereidsDML() {
    return this.enableNereidsDML;
}

View File

@ -1147,7 +1147,9 @@ public class StmtExecutor {
if (context.getSessionVariable().isEnableFoldConstantByBe()) {
// fold constant expr
parsedStmt.foldConstant(rewriter, tQueryOptions);
}
if (context.getSessionVariable().isEnableRewriteElementAtToSlot()) {
parsedStmt.rewriteElementAtToSlot(rewriter, tQueryOptions);
}
// Apply expr and subquery rewrites.
ExplainOptions explainOptions = parsedStmt.getExplainOptions();

View File

@ -0,0 +1,142 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.rewrite;
import org.apache.doris.analysis.Analyzer;
import org.apache.doris.analysis.Expr;
import org.apache.doris.analysis.FunctionCallExpr;
import org.apache.doris.analysis.LiteralExpr;
import org.apache.doris.analysis.SlotRef;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.rewrite.ExprRewriter.ClauseType;
import com.google.common.collect.Lists;
import java.util.List;
import java.util.stream.Collectors;
/**
 * Transform element_at function to a {@link SlotRef} for variant sub-column access.
 * This optimization will help query engine to prune as many sub columns as possible
 * to speed up query.
 * eg: element_at(element_at(v, "a"), "b") -> SlotRef(column=v, subColPath=["a", "b"])
 *
 * <p>The actual rewrite is performed by {@link #rewrite(Expr, Analyzer)}. The
 * {@link ExprRewriteRule#apply} implementation only verifies that no variant element_at
 * call is left when the common rewrite stage runs.
 */
public class ElementAtToSlotRefRule implements ExprRewriteRule {
    public static final ElementAtToSlotRefRule INSTANCE = new ElementAtToSlotRefRule();

    /**
     * Check-only hook for the common rewrite stage: never changes {@code expr}.
     *
     * @param expr expression to verify
     * @param analyzer unused
     * @param clauseType unused
     * @return {@code expr} unchanged
     * @throws AnalysisException if {@code expr} still contains an element_at call on a variant slot
     */
    @Override
    public Expr apply(Expr expr, Analyzer analyzer, ClauseType clauseType) throws AnalysisException {
        // Only check that every element_at of variant has already been rewritten to a slot.
        if (containsElementAtFunction(expr)) {
            throw new AnalysisException("element_at should not appear in common rewrite stage");
        }
        return expr;
    }

    /**
     * Rewrites a single element_at expression on a variant slot into that slot, with the
     * literal keys stored as its sub-column path. Any other expression is returned unchanged.
     * Note that the slot referenced by {@code expr} is modified in place and returned.
     */
    private Expr apply(Expr expr, Analyzer analyzer) throws AnalysisException {
        if (!(isElementAtOfVariantType(expr))) {
            return expr;
        }
        List<SlotRef> slotRefs = Lists.newArrayList();
        expr.collect(SlotRef.class, slotRefs);
        // Defensive: isElementAtOfVariantType() has already required exactly one slot.
        if (slotRefs.size() != 1) {
            throw new AnalysisException("only support syntax like v[\"a\"][\"b\"][\"c\"]");
        }
        SlotRef slot = slotRefs.get(0);
        List<Expr> pathsExpr = Lists.newArrayList();
        // Traverse the expression tree to gather literals.
        // For instance, consider the expression v["a"]["b"]["c"], where it's represented as
        // element_at(element_at(element_at(v, 'a'), 'b'), 'c'). The pathsExpr will contain
        // literals ['a', 'b', 'c'] representing the sequence of keys in the structure.
        // NOTE(review): every literal collected under expr is treated as a path key; this
        // presumably relies on keys always being plain literals - confirm for computed keys.
        expr.collect(Expr::isLiteral, pathsExpr);
        List<String> fullPaths = pathsExpr.stream()
                .map(node -> ((LiteralExpr) node).getStringValue())
                .collect(Collectors.toList());
        slot.setSubColPath(fullPaths);
        slot.analyzeImpl(analyzer);
        return slot;
    }

    // check if expr is element_at with variant slot type
    private static boolean isElementAtOfVariantType(Expr expr) {
        if (!(expr instanceof FunctionCallExpr)) {
            return false;
        }
        FunctionCallExpr functionCallExpr = (FunctionCallExpr) expr;
        List<SlotRef> slotRefs = Lists.newArrayList();
        expr.collect(SlotRef.class, slotRefs);
        // Exactly one referenced slot is required, e.g. v["a"]["b"].
        if (slotRefs.size() != 1) {
            return false;
        }
        return functionCallExpr.getFnName().getFunction().equalsIgnoreCase("element_at")
                && slotRefs.get(0).getType().isVariantType();
    }

    /**
     * @param expr expression tree to inspect
     * @return true if {@code expr} or any descendant is an element_at call on a variant slot
     */
    public static boolean containsElementAtFunction(Expr expr) {
        List<Expr> result = Lists.newArrayList();
        getElementAtFunction(expr, result);
        return !result.isEmpty();
    }

    // Collects the outermost element_at-on-variant expressions under expr into result.
    // Recursion stops at a match, so nested element_at calls are reported once via their root.
    private static void getElementAtFunction(Expr expr, List<Expr> result) {
        if (isElementAtOfVariantType(expr)) {
            result.add(expr);
            return;
        }
        for (Expr child : expr.getChildren()) {
            getElementAtFunction(child, result);
        }
    }

    /**
     * Rewrites every element_at call on a variant slot inside {@code inputExpr} into a slot
     * reference carrying the sub-column path.
     *
     * <p>Expressions are matched by id, so ids are expected to be assigned before this is
     * called (presumably through registerExprId() in the calling statement - verify against
     * callers).
     *
     * @param inputExpr expression tree to rewrite; left structurally untouched when a copy is returned
     * @param analyzer analyzer used to re-analyze the rewritten slot
     * @return {@code inputExpr} itself when nothing matched, the rewritten slot when
     *         {@code inputExpr} is itself the matched element_at call, otherwise a clone of
     *         {@code inputExpr} with the matched children replaced
     * @throws AnalysisException if a matched expression cannot be rewritten
     */
    public Expr rewrite(Expr inputExpr, Analyzer analyzer)
            throws AnalysisException {
        List<Expr> originalFunctionElementAtExprs = Lists.newArrayList();
        getElementAtFunction(inputExpr, originalFunctionElementAtExprs);
        // Fast path: most expressions contain no variant element_at, so avoid cloning them.
        if (originalFunctionElementAtExprs.isEmpty()) {
            return inputExpr;
        }
        boolean changed = false;
        // Clone before any apply() call, since apply() mutates slots of the original tree.
        Expr newExpr = inputExpr.clone();
        for (Expr expr : originalFunctionElementAtExprs) {
            Expr rewriteExpr = apply(expr, analyzer);
            // The whole input is the element_at call: the rewritten slot replaces it entirely.
            if (inputExpr.getId().equals(expr.getId())) {
                return rewriteExpr;
            }
            if (rewriteExpr != expr) {
                changed = true;
                replaceChildExpr(newExpr, expr.getId().toString(), rewriteExpr);
            }
        }
        return changed ? newExpr : inputExpr;
    }

    // Find child expr which id matches key and replace this child expr
    // with replacExpr and set replacExpr with same expr id.
    private void replaceChildExpr(Expr expr, String key, Expr replacExpr) {
        // ATTN: make sure the child order of expr keep unchanged
        for (int i = 0; i < expr.getChildren().size(); i++) {
            Expr child = expr.getChild(i);
            if ((isElementAtOfVariantType(child)) && key.equals(child.getId().toString())) {
                replacExpr.setId(child.getId());
                expr.setChild(i, replacExpr);
                break;
            }
            replaceChildExpr(child, key, replacExpr);
        }
    }
}

View File

@ -202,6 +202,24 @@ public class ExprRewriter {
}
}
/**
 * Runs every registered {@link ElementAtToSlotRefRule} over {@code inputExpr}; all other
 * rules are ignored. The change counter is bumped at most once per call.
 *
 * @param inputExpr expression to rewrite
 * @param analyzer analyzer passed to the rule
 * @return the rewritten expression, or {@code inputExpr} when no rule changed it
 * @throws AnalysisException if a rule fails
 */
public Expr rewriteElementAtToSlot(Expr inputExpr, Analyzer analyzer)
        throws AnalysisException {
    Expr current = inputExpr;
    boolean anyChange = false;
    for (ExprRewriteRule candidate : rules) {
        if (!(candidate instanceof ElementAtToSlotRefRule)) {
            continue;
        }
        Expr rewritten = ((ElementAtToSlotRefRule) candidate).rewrite(current, analyzer);
        if (rewritten.equals(current)) {
            continue;
        }
        current = rewritten;
        anyChange = true;
    }
    if (anyChange) {
        numChanges += 1;
    }
    return current;
}
/**
* Applies 'rule' on the Expr tree rooted at 'expr' until there are no more
* changes.