[Feature](explode) support explode map type (#30151)

This commit is contained in:
amory
2024-02-22 20:01:19 +08:00
committed by yiguolei
parent caf68f3f60
commit f65876d803
18 changed files with 836 additions and 16 deletions

View File

@ -28,6 +28,8 @@ import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeJso
import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeJsonArrayJsonOuter;
import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeJsonArrayString;
import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeJsonArrayStringOuter;
import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeMap;
import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeMapOuter;
import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeNumbers;
import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeNumbersOuter;
import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeOuter;
@ -48,6 +50,8 @@ public class BuiltinTableGeneratingFunctions implements FunctionHelper {
public final List<TableGeneratingFunc> tableGeneratingFunctions = ImmutableList.of(
tableGenerating(Explode.class, "explode"),
tableGenerating(ExplodeOuter.class, "explode_outer"),
tableGenerating(ExplodeMap.class, "explode_map"),
tableGenerating(ExplodeMapOuter.class, "explode_map_outer"),
tableGenerating(ExplodeNumbers.class, "explode_numbers"),
tableGenerating(ExplodeNumbersOuter.class, "explode_numbers_outer"),
tableGenerating(ExplodeBitmap.class, "explode_bitmap"),

View File

@ -1931,6 +1931,7 @@ public class FunctionSet<T> {
addTableFunctionWithCombinator(EXPLODE, Type.WILDCARD_DECIMAL, Function.NullableMode.ALWAYS_NULLABLE,
Lists.newArrayList(new ArrayType(Type.WILDCARD_DECIMAL)), false,
"_ZN5doris19DummyTableFunctions7explodeEPN9doris_udf15FunctionContextERKNS1_13CollectionValE");
}
public boolean isAggFunctionName(String name) {

View File

@ -1023,14 +1023,22 @@ public class LogicalPlanBuilder extends DorisParserBaseVisitor<Object> {
return plan;
}
String generateName = ctx.tableName.getText();
String columnName = ctx.columnName.getText();
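// if the lateral view declares several output columns (e.g. explode_map), bind the generator to a single
// generated struct column and keep the user-given names so a later project can expand the struct into them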
String columnName = ctx.columnNames.get(0).getText();
List<String> expandColumnNames = Lists.newArrayList();
if (ctx.columnNames.size() > 1) {
columnName = ConnectContext.get() != null
? ConnectContext.get().getStatementContext().generateColumnName() : "expand_cols";
expandColumnNames = ctx.columnNames.stream()
.map(RuleContext::getText).collect(ImmutableList.toImmutableList());
}
String functionName = ctx.functionName.getText();
List<Expression> arguments = ctx.expression().stream()
.<Expression>map(this::typedVisit)
.collect(ImmutableList.toImmutableList());
Function unboundFunction = new UnboundFunction(functionName, arguments);
return new LogicalGenerate<>(ImmutableList.of(unboundFunction),
ImmutableList.of(new UnboundSlot(generateName, columnName)), plan);
ImmutableList.of(new UnboundSlot(generateName, columnName)), ImmutableList.of(expandColumnNames), plan);
}
/**

View File

@ -53,8 +53,11 @@ import org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunctio
import org.apache.doris.nereids.trees.expressions.functions.generator.TableGeneratingFunction;
import org.apache.doris.nereids.trees.expressions.functions.scalar.GroupingScalarFunction;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Lambda;
import org.apache.doris.nereids.trees.expressions.functions.scalar.StructElement;
import org.apache.doris.nereids.trees.expressions.functions.table.TableValuedFunction;
import org.apache.doris.nereids.trees.expressions.literal.StringLiteral;
import org.apache.doris.nereids.trees.expressions.visitor.DefaultExpressionRewriter;
import org.apache.doris.nereids.trees.plans.AbstractPlan;
import org.apache.doris.nereids.trees.plans.JoinType;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.algebra.Aggregate;
@ -82,6 +85,8 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalTVFRelation;
import org.apache.doris.nereids.trees.plans.logical.UsingJoin;
import org.apache.doris.nereids.trees.plans.visitor.InferPlanOutputAlias;
import org.apache.doris.nereids.types.BooleanType;
import org.apache.doris.nereids.types.StructField;
import org.apache.doris.nereids.types.StructType;
import org.apache.doris.nereids.util.ExpressionUtils;
import org.apache.doris.nereids.util.TypeCoercionUtils;
import org.apache.doris.qe.ConnectContext;
@ -93,6 +98,7 @@ import com.google.common.collect.ImmutableListMultimap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import java.util.ArrayList;
@ -587,7 +593,7 @@ public class BindExpression implements AnalysisRuleFactory {
// we need to do cast before set operation, because we maybe use these slot to do shuffle
// so, we must cast it before shuffle to get correct hash code.
List<List<NamedExpression>> childrenProjections = setOperation.collectChildrenProjections();
ImmutableList.Builder<List<SlotReference>> childrenOutputs = ImmutableList.builder();
Builder<List<SlotReference>> childrenOutputs = ImmutableList.builder();
Builder<Plan> newChildren = ImmutableList.builder();
for (int i = 0; i < childrenProjections.size(); i++) {
Plan newChild;
@ -608,14 +614,15 @@ public class BindExpression implements AnalysisRuleFactory {
})
),
RuleType.BINDING_GENERATE_SLOT.build(
logicalGenerate().thenApply(ctx -> {
logicalGenerate().when(AbstractPlan::canBind).thenApply(ctx -> {
LogicalGenerate<Plan> generate = ctx.root;
List<Function> boundSlotGenerators
= bindSlot(generate.getGenerators(), generate.child(), ctx.cascadesContext);
List<Function> boundFunctionGenerators = boundSlotGenerators.stream()
.map(f -> bindTableGeneratingFunction((UnboundFunction) f, ctx.root, ctx.cascadesContext))
.collect(Collectors.toList());
Builder<Slot> slotBuilder = ImmutableList.builder();
ImmutableList.Builder<Slot> slotBuilder = ImmutableList.builder();
List<Alias> expandAlias = Lists.newArrayList();
for (int i = 0; i < generate.getGeneratorOutput().size(); i++) {
Function generator = boundFunctionGenerators.get(i);
UnboundSlot slot = (UnboundSlot) generate.getGeneratorOutput().get(i);
@ -624,8 +631,34 @@ public class BindExpression implements AnalysisRuleFactory {
Slot boundSlot = new SlotReference(slot.getNameParts().get(1), generator.getDataType(),
generator.nullable(), ImmutableList.of(slot.getNameParts().get(0)));
slotBuilder.add(boundSlot);
// the boundSlot falls into one of two situations:
// 1. expandColumnAlias is not empty for this generator: expand boundSlot into multiple aliases
// 2. expandColumnAlias is empty for this generator: use the original boundSlot as is
if (generate.getExpandColumnAlias() != null && i < generate.getExpandColumnAlias().size()
&& !CollectionUtils.isEmpty(generate.getExpandColumnAlias().get(i))) {
// if the alias is not empty, we should bind it with struct_element as child expr with alias
// struct_element(#expand_col#k, #k) as #k
// struct_element(#expand_col#v, #v) as #v
List<StructField> fields = ((StructType) boundSlot.getDataType()).getFields();
for (int idx = 0; idx < fields.size(); ++idx) {
expandAlias.add(new Alias(new StructElement(
boundSlot, new StringLiteral(fields.get(idx).getName())),
generate.getExpandColumnAlias().get(i).get(idx)));
}
}
}
return new LogicalGenerate<>(boundFunctionGenerators, slotBuilder.build(), generate.child());
LogicalGenerate ret = new LogicalGenerate<>(
boundFunctionGenerators, slotBuilder.build(), generate.child());
if (expandAlias.size() > 0) {
// we need a project on top of the generate to expand the struct produced by explode(map)
// into the user-given field aliases
// the project should contain: the output slots of generate.child() + expandAlias
List<NamedExpression> allProjectSlots = generate.child().getOutput().stream()
.map(NamedExpression.class::cast)
.collect(Collectors.toList());
allProjectSlots.addAll(expandAlias);
return new LogicalProject<>(allProjectSlots, ret);
}
return ret;
})
),
RuleType.BINDING_UNBOUND_TVF_RELATION_FUNCTION.build(

View File

@ -0,0 +1,80 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.nereids.trees.expressions.functions.generator;
import org.apache.doris.catalog.FunctionSignature;
import org.apache.doris.nereids.exceptions.AnalysisException;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.MapType;
import org.apache.doris.nereids.types.StructField;
import org.apache.doris.nereids.types.StructType;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import java.util.List;
/**
 * Table generating function {@code explode_map}.
 * For example, explode_map({"amory":1, "doris": 2}) generates two columns and two rows:
 * key column: amory, doris
 * value column: 1, 2
 * The declared return type is a struct with two fields, "col1" (the map key type)
 * and "col2" (the map value type).
 */
public class ExplodeMap extends TableGeneratingFunction implements UnaryExpression, AlwaysNullable {

    /**
     * constructor with 1 argument.
     *
     * @param arg the expression to explode; it is expected to be of map type
     */
    public ExplodeMap(Expression arg) {
        super("explode_map", arg);
    }

    /**
     * withChildren.
     *
     * @param children the new children, must contain exactly one expression
     * @return a new ExplodeMap over the single given child
     */
    @Override
    public ExplodeMap withChildren(List<Expression> children) {
        Preconditions.checkArgument(children.size() == 1);
        Expression onlyChild = children.get(0);
        return new ExplodeMap(onlyChild);
    }

    /**
     * Rejects any argument whose data type is not a map.
     *
     * @throws AnalysisException if the child's data type is not a {@link MapType}
     */
    @Override
    public void checkLegalityBeforeTypeCoercion() {
        if (child().getDataType() instanceof MapType) {
            return;
        }
        throw new AnalysisException("only support map type for explode_map function but got "
                + child().getDataType());
    }

    /**
     * Builds the single signature of this function from the child's map type:
     * struct(col1: key type, col2: value type) explode_map(map).
     */
    @Override
    public List<FunctionSignature> getSignatures() {
        // the cast relies on the child being a map, see checkLegalityBeforeTypeCoercion
        MapType mapType = (MapType) child().getDataType();
        StructField keyField = new StructField("col1", mapType.getKeyType(), true, "");
        StructField valueField = new StructField("col2", mapType.getValueType(), true, "");
        StructType rowType = new StructType(ImmutableList.of(keyField, valueField));
        return ImmutableList.of(FunctionSignature.ret(rowType).args(mapType));
    }

    @Override
    public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
        return visitor.visitExplodeMap(this, context);
    }
}

View File

@ -0,0 +1,80 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.nereids.trees.expressions.functions.generator;
import org.apache.doris.catalog.FunctionSignature;
import org.apache.doris.nereids.exceptions.AnalysisException;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
import org.apache.doris.nereids.trees.expressions.literal.StructLiteral;
import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.MapType;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import java.util.List;
/**
 * Table generating function {@code explode_map_outer}, the outer variant of {@code explode_map}.
 * For example, explode_map_outer({"amory":1, "doris": 2}) generates two columns and two rows:
 * key column: amory, doris
 * value column: 1, 2
 * The declared return type is a struct built by {@link StructLiteral#constructStructType}
 * from the map key type and the map value type.
 * NOTE(review): presumably the outer variant differs from explode_map only in how empty or
 * NULL maps are handled at execution time; this class does not show it, confirm against the BE.
 */
public class ExplodeMapOuter extends TableGeneratingFunction implements UnaryExpression, AlwaysNullable {

    /**
     * constructor with 1 argument.
     *
     * @param arg the expression to explode; it is expected to be of map type
     */
    public ExplodeMapOuter(Expression arg) {
        super("explode_map_outer", arg);
    }

    /**
     * withChildren.
     *
     * @param children the new children, must contain exactly one expression
     * @return a new ExplodeMapOuter over the single given child
     */
    @Override
    public ExplodeMapOuter withChildren(List<Expression> children) {
        Preconditions.checkArgument(children.size() == 1);
        return new ExplodeMapOuter(children.get(0));
    }

    /**
     * Rejects any argument whose data type is not a map.
     *
     * @throws AnalysisException if the child's data type is not a {@link MapType}
     */
    @Override
    public void checkLegalityBeforeTypeCoercion() {
        if (!(child().getDataType() instanceof MapType)) {
            // report the function the user actually called, not explode_map
            throw new AnalysisException("only support map type for explode_map_outer function but got "
                    + child().getDataType());
        }
    }

    /**
     * Builds the single signature of this function from the child's map type:
     * struct(key type, value type) explode_map_outer(map).
     */
    @Override
    public List<FunctionSignature> getSignatures() {
        // the cast relies on the child being a map, see checkLegalityBeforeTypeCoercion
        MapType mapType = (MapType) child().getDataType();
        return ImmutableList.of(
                FunctionSignature.ret(StructLiteral.constructStructType(
                                Lists.newArrayList(mapType.getKeyType(), mapType.getValueType())))
                        .args(mapType)
        );
    }

    @Override
    public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
        return visitor.visitExplodeMapOuter(this, context);
    }
}

View File

@ -20,6 +20,7 @@ package org.apache.doris.nereids.trees.expressions.literal;
import org.apache.doris.analysis.LiteralExpr;
import org.apache.doris.nereids.exceptions.AnalysisException;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.functions.ExpressionTrait;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.DataType;
import org.apache.doris.nereids.types.StructField;
@ -30,6 +31,7 @@ import com.google.common.collect.ImmutableList;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
/**
* struct literal
@ -139,11 +141,16 @@ public class StructLiteral extends Literal {
return visitor.visitStructLiteral(this, context);
}
public static StructType computeDataType(List<? extends Expression> fields) {
public static StructType constructStructType(List<DataType> fieldTypes) {
ImmutableList.Builder<StructField> structFields = ImmutableList.builder();
for (int i = 0; i < fields.size(); i++) {
structFields.add(new StructField("col" + (i + 1), fields.get(i).getDataType(), true, ""));
for (int i = 0; i < fieldTypes.size(); i++) {
structFields.add(new StructField("col" + (i + 1), fieldTypes.get(i), true, ""));
}
return new StructType(structFields.build());
}
public static StructType computeDataType(List<? extends Expression> fields) {
List<DataType> fieldTypes = fields.stream().map(ExpressionTrait::getDataType).collect(Collectors.toList());
return constructStructType(fieldTypes);
}
}

View File

@ -28,6 +28,8 @@ import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeJso
import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeJsonArrayJsonOuter;
import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeJsonArrayString;
import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeJsonArrayStringOuter;
import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeMap;
import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeMapOuter;
import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeNumbers;
import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeNumbersOuter;
import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeOuter;
@ -49,6 +51,14 @@ public interface TableGeneratingFunctionVisitor<R, C> {
return visitTableGeneratingFunction(explodeOuter, context);
}
/**
 * Visits an {@link ExplodeMap} function.
 * By default this delegates to {@code visitTableGeneratingFunction}.
 */
default R visitExplodeMap(ExplodeMap explode, C context) {
return visitTableGeneratingFunction(explode, context);
}
/**
 * Visits an {@link ExplodeMapOuter} function.
 * By default this delegates to {@code visitTableGeneratingFunction}.
 */
default R visitExplodeMapOuter(ExplodeMapOuter explodeOuter, C context) {
return visitTableGeneratingFunction(explodeOuter, context);
}
/**
 * Visits an {@link ExplodeNumbers} function.
 * By default this delegates to {@code visitTableGeneratingFunction}.
 */
default R visitExplodeNumbers(ExplodeNumbers explodeNumbers, C context) {
return visitTableGeneratingFunction(explodeNumbers, context);
}

View File

@ -47,17 +47,25 @@ public class LogicalGenerate<CHILD_TYPE extends Plan> extends LogicalUnary<CHILD
private final List<Function> generators;
private final List<Slot> generatorOutput;
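// user-given expand column aliases, one list per generator (the i-th list belongs to the i-th
// generator / generator output); a list is empty when that generator's output is not expanded.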
private final List<List<String>> expandColumnAlias;
public LogicalGenerate(List<Function> generators, List<Slot> generatorOutput, CHILD_TYPE child) {
this(generators, generatorOutput, Optional.empty(), Optional.empty(), child);
this(generators, generatorOutput, ImmutableList.of(), Optional.empty(), Optional.empty(), child);
}
public LogicalGenerate(List<Function> generators, List<Slot> generatorOutput,
/**
 * Creates a generate node with expand column aliases and without a group expression
 * or precomputed logical properties (both are passed as Optional.empty()).
 *
 * @param generators the generator functions
 * @param generatorOutput the output slots of the generators
 * @param expandColumnAlias the expand column aliases, one list per generator
 * @param child the child plan
 */
public LogicalGenerate(List<Function> generators, List<Slot> generatorOutput, List<List<String>> expandColumnAlias,
CHILD_TYPE child) {
this(generators, generatorOutput, expandColumnAlias, Optional.empty(), Optional.empty(), child);
}
/**
 * Full constructor. The three given lists are copied into immutable lists,
 * so none of them may be null.
 *
 * @param generators the generator functions
 * @param generatorOutput the output slots of the generators
 * @param expandColumnAlias the expand column aliases, one list per generator
 * @param groupExpression the optional group expression of this plan
 * @param logicalProperties the optional precomputed logical properties
 * @param child the child plan
 */
public LogicalGenerate(List<Function> generators, List<Slot> generatorOutput, List<List<String>> expandColumnAlias,
Optional<GroupExpression> groupExpression,
Optional<LogicalProperties> logicalProperties, CHILD_TYPE child) {
super(PlanType.LOGICAL_GENERATE, groupExpression, logicalProperties, child);
this.generators = ImmutableList.copyOf(generators);
this.generatorOutput = ImmutableList.copyOf(generatorOutput);
this.expandColumnAlias = ImmutableList.copyOf(expandColumnAlias);
}
public List<Function> getGenerators() {
@ -68,10 +76,19 @@ public class LogicalGenerate<CHILD_TYPE extends Plan> extends LogicalUnary<CHILD
return generatorOutput;
}
/**
 * Returns the expand column aliases held by this node (stored as an immutable copy
 * made in the constructor).
 */
public List<List<String>> getExpandColumnAlias() {
return expandColumnAlias;
}
/**
 * Returns a copy of this node that uses the given expand column aliases.
 * Generators, generator output and child are kept; the group expression is reset to empty
 * and the current logical properties are reused.
 *
 * @param expandColumnAlias the new expand column aliases
 */
public LogicalGenerate<Plan> withExpandColumnAlias(List<List<String>> expandColumnAlias) {
return new LogicalGenerate<>(generators, generatorOutput, expandColumnAlias,
Optional.empty(), Optional.of(getLogicalProperties()), child());
}
@Override
public LogicalGenerate<Plan> withChildren(List<Plan> children) {
Preconditions.checkArgument(children.size() == 1);
return new LogicalGenerate<>(generators, generatorOutput, children.get(0));
return new LogicalGenerate<>(generators, generatorOutput, expandColumnAlias, children.get(0));
}
@Override
@ -93,13 +110,13 @@ public class LogicalGenerate<CHILD_TYPE extends Plan> extends LogicalUnary<CHILD
for (int i = 0; i < generators.size(); i++) {
newGeneratorOutput.add(generatorOutput.get(i).withNullable(generators.get(i).nullable()));
}
return new LogicalGenerate<>(generators, newGeneratorOutput,
return new LogicalGenerate<>(generators, newGeneratorOutput, expandColumnAlias,
Optional.empty(), Optional.of(getLogicalProperties()), child());
}
@Override
public LogicalGenerate<Plan> withGroupExpression(Optional<GroupExpression> groupExpression) {
return new LogicalGenerate<>(generators, generatorOutput,
return new LogicalGenerate<>(generators, generatorOutput, expandColumnAlias,
groupExpression, Optional.of(getLogicalProperties()), child());
}
@ -107,7 +124,8 @@ public class LogicalGenerate<CHILD_TYPE extends Plan> extends LogicalUnary<CHILD
public Plan withGroupExprLogicalPropChildren(Optional<GroupExpression> groupExpression,
Optional<LogicalProperties> logicalProperties, List<Plan> children) {
Preconditions.checkArgument(children.size() == 1);
return new LogicalGenerate<>(generators, generatorOutput, groupExpression, logicalProperties, children.get(0));
return new LogicalGenerate<>(generators, generatorOutput, expandColumnAlias,
groupExpression, logicalProperties, children.get(0));
}
@Override