[Vectorized] Support order by aggregate function (#11187)

Co-authored-by: lihaopeng <lihaopeng@baidu.com>
This commit is contained in:
HappenLee
2022-07-28 09:12:58 +08:00
committed by GitHub
parent fccc5e3097
commit 0b1d06bfd6
16 changed files with 192 additions and 141 deletions

View File

@ -4950,8 +4950,8 @@ non_pred_expr ::=
{: RESULT = new FunctionCallExpr(fn_name, exprs); :}
//| function_name:fn_name LPAREN RPAREN
//{: RESULT = new FunctionCallExpr(fn_name, new ArrayList<Expr>()); :}
//| function_name:fn_name LPAREN function_params:params RPAREN
//{: RESULT = new FunctionCallExpr(fn_name, params); :}
| function_name:fn_name LPAREN function_params:params order_by_clause:o RPAREN
{: RESULT = new FunctionCallExpr(fn_name, params, o); :}
| analytic_expr:e
{: RESULT = e; :}
/* Since "IF" is a keyword, need to special case this function */

View File

@ -496,6 +496,8 @@ public final class AggregateInfo extends AggregateInfoBase {
FunctionCallExpr aggExpr = FunctionCallExpr.createMergeAggCall(
inputExpr, Lists.newArrayList(aggExprParam), inputExpr.getFnParams().exprs());
aggExpr.analyzeNoThrow(analyzer);
// do not need analyze in merge stage, just do mark for BE get right function
aggExpr.setOrderByElements(inputExpr.getOrderByElements());
aggExprs.add(aggExpr);
}
@ -621,7 +623,6 @@ public final class AggregateInfo extends AggregateInfoBase {
}
Preconditions.checkState(
secondPhaseAggExprs.size() == aggregateExprs.size() + distinctAggExprs.size());
for (FunctionCallExpr aggExpr : secondPhaseAggExprs) {
aggExpr.analyzeNoThrow(analyzer);
Preconditions.checkState(aggExpr.isAggregateFunction());
@ -649,18 +650,16 @@ public final class AggregateInfo extends AggregateInfoBase {
int numDistinctParams = 0;
if (!isMultiDistinct) {
numDistinctParams = distinctAggExprs.get(0).getChildren().size();
// If we are counting distinct params of group_concat, we cannot include the custom
// separator since it is not a distinct param.
if (distinctAggExprs.get(0).getFnName().getFunction().equalsIgnoreCase("group_concat")
&& numDistinctParams == 2) {
--numDistinctParams;
}
} else {
for (int i = 0; i < distinctAggExprs.size(); i++) {
numDistinctParams += distinctAggExprs.get(i).getChildren().size();
}
}
// If we are counting distinct params of group_concat, we cannot include the custom
// separator since it is not a distinct param.
if (distinctAggExprs.get(0).getFnName().getFunction().equalsIgnoreCase("group_concat")) {
numDistinctParams = 1;
}
int numOrigGroupingExprs = inputAggInfo.getGroupingExprs().size() - numDistinctParams;
Preconditions.checkState(
slotDescs.size() == numOrigGroupingExprs + distinctAggExprs.size()

View File

@ -90,9 +90,10 @@ public class FunctionCallExpr extends Expr {
// private BuiltinAggregateFunction.Operator aggOp;
private FunctionParams fnParams;
// represent original parament from aggregate function
private FunctionParams aggFnParams;
private List<OrderByElement> orderByElements = Lists.newArrayList();
// check analytic function
private boolean isAnalyticFnCall = false;
// check table function
@ -155,6 +156,27 @@ public class FunctionCallExpr extends Expr {
this(fnName, params, false);
}
public FunctionCallExpr(
FunctionName fnName, FunctionParams params, List<OrderByElement> orderByElements) throws AnalysisException {
this(fnName, params, false);
this.orderByElements = orderByElements;
if (!orderByElements.isEmpty()) {
if (!VectorizedUtil.isVectorized()) {
throw new AnalysisException(
"ORDER BY for arguments only support in vec exec engine");
} else if (!AggregateFunction.SUPPORT_ORDER_BY_AGGREGATE_FUNCTION_NAME_SET.contains(
fnName.getFunction().toLowerCase())) {
throw new AnalysisException(
"ORDER BY not support for the function:" + fnName.getFunction().toLowerCase());
} else if (params.isDistinct()) {
throw new AnalysisException(
"ORDER BY not support for the distinct, support in the furture:"
+ fnName.getFunction().toLowerCase());
}
}
setChildren();
}
private FunctionCallExpr(
FunctionName fnName, FunctionParams params, boolean isMergeAggFn) {
super();
@ -187,6 +209,7 @@ public class FunctionCallExpr extends Expr {
protected FunctionCallExpr(FunctionCallExpr other) {
super(other);
fnName = other.fnName;
orderByElements = other.orderByElements;
isAnalyticFnCall = other.isAnalyticFnCall;
// aggOp = other.aggOp;
// fnParams = other.fnParams;
@ -289,6 +312,8 @@ public class FunctionCallExpr extends Expr {
|| fnName.getFunction().equalsIgnoreCase("sm4_decrypt")
|| fnName.getFunction().equalsIgnoreCase("sm4_encrypt"))) {
result.add("\'***\'");
} else if (orderByElements.size() > 0 && i == len - orderByElements.size()) {
result.add("ORDER BY " + children.get(i).toSql());
} else {
result.add(children.get(i).toSql());
}
@ -503,7 +528,7 @@ public class FunctionCallExpr extends Expr {
}
if (fnName.getFunction().equalsIgnoreCase("group_concat")) {
if (children.size() > 2 || children.isEmpty()) {
if (children.size() - orderByElements.size() > 2 || children.isEmpty()) {
throw new AnalysisException(
"group_concat requires one or two parameters: " + this.toSql());
}
@ -514,13 +539,14 @@ public class FunctionCallExpr extends Expr {
"group_concat requires first parameter to be of type STRING: " + this.toSql());
}
if (children.size() == 2) {
if (children.size() - orderByElements.size() == 2) {
Expr arg1 = getChild(1);
if (!arg1.type.isStringType() && !arg1.type.isNull()) {
throw new AnalysisException(
"group_concat requires second parameter to be of type STRING: " + this.toSql());
}
}
return;
}
@ -926,6 +952,15 @@ public class FunctionCallExpr extends Expr {
childTypes[2] = assignmentCompatibleType;
fn = getBuiltinFunction(fnName.getFunction(), childTypes,
Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF);
} else if (AggregateFunction.SUPPORT_ORDER_BY_AGGREGATE_FUNCTION_NAME_SET.contains(
fnName.getFunction().toLowerCase())) {
// order by elements add as child like windows function. so if we get the
// param of arg, we need remove the order by elements
Type[] childTypes = collectChildReturnTypes();
Type[] newChildTypes = new Type[children.size() - orderByElements.size()];
System.arraycopy(childTypes, 0, newChildTypes, 0, newChildTypes.length);
fn = getBuiltinFunction(fnName.getFunction(), newChildTypes,
Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF);
} else {
// now first find table function in table function sets
if (isTableFnCall) {
@ -1024,7 +1059,7 @@ public class FunctionCallExpr extends Expr {
Type[] args = fn.getArgs();
if (args.length > 0) {
// Implicitly cast all the children to match the function if necessary
for (int i = 0; i < argTypes.length; ++i) {
for (int i = 0; i < argTypes.length - orderByElements.size(); ++i) {
// For varargs, we must compare with the last type in callArgs.argTypes.
int ix = Math.min(args.length - 1, i);
if (!argTypes[i].matchesType(args[ix]) && Config.use_date_v2_by_default
@ -1327,7 +1362,6 @@ public class FunctionCallExpr extends Expr {
return result.toString();
}
@Override
public void finalizeImplForNereids() throws AnalysisException {
// TODO: support other functions
// TODO: Supports type conversion to match the type of the function's parameters
@ -1356,4 +1390,16 @@ public class FunctionCallExpr extends Expr {
public void setMergeForNereids(boolean isMergeAggFn) {
this.isMergeAggFn = isMergeAggFn;
}
public List<OrderByElement> getOrderByElements() {
return orderByElements;
}
public void setOrderByElements(List<OrderByElement> orderByElements) {
this.orderByElements = orderByElements;
}
private void setChildren() {
orderByElements.forEach(o -> addChild(o.getExpr()));
}
}

View File

@ -37,6 +37,7 @@ public class SelectList {
private boolean isDistinct;
private Map<String, String> optHints;
private List<OrderByElement> orderByElements;
// ///////////////////////////////////////
// BEGIN: Members that need to be reset()
@ -90,6 +91,12 @@ public class SelectList {
}
}
public void setOrderByElements(List<OrderByElement> orderByElements) {
if (orderByElements != null) {
this.orderByElements = orderByElements;
}
}
public void reset() {
for (SelectListItem item : items) {
if (!item.isStar()) {

View File

@ -55,6 +55,8 @@ public class AggregateFunction extends Function {
public static ImmutableSet<String> ALWAYS_NULLABLE_AGGREGATE_FUNCTION_NAME_SET =
ImmutableSet.of("stddev_samp", "variance_samp", "var_samp", "percentile_approx");
public static ImmutableSet<String> SUPPORT_ORDER_BY_AGGREGATE_FUNCTION_NAME_SET = ImmutableSet.of("group_concat");
// Set if different from retType_, null otherwise.
private Type intermediateType;

View File

@ -36,6 +36,7 @@ import org.apache.doris.thrift.TExplainLevel;
import org.apache.doris.thrift.TExpr;
import org.apache.doris.thrift.TPlanNode;
import org.apache.doris.thrift.TPlanNodeType;
import org.apache.doris.thrift.TSortInfo;
import com.google.common.base.MoreObjects;
import com.google.common.base.Preconditions;
@ -249,14 +250,27 @@ public class AggregationNode extends PlanNode {
protected void toThrift(TPlanNode msg) {
msg.node_type = TPlanNodeType.AGGREGATION_NODE;
List<TExpr> aggregateFunctions = Lists.newArrayList();
List<TSortInfo> aggSortInfos = Lists.newArrayList();
// only serialize agg exprs that are being materialized
for (FunctionCallExpr e : aggInfo.getMaterializedAggregateExprs()) {
aggregateFunctions.add(e.treeToThrift());
List<TExpr> orderingExpr = Lists.newArrayList();
List<Boolean> isAscs = Lists.newArrayList();
List<Boolean> nullFirsts = Lists.newArrayList();
e.getOrderByElements().forEach(o -> {
orderingExpr.add(o.getExpr().treeToThrift());
isAscs.add(o.getIsAsc());
nullFirsts.add(o.getNullsFirstParam());
});
aggSortInfos.add(new TSortInfo(orderingExpr, isAscs, nullFirsts));
}
msg.agg_node = new TAggregationNode(
aggregateFunctions,
aggInfo.getIntermediateTupleId().asInt(),
aggInfo.getOutputTupleId().asInt(), needsFinalize);
msg.agg_node.setAggSortInfos(aggSortInfos);
msg.agg_node.setUseStreamingPreaggregation(useStreamingPreagg);
List<Expr> groupingExprs = aggInfo.getGroupingExprs();
if (groupingExprs != null) {