[Feature](count_by_enum) support count_by_enum function (#22071)
count_by_enum(expr1, expr2, ..., exprN): treats the data in each column as an enumeration and counts how many times each enumerated value occurs. For each column it returns the count of every enumerated value, together with the number of non-null values and the number of null values.
This commit is contained in:
@ -27,6 +27,7 @@ import org.apache.doris.nereids.trees.expressions.functions.agg.BitmapUnionInt;
|
||||
import org.apache.doris.nereids.trees.expressions.functions.agg.CollectList;
|
||||
import org.apache.doris.nereids.trees.expressions.functions.agg.CollectSet;
|
||||
import org.apache.doris.nereids.trees.expressions.functions.agg.Count;
|
||||
import org.apache.doris.nereids.trees.expressions.functions.agg.CountByEnum;
|
||||
import org.apache.doris.nereids.trees.expressions.functions.agg.GroupBitAnd;
|
||||
import org.apache.doris.nereids.trees.expressions.functions.agg.GroupBitOr;
|
||||
import org.apache.doris.nereids.trees.expressions.functions.agg.GroupBitXor;
|
||||
@ -85,6 +86,7 @@ public class BuiltinAggregateFunctions implements FunctionHelper {
|
||||
agg(CollectList.class, "collect_list"),
|
||||
agg(CollectSet.class, "collect_set"),
|
||||
agg(Count.class, "count"),
|
||||
agg(CountByEnum.class, "count_by_enum"),
|
||||
agg(GroupBitAnd.class, "group_bit_and"),
|
||||
agg(GroupBitOr.class, "group_bit_or"),
|
||||
agg(GroupBitXor.class, "group_bit_xor"),
|
||||
|
||||
@ -203,6 +203,7 @@ public class FunctionSet<T> {
|
||||
public static final String HISTOGRAM = "histogram";
|
||||
public static final String HIST = "hist";
|
||||
public static final String MAP_AGG = "map_agg";
|
||||
public static final String COUNT_BY_ENUM = "count_by_enum";
|
||||
|
||||
private static final Map<Type, String> TOPN_UPDATE_SYMBOL =
|
||||
ImmutableMap.<Type, String>builder()
|
||||
@ -1613,6 +1614,21 @@ public class FunctionSet<T> {
|
||||
"lead", Lists.newArrayList(t, Type.BIGINT), t, t, true));
|
||||
}
|
||||
|
||||
// count_by_enum
|
||||
addBuiltin(AggregateFunction.createBuiltin(COUNT_BY_ENUM,
|
||||
Lists.newArrayList(Type.STRING),
|
||||
Type.STRING,
|
||||
Type.STRING,
|
||||
true,
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
false, true, false, true));
|
||||
|
||||
}
|
||||
|
||||
public Map<String, List<Function>> getVectorizedFunctions() {
|
||||
|
||||
@ -0,0 +1,63 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package org.apache.doris.nereids.trees.expressions.functions.agg;
|
||||
|
||||
import org.apache.doris.catalog.FunctionSignature;
|
||||
import org.apache.doris.nereids.trees.expressions.Expression;
|
||||
import org.apache.doris.nereids.trees.expressions.functions.AlwaysNotNullable;
|
||||
import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
|
||||
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
|
||||
import org.apache.doris.nereids.types.ArrayType;
|
||||
import org.apache.doris.nereids.types.StringType;
|
||||
import org.apache.doris.nereids.util.ExpressionUtils;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/** count_by_enum agg function. */
|
||||
public class CountByEnum extends AggregateFunction implements ExplicitlyCastableSignature, AlwaysNotNullable {
|
||||
|
||||
public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
|
||||
FunctionSignature.ret(ArrayType.of(StringType.INSTANCE)).args(StringType.INSTANCE)
|
||||
);
|
||||
|
||||
/**
|
||||
* constructor with 1 or more arguments.
|
||||
*/
|
||||
public CountByEnum(Expression arg, Expression... varArgs) {
|
||||
super("count_by_enum", ExpressionUtils.mergeArguments(arg, varArgs));
|
||||
}
|
||||
|
||||
@Override
|
||||
public AggregateFunction withDistinctAndChildren(boolean distinct, List<Expression> children) {
|
||||
Preconditions.checkArgument(children.size() == 1);
|
||||
return new CollectList(distinct, children.get(0));
|
||||
}
|
||||
|
||||
@Override
|
||||
public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
|
||||
return visitor.visitCountByEnum(this, context);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<FunctionSignature> getSignatures() {
|
||||
return SIGNATURES;
|
||||
}
|
||||
}
|
||||
@ -28,6 +28,7 @@ import org.apache.doris.nereids.trees.expressions.functions.agg.BitmapUnionInt;
|
||||
import org.apache.doris.nereids.trees.expressions.functions.agg.CollectList;
|
||||
import org.apache.doris.nereids.trees.expressions.functions.agg.CollectSet;
|
||||
import org.apache.doris.nereids.trees.expressions.functions.agg.Count;
|
||||
import org.apache.doris.nereids.trees.expressions.functions.agg.CountByEnum;
|
||||
import org.apache.doris.nereids.trees.expressions.functions.agg.GroupBitAnd;
|
||||
import org.apache.doris.nereids.trees.expressions.functions.agg.GroupBitOr;
|
||||
import org.apache.doris.nereids.trees.expressions.functions.agg.GroupBitXor;
|
||||
@ -118,6 +119,10 @@ public interface AggregateFunctionVisitor<R, C> {
|
||||
return visitAggregateFunction(count, context);
|
||||
}
|
||||
|
||||
default R visitCountByEnum(CountByEnum count, C context) {
|
||||
return visitAggregateFunction(count, context);
|
||||
}
|
||||
|
||||
default R visitMultiDistinctCount(MultiDistinctCount multiDistinctCount, C context) {
|
||||
return visitAggregateFunction(multiDistinctCount, context);
|
||||
}
|
||||
|
||||
@ -527,4 +527,46 @@ public class AggregateTest extends TestWithFeService {
|
||||
}
|
||||
} while (false);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCountByEnumAnalysisException() throws Exception {
|
||||
ConnectContext ctx = UtFrameUtils.createDefaultCtx();
|
||||
|
||||
// normal.
|
||||
do {
|
||||
String query = "select count_by_enum(name) from "
|
||||
+ DB_NAME + "." + TABLE_NAME;
|
||||
try {
|
||||
UtFrameUtils.parseAndAnalyzeStmt(query, ctx);
|
||||
} catch (Exception e) {
|
||||
Assert.fail("must be AnalysisException.");
|
||||
}
|
||||
} while (false);
|
||||
|
||||
do {
|
||||
String query = "select count_by_enum(name, commission) from "
|
||||
+ DB_NAME + "." + TABLE_NAME;
|
||||
try {
|
||||
UtFrameUtils.parseAndAnalyzeStmt(query, ctx);
|
||||
} catch (Exception e) {
|
||||
Assert.fail("must be AnalysisException.");
|
||||
}
|
||||
} while (false);
|
||||
|
||||
// less argument.
|
||||
do {
|
||||
String query = "select count_by_enum() from "
|
||||
+ DB_NAME + "." + TABLE_NAME;
|
||||
try {
|
||||
UtFrameUtils.parseAndAnalyzeStmt(query, ctx);
|
||||
} catch (AnalysisException e) {
|
||||
Assert.assertTrue(e.getMessage().contains("No matching function with signature: count_by_enum()"));
|
||||
break;
|
||||
} catch (Exception e) {
|
||||
Assert.fail("must be AnalysisException.");
|
||||
}
|
||||
Assert.fail("must be AnalysisException.");
|
||||
} while (false);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user