From 4a474420c8b33a17efe29c7a402a31ee99f35da1 Mon Sep 17 00:00:00 2001 From: Jing Shen Date: Fri, 10 Jun 2022 10:32:40 +0800 Subject: [PATCH] [feature](function) Add ntile function (#9867) Add ntile function. For non-vectorized-engine, I just implemented like Impala, rewrite ntile to row_number and count. But for vectorized-engine, I implemented WindowFunctionNTile. --- .../aggregate_function_window.cpp | 10 ++ .../aggregate_function_window.h | 52 ++++++++ be/src/vec/exec/vanalytic_eval_node.cpp | 2 +- docs/.vuepress/sidebar/en/docs.js | 1 + docs/.vuepress/sidebar/zh-CN/docs.js | 1 + .../window-functions/WINDOW-FUNCTION-NTILE.md | 43 ++++++ .../window-functions/WINDOW-FUNCTION-NTILE.md | 43 ++++++ .../apache/doris/analysis/AnalyticExpr.java | 126 +++++++++++++++++- .../analysis/BuiltinAggregateFunction.java | 3 +- .../doris/analysis/FunctionCallExpr.java | 3 +- .../org/apache/doris/analysis/SelectStmt.java | 11 ++ .../org/apache/doris/catalog/FunctionSet.java | 7 + gensrc/thrift/PlanNodes.thrift | 1 + .../window_functions/test_ntile_function.csv | 9 ++ .../window_functions/test_ntile_function.out | 89 +++++++++++++ .../test_ntile_function.groovy | 87 ++++++++++++ 16 files changed, 484 insertions(+), 4 deletions(-) create mode 100644 docs/en/docs/sql-manual/sql-functions/window-functions/WINDOW-FUNCTION-NTILE.md create mode 100644 docs/zh-CN/docs/sql-manual/sql-functions/window-functions/WINDOW-FUNCTION-NTILE.md create mode 100644 regression-test/data/query/sql_functions/window_functions/test_ntile_function.csv create mode 100644 regression-test/data/query/sql_functions/window_functions/test_ntile_function.out create mode 100644 regression-test/suites/query/sql_functions/window_functions/test_ntile_function.groovy diff --git a/be/src/vec/aggregate_functions/aggregate_function_window.cpp b/be/src/vec/aggregate_functions/aggregate_function_window.cpp index 53a4c4931c..1a342d805a 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_window.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_window.cpp @@ -53,6 +53,15 @@ AggregateFunctionPtr create_aggregate_function_row_number(const std::string& nam return std::make_shared(argument_types); } +AggregateFunctionPtr create_aggregate_function_ntile(const std::string& name, + const DataTypes& argument_types, + const Array& parameters, + const bool result_is_nullable) { + assert_unary(name, argument_types); + + return std::make_shared(argument_types, parameters); +} + template AggregateFunctionPtr create_aggregate_function_lag(const std::string& name, const DataTypes& argument_types, @@ -77,6 +86,7 @@ void register_aggregate_function_window_rank(AggregateFunctionSimpleFactory& fac factory.register_function("dense_rank", create_aggregate_function_dense_rank); factory.register_function("rank", create_aggregate_function_rank); factory.register_function("row_number", create_aggregate_function_row_number); + factory.register_function("ntile", create_aggregate_function_ntile); } void register_aggregate_function_window_lead_lag(AggregateFunctionSimpleFactory& factory) { diff --git a/be/src/vec/aggregate_functions/aggregate_function_window.h b/be/src/vec/aggregate_functions/aggregate_function_window.h index 2a9bebfd11..37b73bd7c1 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_window.h +++ b/be/src/vec/aggregate_functions/aggregate_function_window.h @@ -155,6 +155,58 @@ public: void deserialize(AggregateDataPtr place, BufferReadable& buf, Arena*) const override {} }; +struct NTileData { + int64_t bucket_index; + int64_t rows; +}; + +class WindowFunctionNTile final + : public IAggregateFunctionDataHelper { +public: + WindowFunctionNTile(const DataTypes& argument_types_, const Array& parameters) + : IAggregateFunctionDataHelper(argument_types_, parameters) {} + + String get_name() const override { return "ntile"; } + + DataTypePtr get_return_type() const override { return std::make_shared(); } + + void add(AggregateDataPtr place, const IColumn**, size_t, Arena*) const override {} + + void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start, + int64_t frame_end, AggregateDataPtr place, const IColumn** columns, + Arena* arena) const override { + // some variables are partition related, but there is no chance to init them + // when the new partition arrives, so we calculate them evey time now. + // Partition = big_bucket_num * big_bucket_size + small_bucket_num * small_bucket_size + int64_t row_index = ++WindowFunctionNTile::data(place).rows - 1; + int64_t bucket_num = columns[0]->get_int(0); + int64_t partition_size = partition_end - partition_start; + + int64 small_bucket_size = partition_size / bucket_num; + int64 big_bucket_num = partition_size % bucket_num; + int64 first_small_bucket_row_index = big_bucket_num * (small_bucket_size + 1); + if (row_index >= first_small_bucket_row_index) { + // small_bucket_size can't be zero + WindowFunctionNTile::data(place).bucket_index = + big_bucket_num + 1 + + (row_index - first_small_bucket_row_index) / small_bucket_size; + } else { + WindowFunctionNTile::data(place).bucket_index = row_index / (small_bucket_size + 1) + 1; + } + } + + void reset(AggregateDataPtr place) const override { WindowFunctionNTile::data(place).rows = 0; } + + void insert_result_into(ConstAggregateDataPtr place, IColumn& to) const override { + assert_cast(to).get_data().push_back( + WindowFunctionNTile::data(place).bucket_index); + } + + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena*) const override {} + void serialize(ConstAggregateDataPtr place, BufferWritable& buf) const override {} + void deserialize(AggregateDataPtr place, BufferReadable& buf, Arena*) const override {} +}; + struct Value { public: bool is_null() const { return _is_null; } diff --git a/be/src/vec/exec/vanalytic_eval_node.cpp b/be/src/vec/exec/vanalytic_eval_node.cpp index a88c7a746a..8c79be5a92 100644 --- a/be/src/vec/exec/vanalytic_eval_node.cpp +++ b/be/src/vec/exec/vanalytic_eval_node.cpp @@ -580,7 +580,7 @@ Status VAnalyticEvalNode::_output_current_block(Block* block) { return Status::OK(); } -//now is execute for lead/lag row_number/rank/dense_rank functions +//now is execute for lead/lag row_number/rank/dense_rank/ntile functions //sum min max count avg first_value last_value functions void VAnalyticEvalNode::_execute_for_win_func(BlockRowPos partition_start, BlockRowPos partition_end, BlockRowPos frame_start, diff --git a/docs/.vuepress/sidebar/en/docs.js b/docs/.vuepress/sidebar/en/docs.js index fce54a678b..f77fd24c74 100644 --- a/docs/.vuepress/sidebar/en/docs.js +++ b/docs/.vuepress/sidebar/en/docs.js @@ -540,6 +540,7 @@ module.exports = [ "WINDOW-FUNCTION-FIRST-VALUE", "WINDOW-FUNCTION-LAST-VALUE", "WINDOW-FUNCTION-ROW-NUMBER", + "WINDOW-FUNCTION-NTILE", ], }, { diff --git a/docs/.vuepress/sidebar/zh-CN/docs.js b/docs/.vuepress/sidebar/zh-CN/docs.js index e70acc8f53..fea0e6bcaa 100644 --- a/docs/.vuepress/sidebar/zh-CN/docs.js +++ b/docs/.vuepress/sidebar/zh-CN/docs.js @@ -540,6 +540,7 @@ module.exports = [ "WINDOW-FUNCTION-FIRST-VALUE", "WINDOW-FUNCTION-LAST-VALUE", "WINDOW-FUNCTION-ROW-NUMBER", + "WINDOW-FUNCTION-NTILE", ], }, { diff --git a/docs/en/docs/sql-manual/sql-functions/window-functions/WINDOW-FUNCTION-NTILE.md b/docs/en/docs/sql-manual/sql-functions/window-functions/WINDOW-FUNCTION-NTILE.md new file mode 100644 index 0000000000..d0b20d585c --- /dev/null +++ b/docs/en/docs/sql-manual/sql-functions/window-functions/WINDOW-FUNCTION-NTILE.md @@ -0,0 +1,43 @@ +--- +{ + "title": "WINDOW-FUNCTION-NTILE", + "language": "en" +} +--- + + + +## WINDOW FUNCTION NTILE +### description + +For NTILE(n), this function will divides rows in a sorted partition into a specific number of groups(in this case, n buckets). Each group is assigned a bucket number starting at one. For the case that cannot be distributed evenly, rows are preferentially allocated to the bucket with the smaller number. The number of rows in all buckets cannot differ by more than 1. For now, n must be constant positive integer. + +```sql +NTILE(n) OVER(partition_by_clause order_by_clause) +``` + +### example + +```sql +select x, y, ntile(2) over(partition by x order by y) as ntile from int_t; + +| x | y | rank | +|---|------|----------| +| 1 | 1 | 1 | +| 1 | 2 | 1 | +| 1 | 2 | 2 | +| 2 | 1 | 1 | +| 2 | 2 | 1 | +| 2 | 3 | 2 | +| 3 | 1 | 1 | +| 3 | 1 | 1 | +| 3 | 2 | 2 | +``` + +### keywords + + WINDOW,FUNCTION,NTILE \ No newline at end of file diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/window-functions/WINDOW-FUNCTION-NTILE.md b/docs/zh-CN/docs/sql-manual/sql-functions/window-functions/WINDOW-FUNCTION-NTILE.md new file mode 100644 index 0000000000..36af3dee03 --- /dev/null +++ b/docs/zh-CN/docs/sql-manual/sql-functions/window-functions/WINDOW-FUNCTION-NTILE.md @@ -0,0 +1,43 @@ +--- +{ + "title": "WINDOW-FUNCTION-NTILE", + "language": "zh-CN" +} +--- + + + +## WINDOW FUNCTION NTILE +### description + +对于NTILE(n), 该函数会将排序分区中的所有行按顺序分配到n个桶中(编号较小的桶满了之后才能分配编号较大的桶)。对于每一行, NTILE()函数会返回该行数据所在的桶的编号(从1到n)。对于不能平均分配的情况, 优先分配到编号较小的桶中。所有桶中的行数相差不能超过1。目前n只能是正整数。 + +```sql +NTILE(n) OVER(partition_by_clause order_by_clause) +``` + +### example + +```sql +select x, y, ntile(2) over(partition by x order by y) as rank from int_t; + +| x | y | rank | +|---|------|----------| +| 1 | 1 | 1 | +| 1 | 2 | 1 | +| 1 | 2 | 2 | +| 2 | 1 | 1 | +| 2 | 2 | 1 | +| 2 | 3 | 2 | +| 3 | 1 | 1 | +| 3 | 1 | 1 | +| 3 | 2 | 2 | +``` + +### keywords + + WINDOW,FUNCTION,NTILE \ No newline at end of file diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyticExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyticExpr.java index 55169443e4..b0147dff55 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyticExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyticExpr.java @@ -63,6 +63,7 @@ import java.util.Objects; */ public class AnalyticExpr extends Expr { private final static Logger LOG = LoggerFactory.getLogger(AnalyticExpr.class); + private static String NTILE = "NTILE"; private FunctionCallExpr fnCall; private final List partitionExprs; @@ -236,7 +237,8 @@ public class AnalyticExpr extends Expr { return fn.functionName().equalsIgnoreCase(RANK) || fn.functionName().equalsIgnoreCase(DENSERANK) - || fn.functionName().equalsIgnoreCase(ROWNUMBER); + || fn.functionName().equalsIgnoreCase(ROWNUMBER) + || fn.functionName().equalsIgnoreCase(NTILE); } static private boolean isHllAggFn(Function fn) { @@ -247,6 +249,110 @@ public class AnalyticExpr extends Expr { return fn.functionName().equalsIgnoreCase(HLL_UNION_AGG); } + private static boolean isNTileFn(Function fn) { + if (!isAnalyticFn(fn)) { + return false; + } + + return fn.functionName().equalsIgnoreCase(NTILE); + } + + /** + * Rewrite the following analytic functions: ntile(). + * Returns a new Expr if the analytic expr is rewritten, returns null if it's not one + * that we want to equal. + */ + public static Expr rewrite(AnalyticExpr analyticExpr) { + Function fn = analyticExpr.getFnCall().getFn(); + // TODO(zc) + // if (AnalyticExpr.isPercentRankFn(fn)) { + // return createPercentRank(analyticExpr); + // } else if (AnalyticExpr.isCumeDistFn(fn)) { + // return createCumeDist(analyticExpr); + // } else if (AnalyticExpr.isNtileFn(fn)) { + // return createNtile(analyticExpr); + // } + if (isNTileFn(fn) && !VectorizedUtil.isVectorized()) { + return createNTile(analyticExpr); + } + return null; + } + + /** + * Rewrite ntile(). + * The logic is translated from be class WindowFunctionNTile. + * count = bigBucketNum * (smallBucketSize + 1) + smallBucketNum * smallBucketSize + * bigBucketNum + smallBucketNum = bucketNum + */ + private static Expr createNTile(AnalyticExpr analyticExpr) { + Preconditions.checkState(AnalyticExpr.isNTileFn(analyticExpr.getFnCall().getFn())); + Expr bucketNum = analyticExpr.getChild(0); + AnalyticExpr rowNum = create("row_number", analyticExpr, true, false); + AnalyticExpr count = create("count", analyticExpr, false, false); + + IntLiteral one = new IntLiteral(1); + ArithmeticExpr smallBucketSize = new ArithmeticExpr(ArithmeticExpr.Operator.INT_DIVIDE, + count, bucketNum); + ArithmeticExpr bigBucketNum = new ArithmeticExpr(ArithmeticExpr.Operator.MOD, + count, bucketNum); + ArithmeticExpr firstSmallBucketRowIndex = new ArithmeticExpr(ArithmeticExpr.Operator.MULTIPLY, + bigBucketNum, + new ArithmeticExpr(ArithmeticExpr.Operator.ADD, + smallBucketSize, one)); + ArithmeticExpr rowIndex = new ArithmeticExpr(ArithmeticExpr.Operator.SUBTRACT, + rowNum, one); + + + List ifParams = new ArrayList<>(); + ifParams.add( + new BinaryPredicate(BinaryPredicate.Operator.GE, rowIndex, firstSmallBucketRowIndex)); + + ArithmeticExpr rowInSmallBucket = new ArithmeticExpr(ArithmeticExpr.Operator.ADD, + new ArithmeticExpr(ArithmeticExpr.Operator.ADD, + bigBucketNum, one), + new ArithmeticExpr(ArithmeticExpr.Operator.INT_DIVIDE, + new ArithmeticExpr(ArithmeticExpr.Operator.SUBTRACT, + rowIndex, firstSmallBucketRowIndex), + smallBucketSize)); + ArithmeticExpr rowInBigBucket = new ArithmeticExpr(ArithmeticExpr.Operator.ADD, + new ArithmeticExpr(ArithmeticExpr.Operator.INT_DIVIDE, + rowIndex, + new ArithmeticExpr(ArithmeticExpr.Operator.ADD, + smallBucketSize, one)), + one); + ifParams.add(rowInSmallBucket); + ifParams.add(rowInBigBucket); + + return new FunctionCallExpr("if", ifParams); + } + + /** + * Create a new Analytic Expr and associate it with a new function. + * Takes a reference analytic expression and clones the partition expressions and the + * order by expressions if 'copyOrderBy' is set and optionally reverses it if + * 'reverseOrderBy' is set. The new function that it will be associated with is + * specified by fnName. + */ + private static AnalyticExpr create(String fnName, + AnalyticExpr referenceExpr, boolean copyOrderBy, boolean reverseOrderBy) { + FunctionCallExpr fnExpr = new FunctionCallExpr(fnName, new ArrayList<>()); + fnExpr.setIsAnalyticFnCall(true); + List orderByElements = null; + if (copyOrderBy) { + if (reverseOrderBy) { + orderByElements = OrderByElement.reverse(referenceExpr.getOrderByElements()); + } else { + orderByElements = new ArrayList<>(); + for (OrderByElement elem : referenceExpr.getOrderByElements()) { + orderByElements.add(elem.clone()); + } + } + } + AnalyticExpr analyticExpr = new AnalyticExpr(fnExpr, + Expr.cloneList(referenceExpr.getPartitionExprs()), orderByElements, null); + return analyticExpr; + } + /** * Checks that the value expr of an offset boundary of a RANGE window is compatible * with orderingExprs (and that there's only a single ordering expr). @@ -533,6 +639,24 @@ public class AnalyticExpr extends Expr { return; } + if (analyticFnName.getFunction().equalsIgnoreCase(NTILE)) { + Preconditions.checkState(window == null, "Unexpected window set for ntile()"); + + Expr bucketExpr = getFnCall().getFnParams().exprs().get(0); + if (bucketExpr instanceof LiteralExpr && bucketExpr.getType().getPrimitiveType().isIntegerType()) { + Preconditions.checkState(((LiteralExpr) bucketExpr).getLongValue() > 0, + "Parameter n in ntile(n) should be positive."); + } else { + throw new AnalysisException("Parameter n in ntile(n) should be constant positive integer."); + } + + window = new AnalyticWindow(AnalyticWindow.Type.ROWS, + new Boundary(BoundaryType.UNBOUNDED_PRECEDING, null), + new Boundary(BoundaryType.CURRENT_ROW, null)); + resetWindow = true; + return; + } + // Explicitly set the default arguments to lead()/lag() for BE simplicity. // Set a window for lead(): UNBOUNDED PRECEDING to OFFSET FOLLOWING, // Set a window for lag(): UNBOUNDED PRECEDING to OFFSET PRECEDING. diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/BuiltinAggregateFunction.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/BuiltinAggregateFunction.java index 3ccb85f0ae..00b99e87de 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/BuiltinAggregateFunction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/BuiltinAggregateFunction.java @@ -117,7 +117,8 @@ public class BuiltinAggregateFunction extends Function { DENSE_RANK("DENSE_RANK", TAggregationOp.DENSE_RANK, null), ROW_NUMBER("ROW_NUMBER", TAggregationOp.ROW_NUMBER, null), LEAD("LEAD", TAggregationOp.LEAD, null), - FIRST_VALUE_REWRITE("FIRST_VALUE_REWRITE", null, null); + FIRST_VALUE_REWRITE("FIRST_VALUE_REWRITE", null, null), + NTILE("NTILE", TAggregationOp.NTILE, null); private final String description; private final TAggregationOp thriftOp; diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java index 4b6bfe2cb4..594bd50c09 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java @@ -521,7 +521,8 @@ public class FunctionCallExpr extends Expr { || fnName.getFunction().equalsIgnoreCase("row_number") || fnName.getFunction().equalsIgnoreCase("first_value") || fnName.getFunction().equalsIgnoreCase("last_value") - || fnName.getFunction().equalsIgnoreCase("first_value_rewrite")) { + || fnName.getFunction().equalsIgnoreCase("first_value_rewrite") + || fnName.getFunction().equalsIgnoreCase("ntile")) { if (!isAnalyticFnCall) { throw new AnalysisException(fnName.getFunction() + " only used in analytic function"); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java index 8e7a56598d..067a8882de 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java @@ -1277,6 +1277,17 @@ public class SelectStmt extends QueryStmt { return; } ExprSubstitutionMap rewriteSmap = new ExprSubstitutionMap(); + for (Expr expr : analyticExprs) { + AnalyticExpr toRewrite = (AnalyticExpr) expr; + Expr newExpr = AnalyticExpr.rewrite(toRewrite); + if (newExpr != null) { + newExpr.analyze(analyzer); + if (!rewriteSmap.containsMappingFor(toRewrite)) { + rewriteSmap.put(toRewrite, newExpr); + } + } + } + if (rewriteSmap.size() > 0) { // Substitute the exprs with their rewritten versions. ArrayList updatedAnalyticExprs = diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java index 63c49b2f14..88808abe89 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java @@ -36,6 +36,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -2347,6 +2348,9 @@ public class FunctionSet { prefix + "17count_star_updateEPN9doris_udf15FunctionContextEPNS1_9BigIntValE", prefix + "11count_mergeEPN9doris_udf15FunctionContextERKNS1_9BigIntValEPS4_", null, null)); + //ntile, we use rewrite sql for ntile, actually we don't really need this. + addBuiltin(AggregateFunction.createAnalyticBuiltin("ntile", + Collections.singletonList(Type.BIGINT), Type.BIGINT, Type.BIGINT, null, null, null, null, null)); //vec Rank addBuiltin(AggregateFunction.createAnalyticBuiltin("rank", @@ -2371,6 +2375,9 @@ public class FunctionSet { prefix + "17count_star_updateEPN9doris_udf15FunctionContextEPNS1_9BigIntValE", prefix + "11count_mergeEPN9doris_udf15FunctionContextERKNS1_9BigIntValEPS4_", null, null, true)); + //vec ntile + addBuiltin(AggregateFunction.createAnalyticBuiltin("ntile", + Collections.singletonList(Type.BIGINT), Type.BIGINT, Type.BIGINT, null, null, null, null, null, true)); for (Type t : Type.getSupportedTypes()) { if (t.isNull()) { diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift index 1494a6529a..98b1d6e182 100644 --- a/gensrc/thrift/PlanNodes.thrift +++ b/gensrc/thrift/PlanNodes.thrift @@ -442,6 +442,7 @@ enum TAggregationOp { LAG, HLL_C, BITMAP_UNION, + NTILE, } //struct TAggregateFunctionCall { diff --git a/regression-test/data/query/sql_functions/window_functions/test_ntile_function.csv b/regression-test/data/query/sql_functions/window_functions/test_ntile_function.csv new file mode 100644 index 0000000000..285b93bdda --- /dev/null +++ b/regression-test/data/query/sql_functions/window_functions/test_ntile_function.csv @@ -0,0 +1,9 @@ +1 1 1 +1 1 1 +1 1 3 +1 2 1 +1 3 1 +1 3 2 +1 3 3 +5 4 3 +4 3 1 diff --git a/regression-test/data/query/sql_functions/window_functions/test_ntile_function.out b/regression-test/data/query/sql_functions/window_functions/test_ntile_function.out new file mode 100644 index 0000000000..629e947701 --- /dev/null +++ b/regression-test/data/query/sql_functions/window_functions/test_ntile_function.out @@ -0,0 +1,89 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select -- +1 1 3 1 +1 1 1 1 +1 1 1 1 +1 2 1 2 +1 3 3 2 +1 3 2 3 +1 3 1 3 +4 3 1 1 +5 4 3 1 + +-- !select -- +1 1 3 1 +1 1 1 1 +1 1 1 2 +1 2 1 2 +1 3 3 3 +1 3 2 4 +1 3 1 5 +4 3 1 1 +5 4 3 1 + +-- !select -- +1 1 3 1 +1 1 1 1 +1 1 1 1 +2 1 1 2 +3 1 3 2 +3 1 2 2 +3 1 1 3 +3 4 1 3 +4 5 3 3 + +-- !select -- +1 1 1 1 +1 1 1 1 +1 2 1 2 +1 3 1 2 +1 3 4 3 +2 3 1 1 +3 1 1 1 +3 3 1 2 +3 4 5 3 + +-- !select -- +1 1 3 1 +1 1 1 1 +1 1 1 1 +1 2 1 2 +1 3 3 2 +1 3 2 3 +1 3 1 3 +4 3 1 1 +5 4 3 1 + +-- !select -- +1 1 3 1 +1 1 1 1 +1 1 1 2 +1 2 1 2 +1 3 3 3 +1 3 2 4 +1 3 1 5 +4 3 1 1 +5 4 3 1 + +-- !select -- +1 1 3 1 +1 1 1 1 +1 1 1 1 +2 1 1 2 +3 1 3 2 +3 1 2 2 +3 1 1 3 +3 4 1 3 +4 5 3 3 + +-- !select -- +1 1 1 1 +1 1 1 1 +1 2 1 2 +1 3 1 2 +1 3 4 3 +2 3 1 1 +3 1 1 1 +3 3 1 2 +3 4 5 3 + diff --git a/regression-test/suites/query/sql_functions/window_functions/test_ntile_function.groovy b/regression-test/suites/query/sql_functions/window_functions/test_ntile_function.groovy new file mode 100644 index 0000000000..c0d79248e2 --- /dev/null +++ b/regression-test/suites/query/sql_functions/window_functions/test_ntile_function.groovy @@ -0,0 +1,87 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +suite("test_ntile_function", "query") { + def tableName = "test_ntile_function" + + sql """ DROP TABLE IF EXISTS ${tableName} """ + sql """ + CREATE TABLE `${tableName}` ( + `k1` tinyint(4) NOT NULL COMMENT "", + `k2` smallint(6) NOT NULL COMMENT "", + `k3` smallint(6) NOT NULL COMMENT "" + ) ENGINE=OLAP + DUPLICATE KEY(`k1`, `k2`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`k1`) BUCKETS 5 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "in_memory" = "false", + "storage_format" = "V2" + ); + """ + streamLoad { + table tableName + + // default label is UUID: + // set 'label' UUID.randomUUID().toString() + + // default column_separator is specify in doris fe config, usually is '\t'. + // this line change to ',' + set 'column_separator', '\t' + + // relate to ${DORIS_HOME}/regression-test/data/demo/streamload_input.csv. + // also, you can stream load a http stream, e.g. http://xxx/some.csv + file 'test_ntile_function.csv' + + time 10000 // limit inflight 10s + + // stream load action will check result, include Success status, and NumberTotalRows == NumberLoadedRows + + // if declared a check callback, the default check condition will ignore. + // So you must check all condition + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + assertEquals(json.NumberTotalRows, json.NumberLoadedRows) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + } + // Not Vectorized + sql """ set enable_vectorized_engine = false """ + + qt_select "select k1, k2, k3, ntile(3) over (partition by k1 order by k2) as ntile from ${tableName};" + qt_select "select k1, k2, k3, ntile(5) over (partition by k1 order by k2) as ntile from ${tableName};" + qt_select "select k2, k1, k3, ntile(3) over (order by k2) as ntile from ${tableName};" + qt_select "select k3, k2, k1, ntile(3) over (partition by k3 order by k2) as ntile from ${tableName};" + + // vectorized + sql """ set enable_vectorized_engine = true """ + + qt_select "select k1, k2, k3, ntile(3) over (partition by k1 order by k2) as ntile from ${tableName};" + qt_select "select k1, k2, k3, ntile(5) over (partition by k1 order by k2) as ntile from ${tableName};" + qt_select "select k2, k1, k3, ntile(3) over (order by k2) as ntile from ${tableName};" + qt_select "select k3, k2, k1, ntile(3) over (partition by k3 order by k2) as ntile from ${tableName};" +} + + + + +