diff --git a/be/src/vec/aggregate_functions/aggregate_function_topn.cpp b/be/src/vec/aggregate_functions/aggregate_function_topn.cpp index 19f52fbff8..74bb154d6a 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_topn.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_topn.cpp @@ -17,6 +17,8 @@ #include +#include "vec/aggregate_functions/helpers.h" +#include "vec/core/types.h" namespace doris::vectorized { AggregateFunctionPtr create_aggregate_function_topn(const std::string& name, @@ -38,8 +40,67 @@ AggregateFunctionPtr create_aggregate_function_topn(const std::string& name, return nullptr; } +template +AggregateFunctionPtr create_topn_array(const DataTypes& argument_types) { + auto type = argument_types[0].get(); + if (type->is_nullable()) { + type = assert_cast(type)->get_nested_type().get(); + } + + WhichDataType which(*type); + +#define DISPATCH(TYPE) \ + if (which.idx == TypeIndex::TYPE) \ + return AggregateFunctionPtr( \ + new AggregateFunctionTopNArray< \ + AggregateFunctionTopNImplArray, TYPE>( \ + argument_types)); + FOR_NUMERIC_TYPES(DISPATCH) +#undef DISPATCH + if (which.is_string_or_fixed_string()) { + return AggregateFunctionPtr(new AggregateFunctionTopNArray< + AggregateFunctionTopNImplArray, + std::string>(argument_types)); + } + if (which.is_decimal()) { + return AggregateFunctionPtr( + new AggregateFunctionTopNArray< + AggregateFunctionTopNImplArray, Decimal128>( + argument_types)); + } + if (which.is_date_or_datetime() || which.is_date_time_v2()) { + return AggregateFunctionPtr( + new AggregateFunctionTopNArray< + AggregateFunctionTopNImplArray, Int64>( + argument_types)); + } + if (which.is_date_v2()) { + return AggregateFunctionPtr( + new AggregateFunctionTopNArray< + AggregateFunctionTopNImplArray, UInt32>( + argument_types)); + } + + LOG(WARNING) << fmt::format("Illegal argument type for aggregate function topn_array is: {}", + type->get_name()); + return nullptr; +} + +AggregateFunctionPtr create_aggregate_function_topn_array(const std::string& name, + const DataTypes& argument_types, + const Array& parameters, + const bool result_is_nullable) { + bool has_default_param = (argument_types.size() == 3); + if (has_default_param) { + return create_topn_array(argument_types); + } else { + return create_topn_array(argument_types); + } +} + void register_aggregate_function_topn(AggregateFunctionSimpleFactory& factory) { factory.register_function("topn", create_aggregate_function_topn); + factory.register_function("topn_array", create_aggregate_function_topn_array); } } // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/aggregate_functions/aggregate_function_topn.h b/be/src/vec/aggregate_functions/aggregate_function_topn.h index ae9fdf322d..b7aa6ae14c 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_topn.h +++ b/be/src/vec/aggregate_functions/aggregate_function_topn.h @@ -21,28 +21,31 @@ #include #include +#include #include #include "vec/aggregate_functions/aggregate_function.h" -#include "vec/aggregate_functions/aggregate_function_group_concat.h" #include "vec/aggregate_functions/aggregate_function_simple_factory.h" -#include "vec/aggregate_functions/helpers.h" -#include "vec/data_types/data_type_date_time.h" -#include "vec/data_types/data_type_decimal.h" -#include "vec/data_types/data_type_number.h" +#include "vec/columns/column_array.h" +#include "vec/columns/column_string.h" +#include "vec/columns/column_vector.h" +#include "vec/data_types/data_type_array.h" #include "vec/data_types/data_type_string.h" #include "vec/io/io_helper.h" namespace doris::vectorized { // space-saving algorithm +template struct AggregateFunctionTopNData { + using ColVecType = + std::conditional_t, ColumnDecimal, ColumnVector>; void set_paramenters(int input_top_num, int space_expand_rate = 50) { top_num = input_top_num; capacity = (uint64_t)top_num * space_expand_rate; } - void add(const std::string& value) { + void add(const T& value) { auto it = counter_map.find(value); if (it != counter_map.end()) { it->second++; @@ -93,13 +96,13 @@ struct AggregateFunctionTopNData { } } - std::vector> get_remain_vector() const { - std::vector> counter_vector; + std::vector> get_remain_vector() const { + std::vector> counter_vector; for (auto it : counter_map) { counter_vector.emplace_back(it.second, it.first); } std::sort(counter_vector.begin(), counter_vector.end(), - std::greater>()); + std::greater>()); return counter_vector; } @@ -127,7 +130,7 @@ struct AggregateFunctionTopNData { read_binary(element_number, buf); counter_map.clear(); - std::pair element; + std::pair element; for (auto i = 0; i < element_number; i++) { read_binary(element.first, buf); read_binary(element.second, buf); @@ -152,11 +155,24 @@ struct AggregateFunctionTopNData { return buffer.GetString(); } + void insert_result_into(IColumn& to) const { + auto counter_vector = get_remain_vector(); + for (int i = 0; i < std::min((int)counter_vector.size(), top_num); i++) { + const auto& element = counter_vector[i]; + if constexpr (std::is_same_v) { + static_cast(to).insert_data(element.second.c_str(), + element.second.length()); + } else { + static_cast(to).get_data().push_back(element.second); + } + } + } + void reset() { counter_map.clear(); } int top_num = 0; uint64_t capacity = 0; - phmap::flat_hash_map counter_map; + phmap::flat_hash_map counter_map; }; struct StringDataImplTopN { @@ -170,8 +186,8 @@ struct StringDataImplTopN { template struct AggregateFunctionTopNImplInt { - static void add(AggregateFunctionTopNData& __restrict place, const IColumn** columns, - size_t row_num) { + static void add(AggregateFunctionTopNData& __restrict place, + const IColumn** columns, size_t row_num) { place.set_paramenters(static_cast(columns[1])->get_element(row_num)); place.add(DataHelper::to_string(*columns[0], row_num)); } @@ -179,27 +195,49 @@ struct AggregateFunctionTopNImplInt { template struct AggregateFunctionTopNImplIntInt { - static void add(AggregateFunctionTopNData& __restrict place, const IColumn** columns, - size_t row_num) { + static void add(AggregateFunctionTopNData& __restrict place, + const IColumn** columns, size_t row_num) { place.set_paramenters(static_cast(columns[1])->get_element(row_num), static_cast(columns[2])->get_element(row_num)); place.add(DataHelper::to_string(*columns[0], row_num)); } }; +template +struct AggregateFunctionTopNImplArray { + using ColVecType = + std::conditional_t, ColumnDecimal, ColumnVector>; + static void add(AggregateFunctionTopNData& __restrict place, const IColumn** columns, + size_t row_num) { + if constexpr (has_default_param) { + place.set_paramenters( + static_cast(columns[1])->get_element(row_num), + static_cast(columns[2])->get_element(row_num)); + + } else { + place.set_paramenters( + static_cast(columns[1])->get_element(row_num)); + } + if constexpr (std::is_same_v) { + StringRef ref = static_cast(*columns[0]).get_data_at(row_num); + place.add(std::string(ref.data, ref.size)); + } else { + T val = static_cast(*columns[0]).get_data()[row_num]; + place.add(val); + } + } +}; + //base function -template -class AggregateFunctionTopN final - : public IAggregateFunctionDataHelper> { +template +class AggregateFunctionTopNBase + : public IAggregateFunctionDataHelper, + AggregateFunctionTopNBase> { public: - AggregateFunctionTopN(const DataTypes& argument_types_) - : IAggregateFunctionDataHelper>( - argument_types_, {}) {} - - String get_name() const override { return "topn"; } - - DataTypePtr get_return_type() const override { return std::make_shared(); } + AggregateFunctionTopNBase(const DataTypes& argument_types_) + : IAggregateFunctionDataHelper, + AggregateFunctionTopNBase>(argument_types_, + {}) {} void add(AggregateDataPtr __restrict place, const IColumn** columns, size_t row_num, Arena*) const override { @@ -221,6 +259,18 @@ public: Arena*) const override { this->data(place).read(buf); } +}; + +//topn function return string +template +class AggregateFunctionTopN final : public AggregateFunctionTopNBase { +public: + AggregateFunctionTopN(const DataTypes& argument_types_) + : AggregateFunctionTopNBase(argument_types_) {} + + String get_name() const override { return "topn"; } + + DataTypePtr get_return_type() const override { return std::make_shared(); } void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override { std::string result = this->data(place).get(); @@ -228,4 +278,35 @@ public: } }; +//topn function return array +template +class AggregateFunctionTopNArray final : public AggregateFunctionTopNBase { +public: + AggregateFunctionTopNArray(const DataTypes& argument_types_) + : AggregateFunctionTopNBase(argument_types_), + _argument_type(argument_types_[0]) {} + + String get_name() const override { return "topn_array"; } + + DataTypePtr get_return_type() const override { + return std::make_shared(make_nullable(_argument_type)); + } + + void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override { + auto& to_arr = assert_cast(to); + auto& to_nested_col = to_arr.get_data(); + if (to_nested_col.is_nullable()) { + auto col_null = reinterpret_cast(&to_nested_col); + this->data(place).insert_result_into(col_null->get_nested_column()); + col_null->get_null_map_data().resize_fill(col_null->get_nested_column().size(), 0); + } else { + this->data(place).insert_result_into(to_nested_col); + } + to_arr.get_offsets().push_back(to_nested_col.size()); + } + +private: + DataTypePtr _argument_type; +}; + } // namespace doris::vectorized diff --git a/be/test/vec/aggregate_functions/agg_test.cpp b/be/test/vec/aggregate_functions/agg_test.cpp index b9840fb099..951b1b8e3b 100644 --- a/be/test/vec/aggregate_functions/agg_test.cpp +++ b/be/test/vec/aggregate_functions/agg_test.cpp @@ -89,7 +89,7 @@ TEST(AggTest, topn_test) { agg_function->add(place, const_cast(columns), i, nullptr); } - std::string result = reinterpret_cast(place)->get(); + std::string result = reinterpret_cast*>(place)->get(); std::string expect_result = "{\"1\":2048,\"2\":683,\"3\":341,\"4\":205,\"5\":137,\"6\":97,\"7\":73,\"8\":57,\"9\":" "46,\"10\":37}"; diff --git a/be/test/vec/aggregate_functions/vec_window_funnel_test.cpp b/be/test/vec/aggregate_functions/vec_window_funnel_test.cpp index 904d5ce80c..50bf612fd2 100644 --- a/be/test/vec/aggregate_functions/vec_window_funnel_test.cpp +++ b/be/test/vec/aggregate_functions/vec_window_funnel_test.cpp @@ -17,16 +17,14 @@ #include -#include "common/logging.h" #include "gtest/gtest.h" #include "vec/aggregate_functions/aggregate_function.h" #include "vec/aggregate_functions/aggregate_function_simple_factory.h" -#include "vec/aggregate_functions/aggregate_function_topn.h" #include "vec/columns/column_vector.h" #include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_date_time.h" #include "vec/data_types/data_type_number.h" #include "vec/data_types/data_type_string.h" - namespace doris::vectorized { void register_aggregate_function_window_funnel(AggregateFunctionSimpleFactory& factory); @@ -43,8 +41,7 @@ public: std::make_shared(), std::make_shared(), std::make_shared(), std::make_shared(), std::make_shared(), std::make_shared(), - std::make_shared(), - }; + std::make_shared()}; Array array; agg_function = factory.get("window_funnel", data_types, array, false); EXPECT_NE(agg_function, nullptr); diff --git a/docs/en/docs/sql-manual/sql-functions/aggregate-functions/topn_array.md b/docs/en/docs/sql-manual/sql-functions/aggregate-functions/topn_array.md new file mode 100644 index 0000000000..d266287638 --- /dev/null +++ b/docs/en/docs/sql-manual/sql-functions/aggregate-functions/topn_array.md @@ -0,0 +1,61 @@ +--- +{ + "title": "topn_array", + "language": "en" +} +--- + + + +## topn_array +### description +#### Syntax + +`ARRAY topn_array(expr, INT top_num[, INT space_expand_rate])` + +The topn function uses the Space-Saving algorithm to calculate the top_num frequent items in expr, and the result is the +frequent items and their occurrence times, which is an approximation + +The space_expand_rate parameter is optional and is used to set the number of counters used in the Space-Saving algorithm +``` +counter numbers = top_num * space_expand_rate +``` +The higher value of space_expand_rate, the more accurate result will be. The default value is 50 + +### example +``` +mysql> select topn_array(k3,3) from baseall; ++--------------------------+ +| topn_array(`k3`, 3) | ++--------------------------+ +| [3021, 2147483647, 5014] | ++--------------------------+ +1 row in set (0.02 sec) + +mysql> select topn_array(k3,3,100) from baseall; ++--------------------------+ +| topn_array(`k3`, 3, 100) | ++--------------------------+ +| [3021, 2147483647, 5014] | ++--------------------------+ +1 row in set (0.02 sec) +``` +### keywords +TOPN, TOPN_ARRAY \ No newline at end of file diff --git a/docs/sidebars.json b/docs/sidebars.json index 0333af1eeb..ebf2da3ec4 100644 --- a/docs/sidebars.json +++ b/docs/sidebars.json @@ -430,6 +430,7 @@ "sql-manual/sql-functions/aggregate-functions/percentile", "sql-manual/sql-functions/aggregate-functions/hll_union_agg", "sql-manual/sql-functions/aggregate-functions/topn", + "sql-manual/sql-functions/aggregate-functions/topn_array", "sql-manual/sql-functions/aggregate-functions/count", "sql-manual/sql-functions/aggregate-functions/sum", "sql-manual/sql-functions/aggregate-functions/max_by", diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/topn_array.md b/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/topn_array.md new file mode 100644 index 0000000000..8b4985c25d --- /dev/null +++ b/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/topn_array.md @@ -0,0 +1,60 @@ +--- +{ + "title": "topn_array", + "language": "zh-CN" +} +--- + + + +## topn_array +### description +#### Syntax + +`ARRAY topn_array(expr, INT top_num[, INT space_expand_rate])` + +该topn_array函数使用Space-Saving算法计算expr中的top_num个频繁项,结果为频繁项及其出现次数,该结果为近似值 + +space_expand_rate参数是可选项,该值用来设置Space-Saving算法中使用的counter个数 +``` +counter numbers = top_num * space_expand_rate +``` +space_expand_rate的值越大,结果越准确,默认值为50 + +### example +``` +mysql> select topn_array(k3,3) from baseall; ++--------------------------+ +| topn_array(`k3`, 3) | ++--------------------------+ +| [3021, 2147483647, 5014] | ++--------------------------+ +1 row in set (0.02 sec) + +mysql> select topn_array(k3,3,100) from baseall; ++--------------------------+ +| topn_array(`k3`, 3, 100) | ++--------------------------+ +| [3021, 2147483647, 5014] | ++--------------------------+ +1 row in set (0.02 sec) +``` +### keywords +TOPN, TOPN_ARRAY diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java index 6f9154cac8..acbb0a5a18 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java @@ -2466,6 +2466,13 @@ public class FunctionSet { "", "", "", "", "", true, false, true, true)); addBuiltin(AggregateFunction.createBuiltin(COLLECT_SET, Lists.newArrayList(t), new ArrayType(t), t, "", "", "", "", "", true, false, true, true)); + addBuiltin( + AggregateFunction.createBuiltin("topn_array", Lists.newArrayList(t, Type.INT), new ArrayType(t), t, + "", "", "", "", "", true, false, true, true)); + addBuiltin( + AggregateFunction + .createBuiltin("topn_array", Lists.newArrayList(t, Type.INT, Type.INT), new ArrayType(t), t, + "", "", "", "", "", true, false, true, true)); } // Avg diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java index 3f6fd5ef0e..be8217e42b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java @@ -158,6 +158,8 @@ public abstract class Type { arraySubTypes.add(DECIMALV2); arraySubTypes.add(DATE); arraySubTypes.add(DATETIME); + arraySubTypes.add(DATEV2); + arraySubTypes.add(DATETIMEV2); arraySubTypes.add(CHAR); arraySubTypes.add(VARCHAR); arraySubTypes.add(STRING); diff --git a/regression-test/data/correctness_p0/test_select_stddev_variance_window.out b/regression-test/data/correctness_p0/test_select_stddev_variance_window.out index b3a9ee95e5..24fd821c9a 100644 --- a/regression-test/data/correctness_p0/test_select_stddev_variance_window.out +++ b/regression-test/data/correctness_p0/test_select_stddev_variance_window.out @@ -918,55 +918,55 @@ 15 1991.8000000000002 -- !select_default -- -1 5069.4970703125 -2 1991.0 -3 1991.0 -4 1991.0 -5 5069.4970703125 -6 5069.4970703125 -7 1991.0 -8 5069.4970703125 -9 11224.505859375 -10 5068.5966796875 -11 17379.5 -12 11223.8056640625 -13 17379.0 -14 23534.203125 -15 1991.7000732421875 +1 1989.0 +2 1987.5 +3 1987.5 +4 1987.5 +5 1989.0 +6 1989.0 +7 1987.5 +8 1989.0 +9 1990.0 +10 1989.0 +11 1991.0 +12 1123.0 +13 255.0 +14 1123.0 +15 1990.0 -- !select_default -- 1 \N 2 \N 3 1986.0 -4 1989.0 +4 1987.5 5 1989.0 -6 1989.0 -7 1990.7999267578125 -8 29689.203125 -9 29688.802734375 -10 1990.7999267578125 -11 29689.40234375 +6 1987.0 +7 1989.0 +8 1989.0 +9 1985.0 +10 1989.0 +11 1991.0 12 1991.0 -13 29689.40234375 -14 29689.40234375 -15 1990.7999267578125 +13 1991.0 +14 1991.0 +15 1989.0 -- !select_default -- -1 1989.0 -2 1989.0 -3 1990.7999267578125 -4 1990.39990234375 -5 29689.203125 -6 23533.6015625 -7 1991.0 -8 17379.0 -9 17379.0 -10 17379.0 -11 17379.5 -12 17379.0 -13 17379.0 -14 23534.203125 -15 1991.7000732421875 +1 1987.0 +2 1987.5 +3 1989.0 +4 1987.5 +5 1989.0 +6 1987.0 +7 1989.0 +8 1989.0 +9 1989.0 +10 1991.0 +11 1991.0 +12 1991.0 +13 255.0 +14 1123.0 +15 1990.0 -- !select_default -- 1 1989.0 @@ -986,36 +986,36 @@ 15 1992.0 -- !select_default -- -1 5069.4970703125 -2 1991.0 -3 5068.5966796875 -4 11223.8056640625 -5 11224.505859375 -6 17379.5 -7 17379.0 -8 1991.7000732421875 -9 1991.9000244140625 -10 23534.203125 -11 1992.0 -12 29515.802734375 -13 255.0 +1 1989.0 +2 1987.5 +3 1989.0 +4 1123.0 +5 1990.0 +6 1991.0 +7 255.0 +8 1990.0 +9 1991.0 +10 1123.0 +11 1990.5 +12 255.0 +13 -16256.0 14 255.0 15 1992.0 -- !select_default -- 1 1989.0 2 1986.0 -3 1989.0 -4 1990.7999267578125 -5 1989.0 -6 29689.203125 -7 1990.39990234375 -8 23533.6015625 -9 17379.0 -10 1991.0 -11 11223.8056640625 -12 11223.8056640625 -13 5068.5966796875 -14 1991.0 -15 5069.4970703125 +3 1987.5 +4 1989.0 +5 1987.0 +6 1989.0 +7 1987.5 +8 1987.0 +9 1989.0 +10 1989.0 +11 1989.0 +12 1990.0 +13 1989.0 +14 1987.5 +15 1989.0 diff --git a/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.out b/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.out index d501683f7f..f26cf1146b 100644 --- a/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.out +++ b/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.out @@ -16,3 +16,15 @@ -- !select -- 1 [1, 2, 1, 2] ['hello'] [2022-07-04, 2022-07-04] [1.23] ['hello', 'hello', 'hello', 'hello'] +-- !select43 -- +[10, 8] + +-- !select44 -- +[10, 8] + +-- !select45 -- +[2022-11-02, 2022-11-01] + +-- !select46 -- +[6.8754576, 0.576] + diff --git a/regression-test/data/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.out b/regression-test/data/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.out index b3a9ee95e5..24fd821c9a 100644 --- a/regression-test/data/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.out +++ b/regression-test/data/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.out @@ -918,55 +918,55 @@ 15 1991.8000000000002 -- !select_default -- -1 5069.4970703125 -2 1991.0 -3 1991.0 -4 1991.0 -5 5069.4970703125 -6 5069.4970703125 -7 1991.0 -8 5069.4970703125 -9 11224.505859375 -10 5068.5966796875 -11 17379.5 -12 11223.8056640625 -13 17379.0 -14 23534.203125 -15 1991.7000732421875 +1 1989.0 +2 1987.5 +3 1987.5 +4 1987.5 +5 1989.0 +6 1989.0 +7 1987.5 +8 1989.0 +9 1990.0 +10 1989.0 +11 1991.0 +12 1123.0 +13 255.0 +14 1123.0 +15 1990.0 -- !select_default -- 1 \N 2 \N 3 1986.0 -4 1989.0 +4 1987.5 5 1989.0 -6 1989.0 -7 1990.7999267578125 -8 29689.203125 -9 29688.802734375 -10 1990.7999267578125 -11 29689.40234375 +6 1987.0 +7 1989.0 +8 1989.0 +9 1985.0 +10 1989.0 +11 1991.0 12 1991.0 -13 29689.40234375 -14 29689.40234375 -15 1990.7999267578125 +13 1991.0 +14 1991.0 +15 1989.0 -- !select_default -- -1 1989.0 -2 1989.0 -3 1990.7999267578125 -4 1990.39990234375 -5 29689.203125 -6 23533.6015625 -7 1991.0 -8 17379.0 -9 17379.0 -10 17379.0 -11 17379.5 -12 17379.0 -13 17379.0 -14 23534.203125 -15 1991.7000732421875 +1 1987.0 +2 1987.5 +3 1989.0 +4 1987.5 +5 1989.0 +6 1987.0 +7 1989.0 +8 1989.0 +9 1989.0 +10 1991.0 +11 1991.0 +12 1991.0 +13 255.0 +14 1123.0 +15 1990.0 -- !select_default -- 1 1989.0 @@ -986,36 +986,36 @@ 15 1992.0 -- !select_default -- -1 5069.4970703125 -2 1991.0 -3 5068.5966796875 -4 11223.8056640625 -5 11224.505859375 -6 17379.5 -7 17379.0 -8 1991.7000732421875 -9 1991.9000244140625 -10 23534.203125 -11 1992.0 -12 29515.802734375 -13 255.0 +1 1989.0 +2 1987.5 +3 1989.0 +4 1123.0 +5 1990.0 +6 1991.0 +7 255.0 +8 1990.0 +9 1991.0 +10 1123.0 +11 1990.5 +12 255.0 +13 -16256.0 14 255.0 15 1992.0 -- !select_default -- 1 1989.0 2 1986.0 -3 1989.0 -4 1990.7999267578125 -5 1989.0 -6 29689.203125 -7 1990.39990234375 -8 23533.6015625 -9 17379.0 -10 1991.0 -11 11223.8056640625 -12 11223.8056640625 -13 5068.5966796875 -14 1991.0 -15 5069.4970703125 +3 1987.5 +4 1989.0 +5 1987.0 +6 1989.0 +7 1987.5 +8 1987.0 +9 1989.0 +10 1989.0 +11 1989.0 +12 1990.0 +13 1989.0 +14 1987.5 +15 1989.0 diff --git a/regression-test/suites/correctness_p0/test_select_stddev_variance_window.groovy b/regression-test/suites/correctness_p0/test_select_stddev_variance_window.groovy index 390418ddcf..311ae350c8 100644 --- a/regression-test/suites/correctness_p0/test_select_stddev_variance_window.groovy +++ b/regression-test/suites/correctness_p0/test_select_stddev_variance_window.groovy @@ -144,12 +144,12 @@ suite("test_select_stddev_variance_window") { qt_select_default "select k1, percentile(k2,0.8) over (partition by k6 order by k1 rows between current row and unbounded following) from ${tableName} order by k1;" qt_select_default "select k1, percentile(k2,0.8) over (partition by k6 order by k1) from ${tableName} order by k1;" - qt_select_default "select k1, percentile_approx(k2,0.8,4096) over (partition by k6 order by k1 rows between 3 preceding and unbounded following) from ${tableName} order by k1;" - qt_select_default "select k1, percentile_approx(k2,0.8,4096) over (partition by k6 order by k1 rows between 3 preceding and 1 preceding) from ${tableName} order by k1;" - qt_select_default "select k1, percentile_approx(k2,0.8,4096) over (partition by k6 order by k1 rows between 3 preceding and 1 following) from ${tableName} order by k1;" - qt_select_default "select k1, percentile_approx(k2,0.8,4096) over (partition by k6 order by k1 rows between current row and current row) from ${tableName} order by k1;" - qt_select_default "select k1, percentile_approx(k2,0.8,4096) over (partition by k6 order by k1 rows between current row and unbounded following) from ${tableName} order by k1;" - qt_select_default "select k1, percentile_approx(k2,0.8,4096) over (partition by k6 order by k1) from ${tableName} order by k1;" + qt_select_default "select k1, percentile_approx(k2,0.5,4096) over (partition by k6 order by k1 rows between 3 preceding and unbounded following) from ${tableName} order by k1;" + qt_select_default "select k1, percentile_approx(k2,0.5,4096) over (partition by k6 order by k1 rows between 3 preceding and 1 preceding) from ${tableName} order by k1;" + qt_select_default "select k1, percentile_approx(k2,0.5,4096) over (partition by k6 order by k1 rows between 3 preceding and 1 following) from ${tableName} order by k1;" + qt_select_default "select k1, percentile_approx(k2,0.5,4096) over (partition by k6 order by k1 rows between current row and current row) from ${tableName} order by k1;" + qt_select_default "select k1, percentile_approx(k2,0.5,4096) over (partition by k6 order by k1 rows between current row and unbounded following) from ${tableName} order by k1;" + qt_select_default "select k1, percentile_approx(k2,0.5,4096) over (partition by k6 order by k1) from ${tableName} order by k1;" } diff --git a/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.groovy b/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.groovy index cd7e61d408..a53e795064 100644 --- a/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.groovy +++ b/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.groovy @@ -47,4 +47,28 @@ suite("test_aggregate_collect") { sql """ CREATE TABLE ${tableCTAS} PROPERTIES("replication_num" = "1") AS SELECT 1,collect_list(c_int),collect_set(c_string),collect_list(c_date),collect_set(c_decimal),collect_list(c_string_not_null) FROM ${tableName} """ qt_select "SELECT * from ${tableCTAS}" + + // topn_array + def tableName_12 = "topn_array" + + sql "DROP TABLE IF EXISTS ${tableName_12}" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName_12} ( + id int, + level int, + dt datev2, + num decimal(27,9) + ) + DISTRIBUTED BY HASH(id) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + """ + sql "INSERT INTO ${tableName_12} values(1,10,'2022-11-1',6.8754576), (2,8,'2022-11-3',0.576), (2,10,'2022-11-2',1.234) ,(3,10,'2022-11-2',0.576) ,(5,29,'2022-11-2',6.8754576) ,(6,8,'2022-11-1',6.8754576)" + + qt_select43 "select topn_array(level,2) from ${tableName_12}" + qt_select44 "select topn_array(level,2,100) from ${tableName_12}" + qt_select45 "select topn_array(dt,2,100) from ${tableName_12}" + qt_select46 "select topn_array(num,2,100) from ${tableName_12}" + sql "DROP TABLE IF EXISTS ${tableName_12}" } diff --git a/regression-test/suites/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.groovy b/regression-test/suites/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.groovy index b81f400dad..fb319a4192 100644 --- a/regression-test/suites/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.groovy +++ b/regression-test/suites/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.groovy @@ -147,12 +147,12 @@ suite("test_select_stddev_variance_window") { qt_select_default "select k1, percentile(k2,0.8) over (partition by k6 order by k1 rows between current row and unbounded following) from ${tableName} order by k1;" qt_select_default "select k1, percentile(k2,0.8) over (partition by k6 order by k1) from ${tableName} order by k1;" - qt_select_default "select k1, percentile_approx(k2,0.8,4096) over (partition by k6 order by k1 rows between 3 preceding and unbounded following) from ${tableName} order by k1;" - qt_select_default "select k1, percentile_approx(k2,0.8,4096) over (partition by k6 order by k1 rows between 3 preceding and 1 preceding) from ${tableName} order by k1;" - qt_select_default "select k1, percentile_approx(k2,0.8,4096) over (partition by k6 order by k1 rows between 3 preceding and 1 following) from ${tableName} order by k1;" - qt_select_default "select k1, percentile_approx(k2,0.8,4096) over (partition by k6 order by k1 rows between current row and current row) from ${tableName} order by k1;" - qt_select_default "select k1, percentile_approx(k2,0.8,4096) over (partition by k6 order by k1 rows between current row and unbounded following) from ${tableName} order by k1;" - qt_select_default "select k1, percentile_approx(k2,0.8,4096) over (partition by k6 order by k1) from ${tableName} order by k1;" + qt_select_default "select k1, percentile_approx(k2,0.5,4096) over (partition by k6 order by k1 rows between 3 preceding and unbounded following) from ${tableName} order by k1;" + qt_select_default "select k1, percentile_approx(k2,0.5,4096) over (partition by k6 order by k1 rows between 3 preceding and 1 preceding) from ${tableName} order by k1;" + qt_select_default "select k1, percentile_approx(k2,0.5,4096) over (partition by k6 order by k1 rows between 3 preceding and 1 following) from ${tableName} order by k1;" + qt_select_default "select k1, percentile_approx(k2,0.5,4096) over (partition by k6 order by k1 rows between current row and current row) from ${tableName} order by k1;" + qt_select_default "select k1, percentile_approx(k2,0.5,4096) over (partition by k6 order by k1 rows between current row and unbounded following) from ${tableName} order by k1;" + qt_select_default "select k1, percentile_approx(k2,0.5,4096) over (partition by k6 order by k1) from ${tableName} order by k1;" }