From b4aef889f2f26f5ef43d31e1394d7623dc23ce1d Mon Sep 17 00:00:00 2001 From: lihangyu <15605149486@163.com> Date: Sat, 19 Nov 2022 10:49:50 +0800 Subject: [PATCH] [feature-array](array-function) add array constructor function `array()` (#14250) * [feature-array](array-function) add array constructor function `array()` ``` mysql> select array(qid, creationDate) from nested_c_2 limit 10; +------------------------------+ | array(`qid`, `creationDate`) | +------------------------------+ | [1000038, 20090616074056] | | [1000069, 20090616075005] | | [1000130, 20090616080918] | | [1000145, 20090616081545] | +------------------------------+ 10 rows in set (0.01 sec) ``` --- be/src/vec/CMakeLists.txt | 1 + .../array/function_array_constructor.cpp | 90 +++++++++++++++++++ .../array/function_array_register.cpp | 2 + .../sql-functions/array-functions/array.md | 84 +++++++++++++++++ .../sql-functions/array-functions/array.md | 84 +++++++++++++++++ gensrc/script/doris_builtins_functions.py | 22 +++-- .../array_functions/test_array_functions.out | 33 +++++++ .../test_array_functions_by_literal.out | 9 ++ .../test_array_functions_with_where.out | 15 ++++ .../test_array_functions.groovy | 3 + .../test_array_functions_by_literal.groovy | 4 +- .../test_array_functions_with_where.groovy | 3 + 12 files changed, 342 insertions(+), 8 deletions(-) create mode 100644 be/src/vec/functions/array/function_array_constructor.cpp create mode 100644 docs/en/docs/sql-manual/sql-functions/array-functions/array.md create mode 100644 docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array.md diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt index b2def81de9..2c9a3b051a 100644 --- a/be/src/vec/CMakeLists.txt +++ b/be/src/vec/CMakeLists.txt @@ -162,6 +162,7 @@ set(VEC_FILES functions/array/function_array_range.cpp functions/array/function_array_compact.cpp functions/array/function_array_popback.cpp + functions/array/function_array_constructor.cpp 
functions/array/function_array_with_constant.cpp exprs/table_function/vexplode_json_array.cpp functions/math.cpp diff --git a/be/src/vec/functions/array/function_array_constructor.cpp b/be/src/vec/functions/array/function_array_constructor.cpp new file mode 100644 index 0000000000..fcb71eeede --- /dev/null +++ b/be/src/vec/functions/array/function_array_constructor.cpp @@ -0,0 +1,90 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#include + +#include "vec/columns/column_array.h" +#include "vec/columns/column_string.h" +#include "vec/common/string_ref.h" +#include "vec/data_types/data_type_array.h" +#include "vec/data_types/data_type_number.h" +#include "vec/data_types/get_least_supertype.h" +#include "vec/functions/array/function_array_utils.h" +#include "vec/functions/function.h" +#include "vec/functions/simple_function_factory.h" + +namespace doris::vectorized { + +// construct an array +// array(col1, col2, '22') -> [col1, col2, '22'] +class FunctionArrayConstructor : public IFunction { +public: + static constexpr auto name = "array"; + static FunctionPtr create() { return std::make_shared(); } + + /// Get function name. 
+ String get_name() const override { return name; } + + bool is_variadic() const override { return true; } + + bool use_default_implementation_for_nulls() const override { return false; } + + size_t get_number_of_arguments() const override { return 1; } + + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + DCHECK(arguments.size() > 0) + << "function: " << get_name() << ", arguments should not be empty"; + return std::make_shared(make_nullable(remove_nullable(arguments[0]))); + } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) override { + size_t num_element = arguments.size(); + auto result_col = block.get_by_position(result).type->create_column(); + auto result_array_col = static_cast(result_col.get()); + IColumn& result_nested_col = result_array_col->get_data(); + ColumnArray::Offsets64& result_offset_col = result_array_col->get_offsets(); + result_nested_col.reserve(input_rows_count * num_element); + result_offset_col.resize(input_rows_count); + + // convert to nullable column + for (size_t i = 0; i < num_element; ++i) { + auto& col = block.get_by_position(arguments[i]).column; + col = col->convert_to_full_column_if_const(); + if (result_nested_col.is_nullable() && !col->is_nullable()) { + col = ColumnNullable::create(col, ColumnUInt8::create(col->size(), 0)); + } + } + + // insert value into array + ColumnArray::Offset64 offset = 0; + for (size_t row = 0; row < input_rows_count; ++row) { + for (size_t idx = 0; idx < num_element; ++idx) { + result_nested_col.insert_from(*block.get_by_position(arguments[idx]).column, row); + } + offset += num_element; + result_offset_col[row] = offset; + } + block.replace_by_position(result, std::move(result_col)); + return Status::OK(); + } +}; + +void register_function_array_constructor(SimpleFunctionFactory& factory) { + factory.register_function(); +} + +} // namespace doris::vectorized diff --git 
a/be/src/vec/functions/array/function_array_register.cpp b/be/src/vec/functions/array/function_array_register.cpp index 82d3b79131..3d13a86862 100644 --- a/be/src/vec/functions/array/function_array_register.cpp +++ b/be/src/vec/functions/array/function_array_register.cpp @@ -40,6 +40,7 @@ void register_function_array_range(SimpleFunctionFactory&); void register_function_array_compact(SimpleFunctionFactory&); void register_function_array_popback(SimpleFunctionFactory&); void register_function_array_with_constant(SimpleFunctionFactory&); +void register_function_array_constructor(SimpleFunctionFactory&); void register_function_array(SimpleFunctionFactory& factory) { register_function_array_element(factory); @@ -60,6 +61,7 @@ void register_function_array(SimpleFunctionFactory& factory) { register_function_array_compact(factory); register_function_array_popback(factory); register_function_array_with_constant(factory); + register_function_array_constructor(factory); } } // namespace doris::vectorized diff --git a/docs/en/docs/sql-manual/sql-functions/array-functions/array.md b/docs/en/docs/sql-manual/sql-functions/array-functions/array.md new file mode 100644 index 0000000000..0c968395ea --- /dev/null +++ b/docs/en/docs/sql-manual/sql-functions/array-functions/array.md @@ -0,0 +1,84 @@ +--- +{ + "title": "array constructor", + "language": "en" +} +--- + + + +## array() + +### description + +#### Syntax + +``` +ARRAY<T> array(T, ...) 
+``` +Constructs an array from a variable number of elements and returns it. Each element `T` can be a column or a literal. + +### notice + +`Only supported in vectorized engine` + +### example + +``` +mysql> set enable_vectorized_engine=true; + +mysql> select array("1", 2, 1.1); ++----------------------+ +| array('1', 2, '1.1') | ++----------------------+ +| ['1', '2', '1.1'] | ++----------------------+ +1 row in set (0.00 sec) + + +mysql> select array(null, 1); ++----------------+ +| array(NULL, 1) | ++----------------+ +| [NULL, 1] | ++----------------+ +1 row in set (0.00 sec) + +mysql> select array(1, 2, 3); ++----------------+ +| array(1, 2, 3) | ++----------------+ +| [1, 2, 3] | ++----------------+ +1 row in set (0.00 sec) + +mysql> select array(qid, creationDate, null) from nested limit 4; ++------------------------------------+ +| array(`qid`, `creationDate`, NULL) | ++------------------------------------+ +| [1000038, 20090616074056, NULL] | +| [1000069, 20090616075005, NULL] | +| [1000130, 20090616080918, NULL] | +| [1000145, 20090616081545, NULL] | ++------------------------------------+ +4 rows in set (0.01 sec) +``` + +### keywords + +ARRAY,ARRAY,CONSTRUCTOR diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array.md b/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array.md new file mode 100644 index 0000000000..5e6f9fa688 --- /dev/null +++ b/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array.md @@ -0,0 +1,84 @@ +--- +{ + "title": "array构造函数", + "language": "zh-CN" +} +--- + + + +## array() + +### description + +#### Syntax + +``` +ARRAY<T> array(T, ...) 
+``` +根据参数构造并返回array, 参数可以是多列或者常量 + +### notice + +`Only supported in vectorized engine` + +### example + +``` +mysql> set enable_vectorized_engine=true; + +mysql> select array("1", 2, 1.1); ++----------------------+ +| array('1', 2, '1.1') | ++----------------------+ +| ['1', '2', '1.1'] | ++----------------------+ +1 row in set (0.00 sec) + + +mysql> select array(null, 1); ++----------------+ +| array(NULL, 1) | ++----------------+ +| [NULL, 1] | ++----------------+ +1 row in set (0.00 sec) + +mysql> select array(1, 2, 3); ++----------------+ +| array(1, 2, 3) | ++----------------+ +| [1, 2, 3] | ++----------------+ +1 row in set (0.00 sec) + +mysql> select array(qid, creationDate, null) from nested limit 4; ++------------------------------------+ +| array(`qid`, `creationDate`, NULL) | ++------------------------------------+ +| [1000038, 20090616074056, NULL] | +| [1000069, 20090616075005, NULL] | +| [1000130, 20090616080918, NULL] | +| [1000145, 20090616081545, NULL] | ++------------------------------------+ +4 rows in set (0.01 sec) +``` + +### keywords + +ARRAY,ARRAY,CONSTRUCTOR diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index d6c25c9086..5a99b2d453 100755 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -110,13 +110,21 @@ visible_functions = [ '15FunctionContextERKNS1_11LargeIntValE', '', '', 'vec', ''], # array functions - [['array'], 'ARRAY', ['INT', '...'], - '_ZN5doris14ArrayFunctions5arrayEPN9doris_udf15FunctionContextEiPKNS1_6IntValE', '', '', '', ''], - [['array'], 'ARRAY', ['VARCHAR', '...'], - '_ZN5doris14ArrayFunctions5arrayEPN9doris_udf15FunctionContextEiPKNS1_9StringValE', '', '', '', ''], - [['array'], 'ARRAY', ['ARRAY', '...'], '', '', '', '', ''], - [['array'], 'ARRAY', ['MAP', '...'], '', '', '', '', ''], - [['array'], 'ARRAY', ['STRUCT', '...'], '', '', '', '', ''], + [['array'], 'ARRAY', ['BOOLEAN', '...'], '', '', '', 'vec', 
'ALWAYS_NOT_NULLABLE'], + [['array'], 'ARRAY', ['TINYINT', '...'], '', '', '', 'vec', 'ALWAYS_NOT_NULLABLE'], + [['array'], 'ARRAY', ['SMALLINT', '...'], '', '', '', 'vec', 'ALWAYS_NOT_NULLABLE'], + [['array'], 'ARRAY', ['INT', '...'], '', '', '', 'vec', 'ALWAYS_NOT_NULLABLE'], + [['array'], 'ARRAY', ['BIGINT', '...'], '', '', '', 'vec', 'ALWAYS_NOT_NULLABLE'], + [['array'], 'ARRAY', ['LARGEINT', '...'], '', '', '', 'vec', 'ALWAYS_NOT_NULLABLE'], + [['array'], 'ARRAY', ['DATETIME', '...'], '', '', '', 'vec', 'ALWAYS_NOT_NULLABLE'], + [['array'], 'ARRAY', ['DATE', '...'], '', '', '', 'vec', 'ALWAYS_NOT_NULLABLE'], + [['array'], 'ARRAY', ['DATETIMEV2', '...'], '', '', '', 'vec', 'ALWAYS_NOT_NULLABLE'], + [['array'], 'ARRAY', ['DATEV2', '...'], '', '', '', 'vec', 'ALWAYS_NOT_NULLABLE'], + [['array'], 'ARRAY', ['FLOAT', '...'], '', '', '', 'vec', 'ALWAYS_NOT_NULLABLE'], + [['array'], 'ARRAY', ['DOUBLE', '...'], '', '', '', 'vec', 'ALWAYS_NOT_NULLABLE'], + [['array'], 'ARRAY', ['DECIMALV2', '...'], '', '', '', 'vec', 'ALWAYS_NOT_NULLABLE'], + [['array'], 'ARRAY', ['VARCHAR', '...'], '', '', '', 'vec', 'ALWAYS_NOT_NULLABLE'], + [['array'], 'ARRAY', ['STRING', '...'], '', '', '', 'vec', 'ALWAYS_NOT_NULLABLE'], [['element_at', '%element_extract%'], 'BOOLEAN', ['ARRAY_BOOLEAN', 'BIGINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], [['element_at', '%element_extract%'], 'TINYINT', ['ARRAY_TINYINT', 'BIGINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], diff --git a/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out b/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out index 881fc309d2..d7f366c0c8 100644 --- a/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out +++ b/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out @@ -307,6 +307,39 @@ 8 [123, 123] 9 [123, 123] +-- !select -- +1 [2, 1] +2 [2, 2] +3 [2, 3] +4 [2, 4] +5 [2, 5] +6 [2, 6] +7 [2, 7] +8 [2, 
8] +9 [2, 9] + +-- !select -- +1 ['1', NULL, '2020-01-01'] +2 ['2', NULL, '2020-01-01'] +3 ['3', NULL, '2020-01-01'] +4 ['4', NULL, '2020-01-01'] +5 ['5', NULL, '2020-01-01'] +6 ['6', NULL, '2020-01-01'] +7 ['7', NULL, '2020-01-01'] +8 ['8', NULL, '2020-01-01'] +9 ['9', NULL, '2020-01-01'] + +-- !select -- +1 [NULL, 1] +2 [NULL, 2] +3 [NULL, 3] +4 [NULL, 4] +5 [NULL, 5] +6 [NULL, 6] +7 [NULL, 7] +8 [NULL, 8] +9 [NULL, 9] + -- !select -- \N \N -1 \N diff --git a/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions_by_literal.out b/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions_by_literal.out index 137e978241..ef20547749 100644 --- a/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions_by_literal.out +++ b/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions_by_literal.out @@ -407,3 +407,12 @@ _ -- !sql -- ['2015-03-13'] +-- !sql -- +[8, NULL] + +-- !sql -- +['a', '1', '2'] + +-- !sql -- +[NULL, NULL, NULL] + diff --git a/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions_with_where.out b/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions_with_where.out index 610a739472..042c5337f5 100644 --- a/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions_with_where.out +++ b/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions_with_where.out @@ -18,3 +18,18 @@ -- !select -- \N [NULL, NULL, NULL, NULL, NULL] +-- !select -- +1 [5, 1] +2 [5, 2] +3 [5, 3] + +-- !select -- +1 ['1', 'abc'] +2 ['2', 'abc'] +3 ['3', 'abc'] + +-- !select -- +1 [NULL, 1] +2 [NULL, 2] +3 [NULL, 3] + diff --git a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy index cc478364e0..223997ed40 100644 --- 
a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy +++ b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy @@ -76,6 +76,9 @@ suite("test_array_functions") { qt_select "SELECT k1, array_with_constant(10, null) from ${tableName} ORDER BY k1" qt_select "SELECT k1, array_with_constant(2, 'a') from ${tableName} ORDER BY k1" qt_select "SELECT k1, array_with_constant(2, 123) from ${tableName} ORDER BY k1" + qt_select "SELECT k1, array(2, k1) from ${tableName} ORDER BY k1" + qt_select "SELECT k1, array(k1, null, '2020-01-01') from ${tableName} ORDER BY k1" + qt_select "SELECT k1, array(null, k1) from ${tableName} ORDER BY k1" def tableName2 = "tbl_test_array_range" sql """DROP TABLE IF EXISTS ${tableName2}""" diff --git a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions_by_literal.groovy b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions_by_literal.groovy index 37f3f8037f..1d7ab14f9d 100644 --- a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions_by_literal.groovy +++ b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions_by_literal.groovy @@ -172,7 +172,6 @@ suite("test_array_functions_by_literal") { qt_sql "select array_with_constant(2, '1')" qt_sql "select array_with_constant(4, 1223)" qt_sql "select array_with_constant(8, null)" - // array_compact function qt_sql "select array_compact([1, 2, 3, 3, null, null, 4, 4])" qt_sql "select array_compact([null, null, null])" @@ -181,6 +180,9 @@ suite("test_array_functions_by_literal") { qt_sql "select array_compact(['aaa','aaa','bbb','ccc','ccccc',null, null,'dddd'])" qt_sql "select array_compact(['2015-03-13','2015-03-13'])" + qt_sql "select array(8, null)" + qt_sql "select array('a', 1, 2)" + qt_sql "select array(null, null, null)" // abnormal test test { sql "select array_intersect([1, 2, 3, 1, 2, 3], '1[3, 2, 5]')" 
diff --git a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions_with_where.groovy b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions_with_where.groovy index 5ad39b199f..3cf2b555e4 100644 --- a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions_with_where.groovy +++ b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions_with_where.groovy @@ -46,6 +46,9 @@ suite("test_array_functions_with_where") { qt_select "SELECT k1, size(k2) FROM ${tableName} WHERE arrays_overlap(k2, k4) ORDER BY k1" qt_select "SELECT k1, size(k2) FROM ${tableName} WHERE cardinality(k2)>0 ORDER BY k1, size(k2)" qt_select "SELECT k1, array_with_constant(5, k1) FROM ${tableName} WHERE k1 is null ORDER BY k1, size(k2)" + qt_select "SELECT k1, array(5, k1) FROM ${tableName} WHERE k1 is not null ORDER BY k1, size(k2)" + qt_select "SELECT k1, array(k1, 'abc') FROM ${tableName} WHERE k1 is not null ORDER BY k1, size(k2)" + qt_select "SELECT k1, array(null, k1) FROM ${tableName} WHERE k1 is not null ORDER BY k1, size(k2)" test { sql "select k1, size(k2) FROM ${tableName} WHERE k2 = []"