From 7ffe88b579c59b5be3d349816d4019377375a4af Mon Sep 17 00:00:00 2001 From: lihangyu <15605149486@163.com> Date: Mon, 7 Nov 2022 10:48:16 +0800 Subject: [PATCH] [feature-array](array-type) Add array function array_popback (#13641) Remove the last element from array. ``` mysql> select array_popback(['test', NULL, 'value']); +-----------------------------------------------------+ | array_popback(ARRAY('test', NULL, 'value')) | +-----------------------------------------------------+ | [test, NULL] | +-----------------------------------------------------+ ``` --- be/src/vec/CMakeLists.txt | 1 + .../array/function_array_popback.cpp | 84 +++++++++++++++++++ .../array/function_array_register.cpp | 2 + .../functions/array/function_array_slice.h | 47 +---------- .../functions/array/function_array_utils.cpp | 44 ++++++++++ .../functions/array/function_array_utils.h | 4 + .../array-functions/array_popback.md | 57 +++++++++++++ .../array-functions/array_popback.md | 56 +++++++++++++ gensrc/script/doris_builtins_functions.py | 14 ++++ .../array_functions/test_array_functions.out | 36 ++++++++ .../test_array_functions.groovy | 4 + 11 files changed, 303 insertions(+), 46 deletions(-) create mode 100644 be/src/vec/functions/array/function_array_popback.cpp create mode 100644 docs/en/docs/sql-manual/sql-functions/array-functions/array_popback.md create mode 100644 docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_popback.md diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt index 6d44d0cff9..70b3368ac1 100644 --- a/be/src/vec/CMakeLists.txt +++ b/be/src/vec/CMakeLists.txt @@ -153,6 +153,7 @@ set(VEC_FILES functions/array/function_array_difference.cpp functions/array/function_array_enumerate.cpp functions/array/function_array_range.cpp + functions/array/function_array_popback.cpp exprs/table_function/vexplode_json_array.cpp functions/math.cpp functions/function_bitmap.cpp diff --git a/be/src/vec/functions/array/function_array_popback.cpp 
b/be/src/vec/functions/array/function_array_popback.cpp
new file mode 100644
index 0000000000..ae8c76df66
--- /dev/null
+++ b/be/src/vec/functions/array/function_array_popback.cpp
@@ -0,0 +1,93 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#include <fmt/format.h>
+
+#include "vec/columns/column_array.h"
+#include "vec/columns/column_string.h"
+#include "vec/common/string_ref.h"
+#include "vec/data_types/data_type_array.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/functions/array/function_array_utils.h"
+#include "vec/functions/function.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+
+/// array_popback(arr): returns `arr` without its last element.
+/// Implemented as a slice that starts at position 1 and has length `size - 1`.
+class FunctionArrayPopback : public IFunction {
+public:
+    static constexpr auto name = "array_popback";
+    static FunctionPtr create() { return std::make_shared<FunctionArrayPopback>(); }
+
+    /// Get function name.
+    String get_name() const override { return name; }
+
+    bool is_variadic() const override { return false; }
+
+    size_t get_number_of_arguments() const override { return 1; }
+
+    /// The result type is the type of the single argument, returned unchanged.
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        DCHECK(is_array(arguments[0]))
+                << "First argument for function: " << name
+                << " should be DataTypeArray but it has type " << arguments[0]->get_name() << ".";
+        return arguments[0];
+    }
+
+    /// Builds per-row (offset = 1, length = size - 1) columns and passes them to slice_array().
+    /// Returns RuntimeError when extract_column_array_info() rejects the argument column.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        auto array_column =
+                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
+        // extract src array column
+        ColumnArrayExecutionData src;
+        if (!extract_column_array_info(*array_column, src)) {
+            return Status::RuntimeError(
+                    fmt::format("execute failed, unsupported types for function {}({})", get_name(),
+                                block.get_by_position(arguments[0]).type->get_name()));
+        }
+        // prepare dst array column; is_nullable mirrors whether src exposes a nested null map
+        const bool is_nullable = src.nested_nullmap_data != nullptr;
+        ColumnArrayMutableData dst = create_mutable_data(src.nested_col, is_nullable);
+        dst.offsets_ptr->reserve(input_rows_count);
+        // every row is sliced starting from position 1
+        auto offset_column = ColumnInt64::create(array_column->size(), 1);
+        // per-row slice length: array size minus one
+        auto length_column = ColumnInt64::create();
+        length_column->reserve(src.offsets_ptr->size());
+        for (size_t row = 0; row < src.offsets_ptr->size(); ++row) {
+            // NOTE(review): for row == 0 this reads index (size_t)-1; presumably the offsets
+            // container tolerates that and yields 0 -- confirm against its definition.
+            const size_t off = (*src.offsets_ptr)[row - 1];
+            const size_t len = (*src.offsets_ptr)[row] - off;
+            // Convert before subtracting so an empty array gives -1 instead of wrapping size_t.
+            length_column->insert_value(static_cast<Int64>(len) - 1);
+        }
+        slice_array(dst, src, *offset_column, length_column.get());
+        ColumnPtr res_column = assemble_column_array(dst);
+        block.replace_by_position(result, std::move(res_column));
+        return Status::OK();
+    }
+};
+
+void register_function_array_popback(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionArrayPopback>();
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/array/function_array_register.cpp b/be/src/vec/functions/array/function_array_register.cpp
index 82c9103cb0..2112d92509 100644
--- a/be/src/vec/functions/array/function_array_register.cpp
+++ b/be/src/vec/functions/array/function_array_register.cpp
@@ -37,6 +37,7 @@ void register_function_array_slice(SimpleFunctionFactory&);
 void register_function_array_difference(SimpleFunctionFactory&);
 void register_function_array_enumerate(SimpleFunctionFactory&);
 void register_function_array_range(SimpleFunctionFactory&);
+void register_function_array_popback(SimpleFunctionFactory&);
 
 void register_function_array(SimpleFunctionFactory& factory) {
     register_function_array_element(factory);
@@ -54,6 +55,7 @@ void register_function_array(SimpleFunctionFactory& factory) {
     register_function_array_difference(factory);
     register_function_array_enumerate(factory);
     register_function_array_range(factory);
+    register_function_array_popback(factory);
 }
 
 } // namespace doris::vectorized
diff --git a/be/src/vec/functions/array/function_array_slice.h b/be/src/vec/functions/array/function_array_slice.h
index 0a60135637..c80a6834d4 100644
--- 
a/be/src/vec/functions/array/function_array_slice.h +++ b/be/src/vec/functions/array/function_array_slice.h @@ -79,56 +79,11 @@ public: ColumnArrayMutableData dst = create_mutable_data(src.nested_col, is_nullable); dst.offsets_ptr->reserve(input_rows_count); // execute - _execute_internal(dst, src, *offset_column, length_column.get()); + slice_array(dst, src, *offset_column, length_column.get()); ColumnPtr res_column = assemble_column_array(dst); block.replace_by_position(result, std::move(res_column)); return Status::OK(); } - -private: - void _execute_internal(ColumnArrayMutableData& dst, ColumnArrayExecutionData& src, - const IColumn& offset_column, const IColumn* length_column) { - size_t cur = 0; - for (size_t row = 0; row < src.offsets_ptr->size(); ++row) { - size_t off = (*src.offsets_ptr)[row - 1]; - size_t len = (*src.offsets_ptr)[row] - off; - Int64 start = offset_column.get_int(row); - if (len == 0 || start == 0) { - dst.offsets_ptr->push_back(cur); - continue; - } - if (start > 0 && start <= len) { - start += off - 1; - } else if (start < 0 && -start <= len) { - start += off + len; - } else { - dst.offsets_ptr->push_back(cur); - continue; - } - Int64 end; - if (length_column) { - Int64 size = length_column->get_int(row); - end = std::max((Int64)off, std::min((Int64)(off + len), start + size)); - } else { - end = off + len; - } - for (size_t pos = start; pos < end; ++pos) { - if (src.nested_nullmap_data && src.nested_nullmap_data[pos]) { - dst.nested_col->insert_default(); - dst.nested_nullmap_data->push_back(1); - } else { - dst.nested_col->insert_from(*src.nested_col, pos); - if (dst.nested_nullmap_data) { - dst.nested_nullmap_data->push_back(0); - } - } - } - if (start < end) { - cur += end - start; - } - dst.offsets_ptr->push_back(cur); - } - } }; } // namespace doris::vectorized diff --git a/be/src/vec/functions/array/function_array_utils.cpp b/be/src/vec/functions/array/function_array_utils.cpp index 124b294f05..ae5baac811 100644 --- 
a/be/src/vec/functions/array/function_array_utils.cpp
+++ b/be/src/vec/functions/array/function_array_utils.cpp
@@ -68,4 +68,63 @@ MutableColumnPtr assemble_column_array(ColumnArrayMutableData& data) {
     return ColumnArray::create(std::move(data.array_nested_col), std::move(data.offsets_col));
 }
 
+// Copies, for every row of `src`, the sub-array selected by `offset_column`
+// (and optionally `length_column`) into `dst`.
+//
+// offset_column: 1-based start position per row; a negative value counts from
+//                the end of the array. Zero or out-of-range yields an empty array.
+// length_column: optional per-row element count; when null the slice runs to the
+//                end of the array. The slice is clamped to the array bounds.
+void slice_array(ColumnArrayMutableData& dst, ColumnArrayExecutionData& src,
+                 const IColumn& offset_column, const IColumn* length_column) {
+    size_t cur = 0; // running end offset written to dst.offsets_ptr
+    for (size_t row = 0; row < src.offsets_ptr->size(); ++row) {
+        // NOTE(review): for row == 0 this reads index (size_t)-1; presumably the offsets
+        // container tolerates that and yields 0 -- confirm against its definition.
+        const size_t off = (*src.offsets_ptr)[row - 1];
+        const size_t len = (*src.offsets_ptr)[row] - off;
+        const Int64 src_begin = static_cast<Int64>(off);
+        const Int64 src_end = static_cast<Int64>(off + len);
+        Int64 start = offset_column.get_int(row);
+        if (len == 0 || start == 0) {
+            dst.offsets_ptr->push_back(cur);
+            continue;
+        }
+        // Translate the per-row position into an absolute index into the nested column.
+        if (start > 0 && static_cast<size_t>(start) <= len) {
+            start += src_begin - 1;
+        } else if (start < 0 && static_cast<size_t>(-(start + 1)) < len) {
+            // -(start + 1) < len is the same test as -start <= len, without negating INT64_MIN.
+            start += src_end;
+        } else {
+            dst.offsets_ptr->push_back(cur);
+            continue;
+        }
+        Int64 end = src_end;
+        if (length_column) {
+            const Int64 size = length_column->get_int(row);
+            // Same result as max(src_begin, min(src_end, start + size)), but the sum is only
+            // formed when it is known to stay below src_end, so it cannot overflow.
+            if (size < src_end - start) {
+                end = std::max(src_begin, start + size);
+            }
+        }
+        for (size_t pos = static_cast<size_t>(start); pos < static_cast<size_t>(end); ++pos) {
+            if (src.nested_nullmap_data && src.nested_nullmap_data[pos]) {
+                dst.nested_col->insert_default();
+                dst.nested_nullmap_data->push_back(1);
+            } else {
+                dst.nested_col->insert_from(*src.nested_col, pos);
+                if (dst.nested_nullmap_data) {
+                    dst.nested_nullmap_data->push_back(0);
+                }
+            }
+        }
+        if (start < end) {
+            cur += end - start;
+        }
+        dst.offsets_ptr->push_back(cur);
+    }
+}
+
 } // namespace doris::vectorized
diff --git a/be/src/vec/functions/array/function_array_utils.h b/be/src/vec/functions/array/function_array_utils.h
index b4859f502b..f488e30786 100644
--- a/be/src/vec/functions/array/function_array_utils.h
+++ b/be/src/vec/functions/array/function_array_utils.h
@@ -55,4 +55,8 @@ ColumnArrayMutableData create_mutable_data(const IColumn* nested_col, bool is_nu
 
 MutableColumnPtr assemble_column_array(ColumnArrayMutableData& data);
 
+// array[offset:length] +void slice_array(ColumnArrayMutableData& dst, ColumnArrayExecutionData& src, + const IColumn& offset_column, const IColumn* length_column); + } // namespace doris::vectorized diff --git a/docs/en/docs/sql-manual/sql-functions/array-functions/array_popback.md b/docs/en/docs/sql-manual/sql-functions/array-functions/array_popback.md new file mode 100644 index 0000000000..55f588bf20 --- /dev/null +++ b/docs/en/docs/sql-manual/sql-functions/array-functions/array_popback.md @@ -0,0 +1,57 @@ +--- +{ + "title": "array_popback", + "language": "en" +} +--- + + + +## array_popback + +### description + +#### Syntax + +``` +ARRAY<T> array_popback(ARRAY<T> arr) +``` + +Returns the array with its last element removed. Returns NULL if the input is NULL. + +### notice + +`Only supported in vectorized engine` + +### example + +``` +mysql> set enable_vectorized_engine=true; + +mysql> select array_popback(['test', NULL, 'value']); ++-----------------------------------------------------+ +| array_popback(ARRAY('test', NULL, 'value')) | ++-----------------------------------------------------+ +| [test, NULL] | ++-----------------------------------------------------+ + +``` + +### keywords + +ARRAY,POPBACK,ARRAY_POPBACK diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_popback.md b/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_popback.md new file mode 100644 index 0000000000..4a74b61a4a --- /dev/null +++ b/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_popback.md @@ -0,0 +1,56 @@ +--- +{ + "title": "array_popback", + "language": "zh-CN" +} +--- + + + +## array_popback + +### description + +#### Syntax + +``` +ARRAY<T> array_popback(ARRAY<T> arr) +``` + +返回移除最后一个元素后的数组,如果输入参数为NULL,则返回NULL + +### notice + +`仅支持向量化引擎中使用` + +### example + +``` +mysql> set enable_vectorized_engine=true; + +mysql> select array_popback(['test', NULL, 'value']); ++-----------------------------------------------------+ +| array_popback(ARRAY('test', NULL, 'value')) | 
++-----------------------------------------------------+ +| [test, NULL] | ++-----------------------------------------------------+ +``` + +### keywords + +ARRAY,POPBACK,ARRAY_POPBACK diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index 982506f3d4..ee6e726d86 100755 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -435,6 +435,20 @@ visible_functions = [ [['array_slice', '%element_slice%'], 'ARRAY_DECIMALV2', ['ARRAY_DECIMALV2', 'BIGINT', 'BIGINT'], '', '', '', 'vec', ''], [['array_slice', '%element_slice%'], 'ARRAY_VARCHAR', ['ARRAY_VARCHAR', 'BIGINT', 'BIGINT'], '', '', '', 'vec', ''], [['array_slice', '%element_slice%'], 'ARRAY_STRING', ['ARRAY_STRING', 'BIGINT', 'BIGINT'], '', '', '', 'vec', ''], + + [['array_popback'], 'ARRAY_BOOLEAN', ['ARRAY_BOOLEAN'], '', '', '', 'vec', ''], + [['array_popback'], 'ARRAY_TINYINT', ['ARRAY_TINYINT'], '', '', '', 'vec', ''], + [['array_popback'], 'ARRAY_SMALLINT', ['ARRAY_SMALLINT'], '', '', '', 'vec', ''], + [['array_popback'], 'ARRAY_INT', ['ARRAY_INT'], '', '', '', 'vec', ''], + [['array_popback'], 'ARRAY_BIGINT', ['ARRAY_BIGINT'], '', '', '', 'vec', ''], + [['array_popback'], 'ARRAY_LARGEINT', ['ARRAY_LARGEINT'], '', '', '', 'vec', ''], + [['array_popback'], 'ARRAY_DATETIME', ['ARRAY_DATETIME'], '', '', '', 'vec', ''], + [['array_popback'], 'ARRAY_DATE', ['ARRAY_DATE'], '', '', '', 'vec', ''], + [['array_popback'], 'ARRAY_FLOAT', ['ARRAY_FLOAT'], '', '', '', 'vec', ''], + [['array_popback'], 'ARRAY_DOUBLE', ['ARRAY_DOUBLE'], '', '', '', 'vec', ''], + [['array_popback'], 'ARRAY_DECIMALV2', ['ARRAY_DECIMALV2'], '', '', '', 'vec', ''], + [['array_popback'], 'ARRAY_VARCHAR', ['ARRAY_VARCHAR'], '', '', '', 'vec', ''], + [['array_popback'], 'ARRAY_STRING', ['ARRAY_STRING'], '', '', '', 'vec', ''], [['array_range'], 'ARRAY_INT', ['INT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], [['array_range'], 'ARRAY_INT', ['INT', 'INT'], '', '', 
'', 'vec', 'ALWAYS_NULLABLE'], diff --git a/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out b/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out index 7c3fa001a1..bf80ca4ab7 100644 --- a/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out +++ b/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out @@ -179,3 +179,39 @@ 6 \N 7 \N +-- !select -- +1 [1, 2] +2 [] +3 [] +4 [1, 2, 3, 4, 5, 4, 3, 2] +5 [] +6 [1, 2, 3, 4, 5, 4, 3, 2] +7 [8, 9, NULL, 10] + +-- !select -- +1 [] +2 [] +3 [] +4 \N +5 \N +6 \N +7 \N + +-- !select -- +1 [] +2 \N +3 \N +4 \N +5 \N +6 \N +7 \N + +-- !select -- +1 [] +2 \N +3 \N +4 \N +5 \N +6 \N +7 \N + diff --git a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy index c4729e0b20..965b132cc4 100644 --- a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy +++ b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy @@ -66,4 +66,8 @@ suite("test_array_functions") { qt_select "SELECT k1, array_enumerate(k5) from ${tableName} ORDER BY k1" qt_select "SELECT k1, array_enumerate(k6) from ${tableName} ORDER BY k1" qt_select "SELECT k1, array_enumerate(k7) from ${tableName} ORDER BY k1" + qt_select "SELECT k1, array_popback(k2) from ${tableName} ORDER BY k1" + qt_select "SELECT k1, array_popback(k5) from ${tableName} ORDER BY k1" + qt_select "SELECT k1, array_popback(k6) from ${tableName} ORDER BY k1" + qt_select "SELECT k1, array_popback(k7) from ${tableName} ORDER BY k1" }