From 20b3bdb000b6cfb40ed8159af74e3e826cfb2eb2 Mon Sep 17 00:00:00 2001 From: gitccl <60637740+gitccl@users.noreply.github.com> Date: Fri, 31 Mar 2023 12:51:29 +0800 Subject: [PATCH] [vectorized](function) support array_first_index function (#18175) mysql> select array_first_index(x->x+1>3, [2, 3, 4]); +-------------------------------------------------------------------+ | array_first_index(array_map([x] -> x(0) + 1 > 3, ARRAY(2, 3, 4))) | +-------------------------------------------------------------------+ | 2 | +-------------------------------------------------------------------+ mysql> select array_first_index(x -> x is null, [null, 1, 2]); +----------------------------------------------------------------------+ | array_first_index(array_map([x] -> x(0) IS NULL, ARRAY(NULL, 1, 2))) | +----------------------------------------------------------------------+ | 1 | +----------------------------------------------------------------------+ mysql> select array_first_index(x->power(x,2)>10, [1, 2, 3, 4]); +---------------------------------------------------------------------------------+ | array_first_index(array_map([x] -> power(x(0), 2.0) > 10.0, ARRAY(1, 2, 3, 4))) | +---------------------------------------------------------------------------------+ | 4 | +---------------------------------------------------------------------------------+ --- be/src/vec/CMakeLists.txt | 1 + .../array/function_array_first_index.cpp | 92 +++++++++++++++++++ .../array/function_array_register.cpp | 2 + .../array-functions/array_first_index.md | 87 ++++++++++++++++++ docs/sidebars.json | 1 + .../array-functions/array_first_index.md | 87 ++++++++++++++++++ .../analysis/LambdaFunctionCallExpr.java | 8 +- gensrc/script/doris_builtins_functions.py | 2 + .../test_array_first_index_function.out | 88 ++++++++++++++++++ .../test_array_first_index_function.groovy | 63 +++++++++++++ 10 files changed, 427 insertions(+), 4 deletions(-) create mode 100644 
be/src/vec/functions/array/function_array_first_index.cpp create mode 100644 docs/en/docs/sql-manual/sql-functions/array-functions/array_first_index.md create mode 100644 docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_first_index.md create mode 100644 regression-test/data/query_p0/sql_functions/array_functions/test_array_first_index_function.out create mode 100644 regression-test/suites/query_p0/sql_functions/array_functions/test_array_first_index_function.groovy diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt index e79bb2faa9..f4fc16147a 100644 --- a/be/src/vec/CMakeLists.txt +++ b/be/src/vec/CMakeLists.txt @@ -189,6 +189,7 @@ set(VEC_FILES functions/array/function_array_concat.cpp functions/array/function_array_zip.cpp functions/array/function_array_pushfront.cpp + functions/array/function_array_first_index.cpp functions/function_map.cpp exprs/table_function/vexplode_json_array.cpp functions/math.cpp diff --git a/be/src/vec/functions/array/function_array_first_index.cpp b/be/src/vec/functions/array/function_array_first_index.cpp new file mode 100644 index 0000000000..3bf4379f6a --- /dev/null +++ b/be/src/vec/functions/array/function_array_first_index.cpp @@ -0,0 +1,92 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace doris::vectorized { + +// For every input row, yields the 1-based position of the first array element that is +// not NULL and for which get_bool() is true. Yields 0 when no element qualifies or +// when the array itself is NULL. +// Example: array_first_index([0, 1, 0]) -> 2 +class FunctionArrayFirstIndex : public IFunction { +public: + static constexpr auto name = "array_first_index"; + + static FunctionPtr create() { return std::make_shared(); } + + String get_name() const override { return name; } + + size_t get_number_of_arguments() const override { return 1; } + + // NOTE(review): presumably opts out of framework-level NULL handling so that the + // null map can be inspected in execute_impl() -- confirm against IFunction. + bool use_default_implementation_for_nulls() const override { return false; } + + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + return std::make_shared(); + } + + // Builds a ColumnInt64 with one value per input row and stores it at block position + // `result`. Always returns Status::OK(). + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) override { + auto src_column = + block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); + const ColumnArray* array_column = nullptr; + const UInt8* array_null_map = nullptr; + // Unwrap a nullable input, keeping its null map so NULL array rows can be skipped. + if (src_column->is_nullable()) { + auto nullable_array = assert_cast(src_column.get()); + array_column = assert_cast(&nullable_array->get_nested_column()); + array_null_map = nullable_array->get_null_map_column().get_data().data(); + } else { + array_column = assert_cast(src_column.get()); + } + + auto& src_nested_data = array_column->get_data(); + auto& src_offset = array_column->get_offsets(); + + // Every row starts out as 0, which is also the answer for NULL arrays. + auto result_data_col = ColumnInt64::create(input_rows_count, 0); + auto& result_data = result_data_col->get_data(); + + for (size_t i = 0; i < input_rows_count; ++i) { + if (array_null_map && array_null_map[i]) { + // NULL array: keep the pre-filled 0. + continue; + } + + // default index is 0 if such index is not found + size_t first_index = 0; + // Row i is scanned over [src_offset[i - 1], src_offset[i]) of the nested column. + // NOTE(review): for i == 0 this reads src_offset[-1]; assumes the offsets + // container is left-padded so that this yields 0 -- confirm. + for (size_t off = src_offset[i - 1]; off < src_offset[i]; ++off) { + if (!src_nested_data.is_null_at(off) && src_nested_data.get_bool(off)) { + first_index = off - src_offset[i - 
1] + 1; + break; + } + } + result_data[i] = first_index; + } + block.replace_by_position(result, std::move(result_data_col)); + return Status::OK(); + } +}; + +void register_function_array_first_index(SimpleFunctionFactory& factory) { + factory.register_function(); +} + +} // namespace doris::vectorized diff --git a/be/src/vec/functions/array/function_array_register.cpp b/be/src/vec/functions/array/function_array_register.cpp index f70189ce06..d8ff730330 100644 --- a/be/src/vec/functions/array/function_array_register.cpp +++ b/be/src/vec/functions/array/function_array_register.cpp @@ -49,6 +49,7 @@ void register_function_array_apply(SimpleFunctionFactory&); void register_function_array_concat(SimpleFunctionFactory&); void register_function_array_zip(SimpleFunctionFactory&); void register_function_array_pushfront(SimpleFunctionFactory& factory); +void register_function_array_first_index(SimpleFunctionFactory& factory); void register_function_array(SimpleFunctionFactory& factory) { register_function_array_exists(factory); @@ -78,6 +79,7 @@ void register_function_array(SimpleFunctionFactory& factory) { register_function_array_concat(factory); register_function_array_zip(factory); register_function_array_pushfront(factory); + register_function_array_first_index(factory); } } // namespace doris::vectorized diff --git a/docs/en/docs/sql-manual/sql-functions/array-functions/array_first_index.md b/docs/en/docs/sql-manual/sql-functions/array-functions/array_first_index.md new file mode 100644 index 0000000000..8d304747b1 --- /dev/null +++ b/docs/en/docs/sql-manual/sql-functions/array-functions/array_first_index.md @@ -0,0 +1,87 @@ +--- +{ + "title": "array_first_index", + "language": "en" +} +--- + + + +## array_first_index + + + +array_first_index + + + +### description + +```sql +array_first_index(lambda, array1, ...) 
+``` + +Takes a lambda expression as the first argument and evaluates it against the elements of the other input ARRAY arguments. Returns the first (1-based) index `i` for which the return value of `lambda(array1[i], ...)` is not 0. Returns 0 if no such index is found. + +The lambda expression takes one or more parameters, and all input arrays must have the same number of elements. Any legal scalar function can be used inside the lambda; aggregate functions and the like are not supported. + +``` +array_first_index(x->x>1, array1); +array_first_index(x->(x%2 = 0), array1); +array_first_index(x->(abs(x)-1), array1); +array_first_index((x,y)->(x = y), array1, array2); +``` + +### example + +``` +mysql> select array_first_index(x->x+1>3, [2, 3, 4]); ++-------------------------------------------------------------------+ +| array_first_index(array_map([x] -> x(0) + 1 > 3, ARRAY(2, 3, 4))) | ++-------------------------------------------------------------------+ +| 2 | ++-------------------------------------------------------------------+ + +mysql> select array_first_index(x -> x is null, [null, 1, 2]); ++----------------------------------------------------------------------+ +| array_first_index(array_map([x] -> x(0) IS NULL, ARRAY(NULL, 1, 2))) | ++----------------------------------------------------------------------+ +| 1 | ++----------------------------------------------------------------------+ + +mysql> select array_first_index(x->power(x,2)>10, [1, 2, 3, 4]); ++---------------------------------------------------------------------------------+ +| array_first_index(array_map([x] -> power(x(0), 2.0) > 10.0, ARRAY(1, 2, 3, 4))) | ++---------------------------------------------------------------------------------+ +| 4 | ++---------------------------------------------------------------------------------+ + +mysql> select col2, col3, array_first_index((x,y)->x>y, col2, col3) from array_test; 
++--------------+--------------+---------------------------------------------------------------------+ +| col2 | col3 | array_first_index(array_map([x, y] -> x(0) > y(1), `col2`, `col3`)) | ++--------------+--------------+---------------------------------------------------------------------+ +| [1, 2, 3] | [3, 4, 5] | 0 | +| [1, NULL, 2] | [NULL, 3, 1] | 3 | +| [1, 2, 3] | [9, 8, 7] | 0 | +| NULL | NULL | 0 | ++--------------+--------------+---------------------------------------------------------------------+ +``` + +### keywords + +ARRAY,FIRST_INDEX,ARRAY_FIRST_INDEX \ No newline at end of file diff --git a/docs/sidebars.json b/docs/sidebars.json index edb2d9b5e1..d64e226e96 100644 --- a/docs/sidebars.json +++ b/docs/sidebars.json @@ -305,6 +305,7 @@ "sql-manual/sql-functions/array-functions/array_concat", "sql-manual/sql-functions/array-functions/array_zip", "sql-manual/sql-functions/array-functions/array_exists", + "sql-manual/sql-functions/array-functions/array_first_index", "sql-manual/sql-functions/array-functions/arrays_overlap", "sql-manual/sql-functions/array-functions/countequal", "sql-manual/sql-functions/array-functions/element_at" diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_first_index.md b/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_first_index.md new file mode 100644 index 0000000000..fb267cd71d --- /dev/null +++ b/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_first_index.md @@ -0,0 +1,87 @@ +--- +{ + "title": "array_first_index", + "language": "zh-CN" +} +--- + + + +## array_first_index + + + +array_first_index + + + +### description + +```sql +array_first_index(lambda, array1, ...) 
+``` + +使用lambda表达式作为输入参数,对其他输入ARRAY参数的内部数据进行相应的表达式计算。 返回第一个使得 `lambda(array1[i], ...)` 返回值不为 0 的索引。如果没找到满足此条件的索引,则返回 0。 + +在lambda表达式中输入的参数为1个或多个,所有输入的array的元素数量必须一致。在lambda中可以执行合法的标量函数,不支持聚合函数等。 + +``` +array_first_index(x->x>1, array1); +array_first_index(x->(x%2 = 0), array1); +array_first_index(x->(abs(x)-1), array1); +array_first_index((x,y)->(x = y), array1, array2); +``` + +### example + +``` +mysql> select array_first_index(x->x+1>3, [2, 3, 4]); ++-------------------------------------------------------------------+ +| array_first_index(array_map([x] -> x(0) + 1 > 3, ARRAY(2, 3, 4))) | ++-------------------------------------------------------------------+ +| 2 | ++-------------------------------------------------------------------+ + +mysql> select array_first_index(x -> x is null, [null, 1, 2]); ++----------------------------------------------------------------------+ +| array_first_index(array_map([x] -> x(0) IS NULL, ARRAY(NULL, 1, 2))) | ++----------------------------------------------------------------------+ +| 1 | ++----------------------------------------------------------------------+ + +mysql> select array_first_index(x->power(x,2)>10, [1, 2, 3, 4]); ++---------------------------------------------------------------------------------+ +| array_first_index(array_map([x] -> power(x(0), 2.0) > 10.0, ARRAY(1, 2, 3, 4))) | ++---------------------------------------------------------------------------------+ +| 4 | ++---------------------------------------------------------------------------------+ + +mysql> select col2, col3, array_first_index((x,y)->x>y, col2, col3) from array_test; ++--------------+--------------+---------------------------------------------------------------------+ +| col2 | col3 | array_first_index(array_map([x, y] -> x(0) > y(1), `col2`, `col3`)) | ++--------------+--------------+---------------------------------------------------------------------+ +| [1, 2, 3] | [3, 4, 5] | 0 | +| [1, NULL, 2] | [NULL, 3, 1] | 3 | +| [1, 2, 3] | 
[9, 8, 7] | 0 | +| NULL | NULL | 0 | ++--------------+--------------+---------------------------------------------------------------------+ +``` + +### keywords + +ARRAY,FIRST_INDEX,ARRAY_FIRST_INDEX \ No newline at end of file diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/LambdaFunctionCallExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/LambdaFunctionCallExpr.java index b1277f2ef5..546ef19822 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/LambdaFunctionCallExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/LambdaFunctionCallExpr.java @@ -35,12 +35,12 @@ import java.util.List; public class LambdaFunctionCallExpr extends FunctionCallExpr { public static final ImmutableSet LAMBDA_FUNCTION_SET = new ImmutableSortedSet.Builder( String.CASE_INSENSITIVE_ORDER).add("array_map").add("array_filter").add("array_exists").add("array_sortby") - .build(); + .add("array_first_index").build(); // The functions in this set are all normal array functions when implemented initially. 
// and then wants add lambda expr as the input param, so we rewrite it to contains an array_map lambda function // rather than reimplementing a lambda function, this will be reused the implementation of normal array function public static final ImmutableSet LAMBDA_MAPPED_FUNCTION_SET = new ImmutableSortedSet.Builder( - String.CASE_INSENSITIVE_ORDER).add("array_exists").add("array_sortby").build(); + String.CASE_INSENSITIVE_ORDER).add("array_exists").add("array_sortby").add("array_first_index").build(); private static final Logger LOG = LogManager.getLogger(LambdaFunctionCallExpr.class); @@ -105,7 +105,8 @@ public class LambdaFunctionCallExpr extends FunctionCallExpr { throw new AnalysisException(getFunctionNotFoundError(collectChildReturnTypes())); } fn.setReturnType(ArrayType.create(lambda.getChild(0).getType(), true)); - } else if (fnName.getFunction().equalsIgnoreCase("array_exists")) { + } else if (fnName.getFunction().equalsIgnoreCase("array_exists") + || fnName.getFunction().equalsIgnoreCase("array_first_index")) { if (fnParams.exprs() == null || fnParams.exprs().size() < 1) { throw new AnalysisException("The " + fnName.getFunction() + " function must have at least one param"); } @@ -138,7 +139,6 @@ public class LambdaFunctionCallExpr extends FunctionCallExpr { LOG.warn("fn {} not exists", this.toSqlImpl()); throw new AnalysisException(getFunctionNotFoundError(collectChildReturnTypes())); } - fn.setReturnType(getChild(0).getType()); } else if (fnName.getFunction().equalsIgnoreCase("array_filter")) { if (fnParams.exprs() == null || fnParams.exprs().size() != 2) { throw new AnalysisException("The " + fnName.getFunction() + " function must have two params"); diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index fbf4928661..b1063f77d1 100644 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -675,6 +675,8 @@ visible_functions = [ [['array_exists'], 'ARRAY_BOOLEAN', 
['ARRAY_VARCHAR'], ''], [['array_exists'], 'ARRAY_BOOLEAN', ['ARRAY_STRING'], ''], + [['array_first_index'], 'BIGINT', ['ARRAY_BOOLEAN'], 'ALWAYS_NOT_NULLABLE'], + [['array_pushfront'], 'ARRAY_BOOLEAN', ['ARRAY_BOOLEAN', 'BOOLEAN'], 'ALWAYS_NULLABLE'], [['array_pushfront'], 'ARRAY_TINYINT', ['ARRAY_TINYINT', 'TINYINT'], 'ALWAYS_NULLABLE'], [['array_pushfront'], 'ARRAY_SMALLINT', ['ARRAY_SMALLINT', 'SMALLINT'], 'ALWAYS_NULLABLE'], diff --git a/regression-test/data/query_p0/sql_functions/array_functions/test_array_first_index_function.out b/regression-test/data/query_p0/sql_functions/array_functions/test_array_first_index_function.out new file mode 100644 index 0000000000..cad677fa22 --- /dev/null +++ b/regression-test/data/query_p0/sql_functions/array_functions/test_array_first_index_function.out @@ -0,0 +1,88 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select -- +2 + +-- !select -- +0 + +-- !select -- +0 + +-- !select -- +1 + +-- !select -- +2 + +-- !select -- +1 [1, 2, 3, 4, 5] [10, 20, -40, 80, -100] 3 +2 [6, 7, 8] [10, 12, 13] 3 +3 [1] [-100] 3 +4 [1, NULL, 2] [NULL, 3, 1] 3 +5 [] [] 3 +6 \N \N 3 + +-- !select -- +1 [1, 2, 3, 4, 5] [10, 20, -40, 80, -100] 1 +2 [6, 7, 8] [10, 12, 13] 1 +3 [1] [-100] 1 +4 [1, NULL, 2] [NULL, 3, 1] 1 +5 [] [] 1 +6 \N \N 1 + +-- !select -- +1 [1, 2, 3, 4, 5] [10, 20, -40, 80, -100] 2 +2 [6, 7, 8] [10, 12, 13] 2 +3 [1] [-100] 2 +4 [1, NULL, 2] [NULL, 3, 1] 2 +5 [] [] 2 +6 \N \N 2 + +-- !select -- +[1, 2, 3, 4, 5] 1 +[6, 7, 8] 1 +[1] 1 +[1, NULL, 2] 1 +[] 0 +\N 0 + +-- !select -- +[1, 2, 3, 4, 5] 4 +[6, 7, 8] 1 +[1] 0 +[1, NULL, 2] 0 +[] 0 +\N 0 + +-- !select -- +[10, 20, -40, 80, -100] 2 +[10, 12, 13] 2 +[-100] 1 +[NULL, 3, 1] 0 +[] 0 +\N 0 + +-- !select -- +[1, 2, 3, 4, 5] [10, 20, -40, 80, -100] 3 +[6, 7, 8] [10, 12, 13] 0 +[1] [-100] 1 +[1, NULL, 2] [NULL, 3, 1] 3 +[] [] 0 +\N \N 0 + +-- !select -- +[1, 2, 3, 4, 5] [10, 20, -40, 80, -100] 1 +[6, 7, 8] [10, 12, 13] 1 +[1] 
[-100] 1 +[1, NULL, 2] [NULL, 3, 1] 3 +[] [] 0 +\N \N 0 + +-- !select -- +[1, 2, 3, 4, 5] [10, 20, -40, 80, -100] 2 +[6, 7, 8] [10, 12, 13] 1 +[1] [-100] 1 +[1, NULL, 2] [NULL, 3, 1] 0 +[] [] 0 +\N \N 0 + diff --git a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_first_index_function.groovy b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_first_index_function.groovy new file mode 100644 index 0000000000..480208f064 --- /dev/null +++ b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_first_index_function.groovy @@ -0,0 +1,63 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_array_first_index_function") { + + def tableName = "array_first_index_table" + sql "DROP TABLE IF EXISTS ${tableName}" + sql """ + CREATE TABLE IF NOT EXISTS `${tableName}` ( + `id` int(11) NULL, + `c_array1` array NULL, + `c_array2` array NULL + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "storage_format" = "V2" + ) + """ + + + // Rows cover multi-element arrays, a single-element array, arrays with NULL elements, + // empty arrays and NULL arrays. + sql """ + INSERT INTO ${tableName} values + (1, [1,2,3,4,5], [10,20,-40,80,-100]), + (2, [6,7,8],[10,12,13]), (3, [1],[-100]), + (4, [1, null, 2], [null, 3, 1]), (5, [], []), (6, null, null) + """ + + // Literal array arguments, including an empty array, a NULL element and a nested array_map. + qt_select "select array_first_index(x-> x + 1 > 2, [1, 2, 3])" + qt_select "select array_first_index(x -> x > 1,[]);" + qt_select "select array_first_index(x -> x > 1, [null]);" + qt_select "select array_first_index(x -> x is null, [null, 1, 2]);" + qt_select "select array_first_index(x -> x > 2, array_map(x->power(x,2),[1,2,3]));" + + // Literal array argument selected alongside every table row. + qt_select "select *, array_first_index(x->x>2,[1,2,3]) from ${tableName} order by id;" + qt_select "select *, array_first_index(x->x+1,[1,2,3]) from ${tableName} order by id;" + qt_select "select *, array_first_index(x->x%2=0,[1,2,3]) from ${tableName} order by id;" + + // Single-parameter lambdas applied to a table column. + qt_select "select c_array1, array_first_index(x->x,c_array1) from ${tableName} order by id;" + qt_select "select c_array1, array_first_index(x->x>3,c_array1) from ${tableName} order by id;" + qt_select "select c_array2, array_first_index(x->power(x,2)>100,c_array2) from ${tableName} order by id;" + + // Two-parameter lambdas applied to both table columns. + qt_select "select c_array1, c_array2, array_first_index((x,y)->x>y, c_array1, c_array2) from ${tableName} order by id;" + qt_select "select c_array1, c_array2, array_first_index((x,y)->x+y, c_array1, c_array2) from ${tableName} order by id;" + qt_select "select c_array1, c_array2, array_first_index((x,y)->x * abs(y) > 10, c_array1, c_array2) from ${tableName} order by id;" + + sql "DROP TABLE IF EXISTS ${tableName}" +}