diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt index 7e1e0d3706..725c4c88bc 100644 --- a/be/src/vec/CMakeLists.txt +++ b/be/src/vec/CMakeLists.txt @@ -211,7 +211,7 @@ set(VEC_FILES functions/array/function_array_concat.cpp functions/array/function_array_zip.cpp functions/array/function_array_pushfront.cpp - functions/array/function_array_first_index.cpp + functions/array/function_array_first_or_last_index.cpp functions/array/function_array_cum_sum.cpp functions/array/function_array_count.cpp functions/function_map.cpp diff --git a/be/src/vec/functions/array/function_array_first_index.cpp b/be/src/vec/functions/array/function_array_first_or_last_index.cpp similarity index 70% rename from be/src/vec/functions/array/function_array_first_index.cpp rename to be/src/vec/functions/array/function_array_first_or_last_index.cpp index c17f3e0a4d..1e0fa67e4f 100644 --- a/be/src/vec/functions/array/function_array_first_index.cpp +++ b/be/src/vec/functions/array/function_array_first_or_last_index.cpp @@ -44,12 +44,17 @@ class FunctionContext; namespace doris::vectorized { -// array_first_index([0, 1, 0]) -> [2] -class FunctionArrayFirstIndex : public IFunction { +/** + * support array_first_index and array_last_index for input lambda expr + * eg. array_first_index(x -> x == 0, [0, 1, 0]) -> [1] + * array_last_index(x -> x == 0, [0, 1, 0]) -> [3] + */ +template +class FunctionArrayFirstOrLastIndex : public IFunction { public: - static constexpr auto name = "array_first_index"; + static constexpr auto name = first ? 
"array_first_index" : "array_last_index"; - static FunctionPtr create() { return std::make_shared(); } + static FunctionPtr create() { return std::make_shared(); } String get_name() const override { return name; } @@ -87,22 +92,34 @@ public: } // default index is 0 if such index is not found - size_t first_index = 0; - for (size_t off = src_offset[i - 1]; off < src_offset[i]; ++off) { - if (!src_nested_data.is_null_at(off) && src_nested_data.get_bool(off)) { - first_index = off - src_offset[i - 1] + 1; - break; + size_t res_index = 0; + size_t start_index = src_offset[i - 1]; + size_t end_index = src_offset[i]; + for (size_t off = start_index; off < end_index; ++off) { + if constexpr (first) { + if (!src_nested_data.is_null_at(off) && src_nested_data.get_bool(off)) { + res_index = off - start_index + 1; + break; + } + } else { + size_t reverse_off = start_index + (end_index - 1 - off); + if (!src_nested_data.is_null_at(reverse_off) && + src_nested_data.get_bool(reverse_off)) { + res_index = reverse_off - start_index + 1; + break; + } } } - result_data[i] = first_index; + result_data[i] = res_index; } block.replace_by_position(result, std::move(result_data_col)); return Status::OK(); } }; -void register_function_array_first_index(SimpleFunctionFactory& factory) { - factory.register_function(); +void register_function_array_first_or_last_index(SimpleFunctionFactory& factory) { + factory.register_function>(); + factory.register_function>(); } } // namespace doris::vectorized diff --git a/be/src/vec/functions/array/function_array_register.cpp b/be/src/vec/functions/array/function_array_register.cpp index 1c2a4e128a..adb6442f76 100644 --- a/be/src/vec/functions/array/function_array_register.cpp +++ b/be/src/vec/functions/array/function_array_register.cpp @@ -50,7 +50,7 @@ void register_function_array_apply(SimpleFunctionFactory&); void register_function_array_concat(SimpleFunctionFactory&); void register_function_array_zip(SimpleFunctionFactory&); void 
register_function_array_pushfront(SimpleFunctionFactory& factory); -void register_function_array_first_index(SimpleFunctionFactory& factory); +void register_function_array_first_or_last_index(SimpleFunctionFactory& factory); void register_function_array_cum_sum(SimpleFunctionFactory& factory); void register_function_array_count(SimpleFunctionFactory&); @@ -83,7 +83,7 @@ void register_function_array(SimpleFunctionFactory& factory) { register_function_array_concat(factory); register_function_array_zip(factory); register_function_array_pushfront(factory); - register_function_array_first_index(factory); + register_function_array_first_or_last_index(factory); register_function_array_cum_sum(factory); register_function_array_count(factory); } diff --git a/docs/en/docs/sql-manual/sql-functions/array-functions/array_last_index.md b/docs/en/docs/sql-manual/sql-functions/array-functions/array_last_index.md new file mode 100644 index 0000000000..3363de41c2 --- /dev/null +++ b/docs/en/docs/sql-manual/sql-functions/array-functions/array_last_index.md @@ -0,0 +1,88 @@ +--- +{ + "title": "array_last_index", + "language": "en" +} +--- + + + +## array_last_index + + + +array_last_index + + + +### description + +#### Syntax + +`BIGINT array_last_index(lambda, ARRAY array1, ...)` + +Use a lambda expression as an input parameter to perform corresponding expression calculations on the internal data of other input ARRAY parameters. Returns the last index (counting from 1) for which `lambda(array1[i], ...)` returns a value that is not 0. Returns 0 if no such index is found. + +The lambda expression takes one or more parameters, and all input arrays must have the same number of elements. Legal scalar functions can be executed in the lambda; aggregate functions, etc. are not supported. 
+ +``` +array_last_index(x->x>1, array1); +array_last_index(x->(x%2 = 0), array1); +array_last_index(x->(abs(x)-1), array1); +array_last_index((x,y)->(x = y), array1, array2); +``` + +### example + +``` +mysql> select array_last_index(x -> x is null, [null, null, 1, 2]); ++------------------------------------------------------------------------+ +| array_last_index(array_map([x] -> x IS NULL, ARRAY(NULL, NULL, 1, 2))) | ++------------------------------------------------------------------------+ +| 2 | ++------------------------------------------------------------------------+ + + +mysql> select array_last_index(x->x='s', ['a', 's', 's', 's', 'b']); ++-----------------------------------------------------------------------------+ +| array_last_index(array_map([x] -> x = 's', ARRAY('a', 's', 's', 's', 'b'))) | ++-----------------------------------------------------------------------------+ +| 4 | ++-----------------------------------------------------------------------------+ + +mysql> select array_last_index(x->power(x,2)>10, [1, 4, 3, 4]); ++-----------------------------------------------------------------------------+ +| array_last_index(array_map([x] -> power(x, 2.0) > 10.0, ARRAY(1, 4, 3, 4))) | ++-----------------------------------------------------------------------------+ +| 4 | ++-----------------------------------------------------------------------------+ + +mysql> select col2, col3, array_last_index((x,y)->x>y, col2, col3) from array_test; ++--------------+--------------+---------------------------------------------------------------------+ +| col2 | col3 | array_last_index(array_map([x, y] -> x(0) > y(1), `col2`, `col3`)) | ++--------------+--------------+---------------------------------------------------------------------+ +| [1, 2, 3] | [3, 4, 5] | 0 | +| [1, NULL, 2] | [NULL, 3, 1] | 3 | +| [1, 2, 3] | [9, 8, 7] | 0 | +| NULL | NULL | 0 | ++--------------+--------------+---------------------------------------------------------------------+ +``` + +### 
keywords + +ARRAY,LAST_INDEX,array_last_index \ No newline at end of file diff --git a/docs/sidebars.json b/docs/sidebars.json index de9748cc61..cc1d5f3810 100644 --- a/docs/sidebars.json +++ b/docs/sidebars.json @@ -310,6 +310,7 @@ "sql-manual/sql-functions/array-functions/array_cum_sum", "sql-manual/sql-functions/array-functions/array_exists", "sql-manual/sql-functions/array-functions/array_first_index", + "sql-manual/sql-functions/array-functions/array_last_index", "sql-manual/sql-functions/array-functions/array_last", "sql-manual/sql-functions/array-functions/arrays_overlap", "sql-manual/sql-functions/array-functions/array_count", diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_last_index.md b/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_last_index.md new file mode 100644 index 0000000000..2c15f67ad9 --- /dev/null +++ b/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_last_index.md @@ -0,0 +1,89 @@ +--- +{ + "title": "array_last_index", + "language": "zh-CN" +} +--- + + + +## array_last_index + + + +array_last_index + + + +### description + +#### Syntax + +`BIGINT array_last_index(lambda, ARRAY array1, ...)` + +使用lambda表达式作为输入参数,对其他输入ARRAY参数的内部数据进行相应的表达式计算。 返回最后一个使得 `lambda(array1[i], ...)` 返回值不为 0 的索引。如果没找到满足此条件的索引,则返回 0。 + +在lambda表达式中输入的参数为1个或多个,所有输入的array的元素数量必须一致。在lambda中可以执行合法的标量函数,不支持聚合函数等。 + +``` +array_last_index(x->x>1, array1); +array_last_index(x->(x%2 = 0), array1); +array_last_index(x->(abs(x)-1), array1); +array_last_index((x,y)->(x = y), array1, array2); +``` + +### example + +``` +mysql> select array_last_index(x->x+1>3, [2, 3, 4]); ++-------------------------------------------------------------------+ +| array_last_index(array_map([x] -> x(0) + 1 > 3, ARRAY(2, 3, 4))) | ++-------------------------------------------------------------------+ +| 3 | ++-------------------------------------------------------------------+ + +mysql> select array_last_index(x -> x is null, [null, 1, 2]); 
++----------------------------------------------------------------------+ +| array_last_index(array_map([x] -> x(0) IS NULL, ARRAY(NULL, 1, 2))) | ++----------------------------------------------------------------------+ +| 1 | ++----------------------------------------------------------------------+ + +mysql> select array_last_index(x->power(x,2)>10, [1, 2, 3, 4]); ++---------------------------------------------------------------------------------+ +| array_last_index(array_map([x] -> power(x(0), 2.0) > 10.0, ARRAY(1, 2, 3, 4))) | ++---------------------------------------------------------------------------------+ +| 4 | ++---------------------------------------------------------------------------------+ + +mysql> select c_array1, c_array2, array_last_index((x,y)->x>y, c_array1, c_array2) from array_index_table order by id; ++-----------------+-------------------------+----------------------------------------------------------------------+ +| c_array1 | c_array2 | array_last_index(array_map([x, y] -> x > y, `c_array1`, `c_array2`)) | ++-----------------+-------------------------+----------------------------------------------------------------------+ +| [1, 2, 3, 4, 5] | [10, 20, -40, 80, -100] | 5 | +| [6, 7, 8] | [10, 12, 13] | 0 | +| [1] | [-100] | 1 | +| [1, NULL, 2] | [NULL, 3, 1] | 3 | +| [] | [] | 0 | +| NULL | NULL | 0 | ++-----------------+-------------------------+----------------------------------------------------------------------+ +``` + +### keywords + +ARRAY,LAST_INDEX,array_last_index \ No newline at end of file diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/LambdaFunctionCallExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/LambdaFunctionCallExpr.java index 094bac2c6f..9fb9122b3e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/LambdaFunctionCallExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/LambdaFunctionCallExpr.java @@ -37,13 +37,13 @@ import java.util.List; public class 
LambdaFunctionCallExpr extends FunctionCallExpr { public static final ImmutableSet LAMBDA_FUNCTION_SET = new ImmutableSortedSet.Builder( String.CASE_INSENSITIVE_ORDER).add("array_map").add("array_filter").add("array_exists").add("array_sortby") - .add("array_first_index").add("array_last").add("array_count").build(); + .add("array_first_index").add("array_last_index").add("array_last").add("array_count").build(); // The functions in this set are all normal array functions when implemented initially. // and then wants add lambda expr as the input param, so we rewrite it to contains an array_map lambda function // rather than reimplementing a lambda function, this will be reused the implementation of normal array function public static final ImmutableSet LAMBDA_MAPPED_FUNCTION_SET = new ImmutableSortedSet.Builder( String.CASE_INSENSITIVE_ORDER).add("array_exists").add("array_sortby") - .add("array_first_index").add("array_last").add("array_count") + .add("array_first_index").add("array_last_index").add("array_last").add("array_count") .build(); private static final Logger LOG = LogManager.getLogger(LambdaFunctionCallExpr.class); @@ -110,6 +110,7 @@ public class LambdaFunctionCallExpr extends FunctionCallExpr { true, true, NullableMode.DEPEND_ON_ARGUMENT); } else if (fnName.getFunction().equalsIgnoreCase("array_exists") || fnName.getFunction().equalsIgnoreCase("array_first_index") + || fnName.getFunction().equalsIgnoreCase("array_last_index") || fnName.getFunction().equalsIgnoreCase("array_count")) { if (fnParams.exprs() == null || fnParams.exprs().size() < 1) { throw new AnalysisException("The " + fnName.getFunction() + " function must have at least one param"); diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index a81ad078bf..ef0b72ee9e 100644 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -688,6 +688,8 @@ visible_functions = [ [['array_exists'], 'ARRAY_BOOLEAN', 
['ARRAY_STRING'], ''], [['array_first_index'], 'BIGINT', ['ARRAY_BOOLEAN'], 'ALWAYS_NOT_NULLABLE'], + [['array_last_index'], 'BIGINT', ['ARRAY_BOOLEAN'], 'ALWAYS_NOT_NULLABLE'], + [['array_count'], 'BIGINT', ['ARRAY_BOOLEAN'], 'ALWAYS_NOT_NULLABLE'], [['array_shuffle', 'shuffle'], 'ARRAY_BOOLEAN', ['ARRAY_BOOLEAN'], ''], diff --git a/regression-test/data/query_p0/sql_functions/array_functions/test_array_last_index_function.out b/regression-test/data/query_p0/sql_functions/array_functions/test_array_last_index_function.out new file mode 100644 index 0000000000..acf8f5b777 --- /dev/null +++ b/regression-test/data/query_p0/sql_functions/array_functions/test_array_last_index_function.out @@ -0,0 +1,88 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select -- +5 + +-- !select -- +0 + +-- !select -- +0 + +-- !select -- +2 + +-- !select -- +3 + +-- !select -- +1 [1, 2, 3, 4, 5] [10, 20, -40, 80, -100] 3 +2 [6, 7, 8] [10, 12, 13] 3 +3 [1] [-100] 3 +4 [1, NULL, 2] [NULL, 3, 1] 3 +5 [] [] 3 +6 \N \N 3 + +-- !select -- +1 [1, 2, 3, 4, 5] [10, 20, -40, 80, -100] 3 +2 [6, 7, 8] [10, 12, 13] 3 +3 [1] [-100] 3 +4 [1, NULL, 2] [NULL, 3, 1] 3 +5 [] [] 3 +6 \N \N 3 + +-- !select -- +1 [1, 2, 3, 4, 5] [10, 20, -40, 80, -100] 2 +2 [6, 7, 8] [10, 12, 13] 2 +3 [1] [-100] 2 +4 [1, NULL, 2] [NULL, 3, 1] 2 +5 [] [] 2 +6 \N \N 2 + +-- !select -- +[1, 2, 3, 4, 5] 5 +[6, 7, 8] 3 +[1] 1 +[1, NULL, 2] 3 +[] 0 +\N 0 + +-- !select -- +[1, 2, 3, 4, 5] 5 +[6, 7, 8] 3 +[1] 0 +[1, NULL, 2] 0 +[] 0 +\N 0 + +-- !select -- +[10, 20, -40, 80, -100] 5 +[10, 12, 13] 3 +[-100] 1 +[NULL, 3, 1] 0 +[] 0 +\N 0 + +-- !select -- +[1, 2, 3, 4, 5] [10, 20, -40, 80, -100] 5 +[6, 7, 8] [10, 12, 13] 0 +[1] [-100] 1 +[1, NULL, 2] [NULL, 3, 1] 3 +[] [] 0 +\N \N 0 + +-- !select -- +[1, 2, 3, 4, 5] [10, 20, -40, 80, -100] 5 +[6, 7, 8] [10, 12, 13] 3 +[1] [-100] 1 +[1, NULL, 2] [NULL, 3, 1] 3 +[] [] 0 +\N \N 0 + +-- !select -- +[1, 2, 3, 4, 5] [10, 20, -40, 80, -100] 
5 +[6, 7, 8] [10, 12, 13] 3 +[1] [-100] 1 +[1, NULL, 2] [NULL, 3, 1] 0 +[] [] 0 +\N \N 0 + diff --git a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_last_index_function.groovy b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_last_index_function.groovy new file mode 100644 index 0000000000..63cad84932 --- /dev/null +++ b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_last_index_function.groovy @@ -0,0 +1,63 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_array_last_index_function") { + + def tableName = "array_last_index_table" + sql "DROP TABLE IF EXISTS ${tableName}" + sql """ + CREATE TABLE IF NOT EXISTS `${tableName}` ( + `id` int(11) NULL, + `c_array1` array NULL, + `c_array2` array NULL + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "storage_format" = "V2" + ) + """ + + + sql """ + INSERT INTO ${tableName} values + (1, [1,2,3,4,5], [10,20,-40,80,-100]), + (2, [6,7,8],[10,12,13]), (3, [1],[-100]), + (4, [1, null, 2], [null, 3, 1]), (5, [], []), (6, null, null) + """ + + qt_select "select array_last_index(x-> x + 1 > 2, [1, 2, 3, 3, 3])" + qt_select "select array_last_index(x -> x > 1,[]);" + qt_select "select array_last_index(x -> x > 1, [null]);" + qt_select "select array_last_index(x -> x is null, [null, null, 1, 2]);" + qt_select "select array_last_index(x -> x > 2, array_map(x->power(x,2),[1,2,3]));" + + qt_select "select *, array_last_index(x->x>2,[1,2,3]) from ${tableName} order by id;" + qt_select "select *, array_last_index(x->x+1,[1,2,3]) from ${tableName} order by id;" + qt_select "select *, array_last_index(x->x%2=0,[1,2,3]) from ${tableName} order by id;" + + qt_select "select c_array1, array_last_index(x->x,c_array1) from ${tableName} order by id;" + qt_select "select c_array1, array_last_index(x->x>3,c_array1) from ${tableName} order by id;" + qt_select "select c_array2, array_last_index(x->power(x,2)>100,c_array2) from ${tableName} order by id;" + + qt_select "select c_array1, c_array2, array_last_index((x,y)->x>y, c_array1, c_array2) from ${tableName} order by id;" + qt_select "select c_array1, c_array2, array_last_index((x,y)->x+y, c_array1, c_array2) from ${tableName} order by id;" + qt_select "select c_array1, c_array2, array_last_index((x,y)->x * abs(y) > 10, c_array1, c_array2) from ${tableName} order by id;" + + sql "DROP TABLE IF EXISTS ${tableName}" +}