[Feature](array-functions)improve array functions for array_last_index (#20294)
Currently only array_first_index is supported for lambda input; this commit adds array_last_index.
This commit is contained in:
@ -211,7 +211,7 @@ set(VEC_FILES
|
||||
functions/array/function_array_concat.cpp
|
||||
functions/array/function_array_zip.cpp
|
||||
functions/array/function_array_pushfront.cpp
|
||||
functions/array/function_array_first_index.cpp
|
||||
functions/array/function_array_first_or_last_index.cpp
|
||||
functions/array/function_array_cum_sum.cpp
|
||||
functions/array/function_array_count.cpp
|
||||
functions/function_map.cpp
|
||||
|
||||
@ -44,12 +44,17 @@ class FunctionContext;
|
||||
|
||||
namespace doris::vectorized {
|
||||
|
||||
// array_first_index([0, 1, 0]) -> [2]
|
||||
class FunctionArrayFirstIndex : public IFunction {
|
||||
/**
|
||||
* support array_first_index and array_last_index for input lambda expr
|
||||
* eg. array_first_index(x -> x == 0, [0, 1, 0]) -> [1]
|
||||
* array_last_index(x -> x == 0, [0, 1, 0]) -> [3]
|
||||
*/
|
||||
template <bool first>
|
||||
class FunctionArrayFirstOrLastIndex : public IFunction {
|
||||
public:
|
||||
static constexpr auto name = "array_first_index";
|
||||
static constexpr auto name = first ? "array_first_index" : "array_last_index";
|
||||
|
||||
static FunctionPtr create() { return std::make_shared<FunctionArrayFirstIndex>(); }
|
||||
static FunctionPtr create() { return std::make_shared<FunctionArrayFirstOrLastIndex>(); }
|
||||
|
||||
String get_name() const override { return name; }
|
||||
|
||||
@ -87,22 +92,34 @@ public:
|
||||
}
|
||||
|
||||
// default index is 0 if such index is not found
|
||||
size_t first_index = 0;
|
||||
for (size_t off = src_offset[i - 1]; off < src_offset[i]; ++off) {
|
||||
if (!src_nested_data.is_null_at(off) && src_nested_data.get_bool(off)) {
|
||||
first_index = off - src_offset[i - 1] + 1;
|
||||
break;
|
||||
size_t res_index = 0;
|
||||
size_t start_index = src_offset[i - 1];
|
||||
size_t end_index = src_offset[i];
|
||||
for (size_t off = start_index; off < end_index; ++off) {
|
||||
if constexpr (first) {
|
||||
if (!src_nested_data.is_null_at(off) && src_nested_data.get_bool(off)) {
|
||||
res_index = off - start_index + 1;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
size_t reverse_off = start_index + (end_index - 1 - off);
|
||||
if (!src_nested_data.is_null_at(reverse_off) &&
|
||||
src_nested_data.get_bool(reverse_off)) {
|
||||
res_index = reverse_off - start_index + 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
result_data[i] = first_index;
|
||||
result_data[i] = res_index;
|
||||
}
|
||||
block.replace_by_position(result, std::move(result_data_col));
|
||||
return Status::OK();
|
||||
}
|
||||
};
|
||||
|
||||
void register_function_array_first_index(SimpleFunctionFactory& factory) {
|
||||
factory.register_function<FunctionArrayFirstIndex>();
|
||||
void register_function_array_first_or_last_index(SimpleFunctionFactory& factory) {
|
||||
factory.register_function<FunctionArrayFirstOrLastIndex<true>>();
|
||||
factory.register_function<FunctionArrayFirstOrLastIndex<false>>();
|
||||
}
|
||||
|
||||
} // namespace doris::vectorized
|
||||
@ -50,7 +50,7 @@ void register_function_array_apply(SimpleFunctionFactory&);
|
||||
void register_function_array_concat(SimpleFunctionFactory&);
|
||||
void register_function_array_zip(SimpleFunctionFactory&);
|
||||
void register_function_array_pushfront(SimpleFunctionFactory& factory);
|
||||
void register_function_array_first_index(SimpleFunctionFactory& factory);
|
||||
void register_function_array_first_or_last_index(SimpleFunctionFactory& factory);
|
||||
void register_function_array_cum_sum(SimpleFunctionFactory& factory);
|
||||
void register_function_array_count(SimpleFunctionFactory&);
|
||||
|
||||
@ -83,7 +83,7 @@ void register_function_array(SimpleFunctionFactory& factory) {
|
||||
register_function_array_concat(factory);
|
||||
register_function_array_zip(factory);
|
||||
register_function_array_pushfront(factory);
|
||||
register_function_array_first_index(factory);
|
||||
register_function_array_first_or_last_index(factory);
|
||||
register_function_array_cum_sum(factory);
|
||||
register_function_array_count(factory);
|
||||
}
|
||||
|
||||
@ -0,0 +1,88 @@
|
||||
---
|
||||
{
|
||||
"title": "array_last_index",
|
||||
"language": "en"
|
||||
}
|
||||
---
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
-->
|
||||
|
||||
## array_last_index
|
||||
|
||||
<version since="2.0">
|
||||
|
||||
array_last_index
|
||||
|
||||
</version>
|
||||
|
||||
### description
|
||||
|
||||
#### Syntax
|
||||
|
||||
`BIGINT array_last_index(lambda, ARRAY<T> array1, ...)`
|
||||
|
||||
Use a lambda expression as an input parameter to perform the corresponding expression calculations on the internal data of the other input ARRAY parameters. Returns the last index such that the return value of `lambda(array1[i], ...)` is not 0. Returns 0 if no such index is found.
|
||||
|
||||
The lambda expression takes one or more input parameters, and all input arrays must have the same number of elements. Legal scalar functions can be executed in the lambda; aggregate functions, etc. are not supported.
|
||||
|
||||
```
|
||||
array_last_index(x->x>1, array1);
|
||||
array_last_index(x->(x%2 = 0), array1);
|
||||
array_last_index(x->(abs(x)-1), array1);
|
||||
array_last_index((x,y)->(x = y), array1, array2);
|
||||
```
|
||||
|
||||
### example
|
||||
|
||||
```
|
||||
mysql> select array_last_index(x -> x is null, [null, null, 1, 2]);
|
||||
+------------------------------------------------------------------------+
|
||||
| array_last_index(array_map([x] -> x IS NULL, ARRAY(NULL, NULL, 1, 2))) |
|
||||
+------------------------------------------------------------------------+
|
||||
| 2 |
|
||||
+------------------------------------------------------------------------+
|
||||
|
||||
|
||||
mysql> select array_last_index(x->x='s', ['a', 's', 's', 's', 'b']);
|
||||
+-----------------------------------------------------------------------------+
|
||||
| array_last_index(array_map([x] -> x = 's', ARRAY('a', 's', 's', 's', 'b'))) |
|
||||
+-----------------------------------------------------------------------------+
|
||||
| 4 |
|
||||
+-----------------------------------------------------------------------------+
|
||||
|
||||
mysql> select array_last_index(x->power(x,2)>10, [1, 4, 3, 4]);
|
||||
+-----------------------------------------------------------------------------+
|
||||
| array_last_index(array_map([x] -> power(x, 2.0) > 10.0, ARRAY(1, 4, 3, 4))) |
|
||||
+-----------------------------------------------------------------------------+
|
||||
| 4 |
|
||||
+-----------------------------------------------------------------------------+
|
||||
|
||||
mysql> select col2, col3, array_last_index((x,y)->x>y, col2, col3) from array_test;
|
||||
+--------------+--------------+---------------------------------------------------------------------+
|
||||
| col2 | col3 | array_last_index(array_map([x, y] -> x(0) > y(1), `col2`, `col3`)) |
|
||||
+--------------+--------------+---------------------------------------------------------------------+
|
||||
| [1, 2, 3] | [3, 4, 5] | 0 |
|
||||
| [1, NULL, 2] | [NULL, 3, 1] | 3 |
|
||||
| [1, 2, 3] | [9, 8, 7] | 0 |
|
||||
| NULL | NULL | 0 |
|
||||
+--------------+--------------+---------------------------------------------------------------------+
|
||||
```
|
||||
|
||||
### keywords
|
||||
|
||||
ARRAY,LAST_INDEX,array_last_index
|
||||
@ -310,6 +310,7 @@
|
||||
"sql-manual/sql-functions/array-functions/array_cum_sum",
|
||||
"sql-manual/sql-functions/array-functions/array_exists",
|
||||
"sql-manual/sql-functions/array-functions/array_first_index",
|
||||
"sql-manual/sql-functions/array-functions/array_last_index",
|
||||
"sql-manual/sql-functions/array-functions/array_last",
|
||||
"sql-manual/sql-functions/array-functions/arrays_overlap",
|
||||
"sql-manual/sql-functions/array-functions/array_count",
|
||||
|
||||
@ -0,0 +1,89 @@
|
||||
---
|
||||
{
|
||||
"title": "array_last_index",
|
||||
"language": "zh-CN"
|
||||
}
|
||||
---
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
-->
|
||||
|
||||
## array_last_index
|
||||
|
||||
<version since="2.0">
|
||||
|
||||
array_last_index
|
||||
|
||||
</version>
|
||||
|
||||
### description
|
||||
|
||||
#### Syntax
|
||||
|
||||
`BIGINT array_last_index(lambda, ARRAY<T> array1, ...)`
|
||||
|
||||
使用lambda表达式作为输入参数,对其他输入ARRAY参数的内部数据进行相应的表达式计算。 返回最后一个使得 `lambda(array1[i], ...)` 返回值不为 0 的索引。如果没找到满足此条件的索引,则返回 0。
|
||||
|
||||
在lambda表达式中输入的参数为1个或多个,所有输入的array的元素数量必须一致。在lambda中可以执行合法的标量函数,不支持聚合函数等。
|
||||
|
||||
```
|
||||
array_last_index(x->x>1, array1);
|
||||
array_last_index(x->(x%2 = 0), array1);
|
||||
array_last_index(x->(abs(x)-1), array1);
|
||||
array_last_index((x,y)->(x = y), array1, array2);
|
||||
```
|
||||
|
||||
### example
|
||||
|
||||
```
|
||||
mysql> select array_last_index(x->x+1>3, [2, 3, 4]);
|
||||
+-------------------------------------------------------------------+
|
||||
| array_last_index(array_map([x] -> x(0) + 1 > 3, ARRAY(2, 3, 4))) |
|
||||
+-------------------------------------------------------------------+
|
||||
| 3 |
|
||||
+-------------------------------------------------------------------+
|
||||
|
||||
mysql> select array_last_index(x -> x is null, [null, 1, 2]);
|
||||
+----------------------------------------------------------------------+
|
||||
| array_last_index(array_map([x] -> x(0) IS NULL, ARRAY(NULL, 1, 2))) |
|
||||
+----------------------------------------------------------------------+
|
||||
| 1 |
|
||||
+----------------------------------------------------------------------+
|
||||
|
||||
mysql> select array_last_index(x->power(x,2)>10, [1, 2, 3, 4]);
|
||||
+---------------------------------------------------------------------------------+
|
||||
| array_last_index(array_map([x] -> power(x(0), 2.0) > 10.0, ARRAY(1, 2, 3, 4))) |
|
||||
+---------------------------------------------------------------------------------+
|
||||
| 4 |
|
||||
+---------------------------------------------------------------------------------+
|
||||
|
||||
mysql> select c_array1, c_array2, array_last_index((x,y)->x>y, c_array1, c_array2) from array_index_table order by id;
|
||||
+-----------------+-------------------------+----------------------------------------------------------------------+
|
||||
| c_array1 | c_array2 | array_last_index(array_map([x, y] -> x > y, `c_array1`, `c_array2`)) |
|
||||
+-----------------+-------------------------+----------------------------------------------------------------------+
|
||||
| [1, 2, 3, 4, 5] | [10, 20, -40, 80, -100] | 5 |
|
||||
| [6, 7, 8] | [10, 12, 13] | 0 |
|
||||
| [1] | [-100] | 1 |
|
||||
| [1, NULL, 2] | [NULL, 3, 1] | 3 |
|
||||
| [] | [] | 0 |
|
||||
| NULL | NULL | 0 |
|
||||
+-----------------+-------------------------+----------------------------------------------------------------------+
|
||||
```
|
||||
|
||||
### keywords
|
||||
|
||||
ARRAY,LAST_INDEX,array_last_index
|
||||
@ -37,13 +37,13 @@ import java.util.List;
|
||||
public class LambdaFunctionCallExpr extends FunctionCallExpr {
|
||||
public static final ImmutableSet<String> LAMBDA_FUNCTION_SET = new ImmutableSortedSet.Builder(
|
||||
String.CASE_INSENSITIVE_ORDER).add("array_map").add("array_filter").add("array_exists").add("array_sortby")
|
||||
.add("array_first_index").add("array_last").add("array_count").build();
|
||||
.add("array_first_index").add("array_last_index").add("array_last").add("array_count").build();
|
||||
// The functions in this set are all normal array functions when implemented initially.
|
||||
// Later we wanted to accept a lambda expr as the input param, so we rewrite the call to contain an array_map lambda function
|
||||
// rather than reimplementing a lambda function; this reuses the implementation of the normal array function
|
||||
public static final ImmutableSet<String> LAMBDA_MAPPED_FUNCTION_SET = new ImmutableSortedSet.Builder(
|
||||
String.CASE_INSENSITIVE_ORDER).add("array_exists").add("array_sortby")
|
||||
.add("array_first_index").add("array_last").add("array_count")
|
||||
.add("array_first_index").add("array_last_index").add("array_last").add("array_count")
|
||||
.build();
|
||||
|
||||
private static final Logger LOG = LogManager.getLogger(LambdaFunctionCallExpr.class);
|
||||
@ -110,6 +110,7 @@ public class LambdaFunctionCallExpr extends FunctionCallExpr {
|
||||
true, true, NullableMode.DEPEND_ON_ARGUMENT);
|
||||
} else if (fnName.getFunction().equalsIgnoreCase("array_exists")
|
||||
|| fnName.getFunction().equalsIgnoreCase("array_first_index")
|
||||
|| fnName.getFunction().equalsIgnoreCase("array_last_index")
|
||||
|| fnName.getFunction().equalsIgnoreCase("array_count")) {
|
||||
if (fnParams.exprs() == null || fnParams.exprs().size() < 1) {
|
||||
throw new AnalysisException("The " + fnName.getFunction() + " function must have at least one param");
|
||||
|
||||
@ -688,6 +688,8 @@ visible_functions = [
|
||||
[['array_exists'], 'ARRAY_BOOLEAN', ['ARRAY_STRING'], ''],
|
||||
|
||||
[['array_first_index'], 'BIGINT', ['ARRAY_BOOLEAN'], 'ALWAYS_NOT_NULLABLE'],
|
||||
[['array_last_index'], 'BIGINT', ['ARRAY_BOOLEAN'], 'ALWAYS_NOT_NULLABLE'],
|
||||
|
||||
[['array_count'], 'BIGINT', ['ARRAY_BOOLEAN'], 'ALWAYS_NOT_NULLABLE'],
|
||||
|
||||
[['array_shuffle', 'shuffle'], 'ARRAY_BOOLEAN', ['ARRAY_BOOLEAN'], ''],
|
||||
|
||||
@ -0,0 +1,88 @@
|
||||
-- This file is automatically generated. You should know what you did if you want to edit this
|
||||
-- !select --
|
||||
5
|
||||
|
||||
-- !select --
|
||||
0
|
||||
|
||||
-- !select --
|
||||
0
|
||||
|
||||
-- !select --
|
||||
2
|
||||
|
||||
-- !select --
|
||||
3
|
||||
|
||||
-- !select --
|
||||
1 [1, 2, 3, 4, 5] [10, 20, -40, 80, -100] 3
|
||||
2 [6, 7, 8] [10, 12, 13] 3
|
||||
3 [1] [-100] 3
|
||||
4 [1, NULL, 2] [NULL, 3, 1] 3
|
||||
5 [] [] 3
|
||||
6 \N \N 3
|
||||
|
||||
-- !select --
|
||||
1 [1, 2, 3, 4, 5] [10, 20, -40, 80, -100] 3
|
||||
2 [6, 7, 8] [10, 12, 13] 3
|
||||
3 [1] [-100] 3
|
||||
4 [1, NULL, 2] [NULL, 3, 1] 3
|
||||
5 [] [] 3
|
||||
6 \N \N 3
|
||||
|
||||
-- !select --
|
||||
1 [1, 2, 3, 4, 5] [10, 20, -40, 80, -100] 2
|
||||
2 [6, 7, 8] [10, 12, 13] 2
|
||||
3 [1] [-100] 2
|
||||
4 [1, NULL, 2] [NULL, 3, 1] 2
|
||||
5 [] [] 2
|
||||
6 \N \N 2
|
||||
|
||||
-- !select --
|
||||
[1, 2, 3, 4, 5] 5
|
||||
[6, 7, 8] 3
|
||||
[1] 1
|
||||
[1, NULL, 2] 3
|
||||
[] 0
|
||||
\N 0
|
||||
|
||||
-- !select --
|
||||
[1, 2, 3, 4, 5] 5
|
||||
[6, 7, 8] 3
|
||||
[1] 0
|
||||
[1, NULL, 2] 0
|
||||
[] 0
|
||||
\N 0
|
||||
|
||||
-- !select --
|
||||
[10, 20, -40, 80, -100] 5
|
||||
[10, 12, 13] 3
|
||||
[-100] 1
|
||||
[NULL, 3, 1] 0
|
||||
[] 0
|
||||
\N 0
|
||||
|
||||
-- !select --
|
||||
[1, 2, 3, 4, 5] [10, 20, -40, 80, -100] 5
|
||||
[6, 7, 8] [10, 12, 13] 0
|
||||
[1] [-100] 1
|
||||
[1, NULL, 2] [NULL, 3, 1] 3
|
||||
[] [] 0
|
||||
\N \N 0
|
||||
|
||||
-- !select --
|
||||
[1, 2, 3, 4, 5] [10, 20, -40, 80, -100] 5
|
||||
[6, 7, 8] [10, 12, 13] 3
|
||||
[1] [-100] 1
|
||||
[1, NULL, 2] [NULL, 3, 1] 3
|
||||
[] [] 0
|
||||
\N \N 0
|
||||
|
||||
-- !select --
|
||||
[1, 2, 3, 4, 5] [10, 20, -40, 80, -100] 5
|
||||
[6, 7, 8] [10, 12, 13] 3
|
||||
[1] [-100] 1
|
||||
[1, NULL, 2] [NULL, 3, 1] 0
|
||||
[] [] 0
|
||||
\N \N 0
|
||||
|
||||
@ -0,0 +1,63 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
suite("test_array_last_index_function") {
|
||||
|
||||
def tableName = "array_last_index_table"
|
||||
sql "DROP TABLE IF EXISTS ${tableName}"
|
||||
sql """
|
||||
CREATE TABLE IF NOT EXISTS `${tableName}` (
|
||||
`id` int(11) NULL,
|
||||
`c_array1` array<int(11)> NULL,
|
||||
`c_array2` array<int(11)> NULL
|
||||
) ENGINE=OLAP
|
||||
DUPLICATE KEY(`id`)
|
||||
DISTRIBUTED BY HASH(`id`) BUCKETS 1
|
||||
PROPERTIES (
|
||||
"replication_allocation" = "tag.location.default: 1",
|
||||
"storage_format" = "V2"
|
||||
)
|
||||
"""
|
||||
|
||||
|
||||
sql """
|
||||
INSERT INTO ${tableName} values
|
||||
(1, [1,2,3,4,5], [10,20,-40,80,-100]),
|
||||
(2, [6,7,8],[10,12,13]), (3, [1],[-100]),
|
||||
(4, [1, null, 2], [null, 3, 1]), (5, [], []), (6, null, null)
|
||||
"""
|
||||
|
||||
qt_select "select array_last_index(x-> x + 1 > 2, [1, 2, 3, 3, 3])"
|
||||
qt_select "select array_last_index(x -> x > 1,[]);"
|
||||
qt_select "select array_last_index(x -> x > 1, [null]);"
|
||||
qt_select "select array_last_index(x -> x is null, [null, null, 1, 2]);"
|
||||
qt_select "select array_last_index(x -> x > 2, array_map(x->power(x,2),[1,2,3]));"
|
||||
|
||||
qt_select "select *, array_last_index(x->x>2,[1,2,3]) from ${tableName} order by id;"
|
||||
qt_select "select *, array_last_index(x->x+1,[1,2,3]) from ${tableName} order by id;"
|
||||
qt_select "select *, array_last_index(x->x%2=0,[1,2,3]) from ${tableName} order by id;"
|
||||
|
||||
qt_select "select c_array1, array_last_index(x->x,c_array1) from ${tableName} order by id;"
|
||||
qt_select "select c_array1, array_last_index(x->x>3,c_array1) from ${tableName} order by id;"
|
||||
qt_select "select c_array2, array_last_index(x->power(x,2)>100,c_array2) from ${tableName} order by id;"
|
||||
|
||||
qt_select "select c_array1, c_array2, array_last_index((x,y)->x>y, c_array1, c_array2) from ${tableName} order by id;"
|
||||
qt_select "select c_array1, c_array2, array_last_index((x,y)->x+y, c_array1, c_array2) from ${tableName} order by id;"
|
||||
qt_select "select c_array1, c_array2, array_last_index((x,y)->x * abs(y) > 10, c_array1, c_array2) from ${tableName} order by id;"
|
||||
|
||||
sql "DROP TABLE IF EXISTS ${tableName}"
|
||||
}
|
||||
Reference in New Issue
Block a user