[feature-array](array-function) add array constructor function array() (#14250)

* [feature-array](array-function) add array constructor function `array()`

```
mysql>  select array(qid, creationDate) from nested_c_2  limit 10;
+------------------------------+
| array(`qid`, `creationDate`) |
+------------------------------+
| [1000038, 20090616074056]    |
| [1000069, 20090616075005]    |
| [1000130, 20090616080918]    |
| [1000145, 20090616081545]    |
+------------------------------+
10 rows in set (0.01 sec)
```
This commit is contained in:
lihangyu
2022-11-19 10:49:50 +08:00
committed by GitHub
parent 02372ca2ea
commit b4aef889f2
12 changed files with 342 additions and 8 deletions

View File

@ -162,6 +162,7 @@ set(VEC_FILES
functions/array/function_array_range.cpp
functions/array/function_array_compact.cpp
functions/array/function_array_popback.cpp
functions/array/function_array_constructor.cpp
functions/array/function_array_with_constant.cpp
exprs/table_function/vexplode_json_array.cpp
functions/math.cpp

View File

@ -0,0 +1,90 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <string_view>
#include <vector>

#include "vec/columns/column_array.h"
#include "vec/columns/column_string.h"
#include "vec/common/string_ref.h"
#include "vec/data_types/data_type_array.h"
#include "vec/data_types/data_type_number.h"
#include "vec/data_types/get_least_supertype.h"
#include "vec/functions/array/function_array_utils.h"
#include "vec/functions/function.h"
#include "vec/functions/simple_function_factory.h"
namespace doris::vectorized {
// Constructs an array from the function's arguments, one array per input row:
//   array(col1, col2, '22') -> [col1, col2, '22']
// The element type of the result is taken from the first argument and is
// always made nullable, so NULL arguments become NULL elements.
class FunctionArrayConstructor : public IFunction {
public:
    static constexpr auto name = "array";

    static FunctionPtr create() { return std::make_shared<FunctionArrayConstructor>(); }

    /// Get function name.
    String get_name() const override { return name; }

    /// The constructor accepts a variable number of arguments.
    bool is_variadic() const override { return true; }

    /// Opt out of the default NULL handling: execute_impl() itself wraps
    /// non-nullable arguments so they can be stored as nullable elements.
    bool use_default_implementation_for_nulls() const override { return false; }

    // NOTE(review): presumably ignored by the framework because is_variadic()
    // returns true -- confirm against IFunction.
    size_t get_number_of_arguments() const override { return 1; }

    /// Returns Array(Nullable(T)), where T is the first argument's type with
    /// any nullability stripped. Only the first argument is inspected.
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
        DCHECK(arguments.size() > 0)
                << "function: " << get_name() << ", arguments should not be empty";
        return std::make_shared<DataTypeArray>(make_nullable(remove_nullable(arguments[0])));
    }

    /// Fills the result column with one array of `arguments.size()` elements
    /// per row; element `i` of row `r` is row `r` of argument `i`.
    ///
    /// The argument columns stored in `block` are left untouched: the
    /// const-expanded / nullable-wrapped variants needed for insertion are
    /// kept in a local vector instead of being written back into the block,
    /// so the inputs stay consistent with their declared types.
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                        size_t result, size_t input_rows_count) override {
        const size_t num_element = arguments.size();
        auto result_col = block.get_by_position(result).type->create_column();
        auto* result_array_col = static_cast<ColumnArray*>(result_col.get());
        IColumn& result_nested_col = result_array_col->get_data();
        ColumnArray::Offsets64& result_offset_col = result_array_col->get_offsets();
        result_nested_col.reserve(input_rows_count * num_element);
        result_offset_col.resize(input_rows_count);

        // Prepare every argument once: expand constants to full columns and,
        // when the nested result column is nullable, wrap non-nullable inputs
        // with an all-zero null map so insert_from() sees matching columns.
        const bool nested_is_nullable = result_nested_col.is_nullable();
        std::vector<ColumnPtr> element_cols;
        element_cols.reserve(num_element);
        for (size_t i = 0; i < num_element; ++i) {
            ColumnPtr col =
                    block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
            if (nested_is_nullable && !col->is_nullable()) {
                col = ColumnNullable::create(col, ColumnUInt8::create(col->size(), 0));
            }
            element_cols.push_back(std::move(col));
        }

        // Insert values row by row; each row contributes exactly num_element
        // entries, so the running offset grows by num_element per row.
        ColumnArray::Offset64 offset = 0;
        for (size_t row = 0; row < input_rows_count; ++row) {
            for (size_t idx = 0; idx < num_element; ++idx) {
                result_nested_col.insert_from(*element_cols[idx], row);
            }
            offset += num_element;
            result_offset_col[row] = offset;
        }
        block.replace_by_position(result, std::move(result_col));
        return Status::OK();
    }
};
/// Registers the `array(...)` constructor function with the given factory.
void register_function_array_constructor(SimpleFunctionFactory& factory) {
    using ArrayConstructor = FunctionArrayConstructor;
    factory.register_function<ArrayConstructor>();
}
} // namespace doris::vectorized

View File

@ -40,6 +40,7 @@ void register_function_array_range(SimpleFunctionFactory&);
void register_function_array_compact(SimpleFunctionFactory&);
void register_function_array_popback(SimpleFunctionFactory&);
void register_function_array_with_constant(SimpleFunctionFactory&);
void register_function_array_constructor(SimpleFunctionFactory&);
void register_function_array(SimpleFunctionFactory& factory) {
register_function_array_element(factory);
@ -60,6 +61,7 @@ void register_function_array(SimpleFunctionFactory& factory) {
register_function_array_compact(factory);
register_function_array_popback(factory);
register_function_array_with_constant(factory);
register_function_array_constructor(factory);
}
} // namespace doris::vectorized

View File

@ -0,0 +1,84 @@
---
{
"title": "array constructor",
"language": "en"
}
---
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
## array()
### description
#### Syntax
```
ARRAY<T> array(T, ...)
```
Constructs an array from a variable number of elements and returns it. Each element `T` can be a column or a literal.
### notice
`Only supported in vectorized engine`
### example
```
mysql> set enable_vectorized_engine=true;
mysql> select array("1", 2, 1.1);
+----------------------+
| array('1', 2, '1.1') |
+----------------------+
| ['1', '2', '1.1'] |
+----------------------+
1 row in set (0.00 sec)
mysql> select array(null, 1);
+----------------+
| array(NULL, 1) |
+----------------+
| [NULL, 1] |
+----------------+
1 row in set (0.00 sec)
mysql> select array(1, 2, 3);
+----------------+
| array(1, 2, 3) |
+----------------+
| [1, 2, 3] |
+----------------+
1 row in set (0.00 sec)
mysql> select array(qid, creationDate, null) from nested limit 4;
+------------------------------------+
| array(`qid`, `creationDate`, NULL) |
+------------------------------------+
| [1000038, 20090616074056, NULL] |
| [1000069, 20090616075005, NULL] |
| [1000130, 20090616080918, NULL] |
| [1000145, 20090616081545, NULL] |
+------------------------------------+
4 rows in set (0.01 sec)
```
### keywords
ARRAY,CONSTRUCTOR

View File

@ -0,0 +1,84 @@
---
{
"title": "array构造函数",
"language": "zh-CN"
}
---
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
## array()
### description
#### Syntax
```
ARRAY<T> array(T, ...)
```
根据参数构造并返回array, 参数可以是多列或者常量
### notice
`Only supported in vectorized engine`
### example
```
mysql> set enable_vectorized_engine=true;
mysql> select array("1", 2, 1.1);
+----------------------+
| array('1', 2, '1.1') |
+----------------------+
| ['1', '2', '1.1'] |
+----------------------+
1 row in set (0.00 sec)
mysql> select array(null, 1);
+----------------+
| array(NULL, 1) |
+----------------+
| [NULL, 1] |
+----------------+
1 row in set (0.00 sec)
mysql> select array(1, 2, 3);
+----------------+
| array(1, 2, 3) |
+----------------+
| [1, 2, 3] |
+----------------+
1 row in set (0.00 sec)
mysql> select array(qid, creationDate, null) from nested limit 4;
+------------------------------------+
| array(`qid`, `creationDate`, NULL) |
+------------------------------------+
| [1000038, 20090616074056, NULL] |
| [1000069, 20090616075005, NULL] |
| [1000130, 20090616080918, NULL] |
| [1000145, 20090616081545, NULL] |
+------------------------------------+
4 rows in set (0.01 sec)
```
### keywords
ARRAY,CONSTRUCTOR

View File

@ -110,13 +110,21 @@ visible_functions = [
'15FunctionContextERKNS1_11LargeIntValE', '', '', 'vec', ''],
# array functions
[['array'], 'ARRAY', ['INT', '...'],
'_ZN5doris14ArrayFunctions5arrayEPN9doris_udf15FunctionContextEiPKNS1_6IntValE', '', '', '', ''],
[['array'], 'ARRAY', ['VARCHAR', '...'],
'_ZN5doris14ArrayFunctions5arrayEPN9doris_udf15FunctionContextEiPKNS1_9StringValE', '', '', '', ''],
[['array'], 'ARRAY', ['ARRAY', '...'], '', '', '', '', ''],
[['array'], 'ARRAY', ['MAP', '...'], '', '', '', '', ''],
[['array'], 'ARRAY', ['STRUCT', '...'], '', '', '', '', ''],
[['array'], 'ARRAY', ['BOOLEAN', '...'], '', '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
[['array'], 'ARRAY', ['TINYINT', '...'], '', '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
[['array'], 'ARRAY', ['SMALLINT', '...'], '', '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
[['array'], 'ARRAY', ['INT', '...'], '', '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
[['array'], 'ARRAY', ['BIGINT', '...'], '', '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
[['array'], 'ARRAY', ['LARGEINT', '...'], '', '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
[['array'], 'ARRAY', ['DATETIME', '...'], '', '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
[['array'], 'ARRAY', ['DATE', '...'], '', '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
[['array'], 'ARRAY', ['DATETIMEV2', '...'], '', '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
[['array'], 'ARRAY', ['DATEV2', '...'], '', '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
[['array'], 'ARRAY', ['FLOAT', '...'], '', '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
[['array'], 'ARRAY', ['DOUBLE', '...'], '', '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
[['array'], 'ARRAY', ['DECIMALV2', '...'], '', '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
[['array'], 'ARRAY', ['VARCHAR', '...'], '', '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
[['array'], 'ARRAY', ['STRING', '...'], '', '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
[['element_at', '%element_extract%'], 'BOOLEAN', ['ARRAY_BOOLEAN', 'BIGINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
[['element_at', '%element_extract%'], 'TINYINT', ['ARRAY_TINYINT', 'BIGINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],

View File

@ -307,6 +307,39 @@
8 [123, 123]
9 [123, 123]
-- !select --
1 [2, 1]
2 [2, 2]
3 [2, 3]
4 [2, 4]
5 [2, 5]
6 [2, 6]
7 [2, 7]
8 [2, 8]
9 [2, 9]
-- !select --
1 ['1', NULL, '2020-01-01']
2 ['2', NULL, '2020-01-01']
3 ['3', NULL, '2020-01-01']
4 ['4', NULL, '2020-01-01']
5 ['5', NULL, '2020-01-01']
6 ['6', NULL, '2020-01-01']
7 ['7', NULL, '2020-01-01']
8 ['8', NULL, '2020-01-01']
9 ['9', NULL, '2020-01-01']
-- !select --
1 [NULL, 1]
2 [NULL, 2]
3 [NULL, 3]
4 [NULL, 4]
5 [NULL, 5]
6 [NULL, 6]
7 [NULL, 7]
8 [NULL, 8]
9 [NULL, 9]
-- !select --
\N \N
-1 \N

View File

@ -407,3 +407,12 @@ _
-- !sql --
['2015-03-13']
-- !sql --
[8, NULL]
-- !sql --
['a', '1', '2']
-- !sql --
[NULL, NULL, NULL]

View File

@ -18,3 +18,18 @@
-- !select --
\N [NULL, NULL, NULL, NULL, NULL]
-- !select --
1 [5, 1]
2 [5, 2]
3 [5, 3]
-- !select --
1 ['1', 'abc']
2 ['2', 'abc']
3 ['3', 'abc']
-- !select --
1 [NULL, 1]
2 [NULL, 2]
3 [NULL, 3]

View File

@ -76,6 +76,9 @@ suite("test_array_functions") {
qt_select "SELECT k1, array_with_constant(10, null) from ${tableName} ORDER BY k1"
qt_select "SELECT k1, array_with_constant(2, 'a') from ${tableName} ORDER BY k1"
qt_select "SELECT k1, array_with_constant(2, 123) from ${tableName} ORDER BY k1"
qt_select "SELECT k1, array(2, k1) from ${tableName} ORDER BY k1"
qt_select "SELECT k1, array(k1, null, '2020-01-01') from ${tableName} ORDER BY k1"
qt_select "SELECT k1, array(null, k1) from ${tableName} ORDER BY k1"
def tableName2 = "tbl_test_array_range"
sql """DROP TABLE IF EXISTS ${tableName2}"""

View File

@ -172,7 +172,6 @@ suite("test_array_functions_by_literal") {
qt_sql "select array_with_constant(2, '1')"
qt_sql "select array_with_constant(4, 1223)"
qt_sql "select array_with_constant(8, null)"
// array_compact function
qt_sql "select array_compact([1, 2, 3, 3, null, null, 4, 4])"
qt_sql "select array_compact([null, null, null])"
@ -181,6 +180,9 @@ suite("test_array_functions_by_literal") {
qt_sql "select array_compact(['aaa','aaa','bbb','ccc','ccccc',null, null,'dddd'])"
qt_sql "select array_compact(['2015-03-13','2015-03-13'])"
qt_sql "select array(8, null)"
qt_sql "select array('a', 1, 2)"
qt_sql "select array(null, null, null)"
// abnormal test
test {
sql "select array_intersect([1, 2, 3, 1, 2, 3], '1[3, 2, 5]')"

View File

@ -46,6 +46,9 @@ suite("test_array_functions_with_where") {
qt_select "SELECT k1, size(k2) FROM ${tableName} WHERE arrays_overlap(k2, k4) ORDER BY k1"
qt_select "SELECT k1, size(k2) FROM ${tableName} WHERE cardinality(k2)>0 ORDER BY k1, size(k2)"
qt_select "SELECT k1, array_with_constant(5, k1) FROM ${tableName} WHERE k1 is null ORDER BY k1, size(k2)"
qt_select "SELECT k1, array(5, k1) FROM ${tableName} WHERE k1 is not null ORDER BY k1, size(k2)"
qt_select "SELECT k1, array(k1, 'abc') FROM ${tableName} WHERE k1 is not null ORDER BY k1, size(k2)"
qt_select "SELECT k1, array(null, k1) FROM ${tableName} WHERE k1 is not null ORDER BY k1, size(k2)"
test {
sql "select k1, size(k2) FROM ${tableName} WHERE k2 = []"