[Function](array) support array_range function (#13547)

* array_range with 3 impl

* [Function](array) support array_range function

* update

* update code
This commit is contained in:
zhangstar333
2022-10-28 08:40:24 +08:00
committed by GitHub
parent 43c6428aea
commit 5dd052d386
7 changed files with 352 additions and 0 deletions

View File

@ -153,6 +153,7 @@ set(VEC_FILES
functions/array/function_array_slice.cpp
functions/array/function_array_difference.cpp
functions/array/function_array_enumerate.cpp
functions/array/function_array_range.cpp
exprs/table_function/vexplode_json_array.cpp
functions/math.cpp
functions/function_bitmap.cpp

View File

@ -0,0 +1,195 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "common/status.h"
#include "vec/columns/column_array.h"
#include "vec/columns/column_const.h"
#include "vec/columns/columns_number.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type_array.h"
#include "vec/data_types/data_type_nullable.h"
#include "vec/data_types/data_type_number.h"
#include "vec/functions/function.h"
#include "vec/functions/function_helpers.h"
#include "vec/functions/simple_function_factory.h"
#include "vec/utils/util.hpp"
namespace doris::vectorized {
template <typename Impl>
class FunctionArrayRange : public IFunction {
public:
static constexpr auto name = "array_range";
static FunctionPtr create() { return std::make_shared<FunctionArrayRange>(); }
/// Get function name.
String get_name() const override { return name; }
bool is_variadic() const override { return true; }
bool use_default_implementation_for_nulls() const override { return false; }
bool use_default_implementation_for_constants() const override { return true; }
ColumnNumbers get_arguments_that_are_always_constant() const override {
return {get_number_of_arguments()};
}
size_t get_number_of_arguments() const override {
return get_variadic_argument_types_impl().size();
}
DataTypes get_variadic_argument_types_impl() const override {
return Impl::get_variadic_argument_types();
}
DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
auto nested_type = make_nullable(std::make_shared<DataTypeInt32>());
auto res = std::make_shared<DataTypeArray>(nested_type);
return make_nullable(res);
}
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
size_t result, size_t input_rows_count) override {
return Impl::execute_impl(context, block, arguments, result, input_rows_count);
}
};
struct RangeImplUtil {
static Status range_execute(Block& block, const ColumnNumbers& arguments, size_t result,
size_t input_rows_count) {
DCHECK_EQ(arguments.size(), 3);
auto return_nested_type = make_nullable(std::make_shared<DataTypeInt32>());
auto dest_array_column_ptr = ColumnArray::create(return_nested_type->create_column(),
ColumnArray::ColumnOffsets::create());
IColumn* dest_nested_column = &dest_array_column_ptr->get_data();
ColumnNullable* dest_nested_nullable_col =
reinterpret_cast<ColumnNullable*>(dest_nested_column);
dest_nested_column = dest_nested_nullable_col->get_nested_column_ptr();
auto& dest_nested_null_map = dest_nested_nullable_col->get_null_map_column().get_data();
auto args_null_map = ColumnUInt8::create(input_rows_count, 0);
ColumnPtr argument_columns[3];
for (int i = 0; i < 3; ++i) {
argument_columns[i] =
block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
if (auto* nullable = check_and_get_column<ColumnNullable>(*argument_columns[i])) {
// Danger: Here must dispose the null map data first! Because
// argument_columns[i]=nullable->get_nested_column_ptr(); will release the mem
// of column nullable mem of null map
VectorizedUtils::update_null_map(args_null_map->get_data(),
nullable->get_null_map_data());
argument_columns[i] = nullable->get_nested_column_ptr();
}
}
auto start_column = assert_cast<const ColumnVector<Int32>*>(argument_columns[0].get());
auto end_column = assert_cast<const ColumnVector<Int32>*>(argument_columns[1].get());
auto step_column = assert_cast<const ColumnVector<Int32>*>(argument_columns[2].get());
DCHECK(dest_nested_column != nullptr);
auto& dest_offsets = dest_array_column_ptr->get_offsets();
auto nested_column = reinterpret_cast<ColumnVector<Int32>*>(dest_nested_column);
dest_offsets.reserve(input_rows_count);
dest_nested_column->reserve(input_rows_count);
dest_nested_null_map.reserve(input_rows_count);
vector(start_column->get_data(), end_column->get_data(), step_column->get_data(),
args_null_map->get_data(), nested_column->get_data(), dest_nested_null_map,
dest_offsets);
block.get_by_position(result).column =
ColumnNullable::create(std::move(dest_array_column_ptr), std::move(args_null_map));
return Status::OK();
}
private:
static void vector(const PaddedPODArray<Int32>& start, const PaddedPODArray<Int32>& end,
const PaddedPODArray<Int32>& step, NullMap& args_null_map,
PaddedPODArray<Int32>& nested_column,
PaddedPODArray<UInt8>& dest_nested_null_map,
ColumnArray::Offsets64& dest_offsets) {
int rows = start.size();
for (auto row = 0; row < rows; ++row) {
if (args_null_map[row] || start[row] < 0 || end[row] < 0 || step[row] < 0) {
nested_column.push_back(0);
dest_offsets.push_back(dest_offsets.back() + 1);
dest_nested_null_map.push_back(1);
args_null_map[row] = 1;
} else {
int offset = dest_offsets.back();
for (auto idx = start[row]; idx < end[row]; idx = idx + step[row]) {
nested_column.push_back(idx);
dest_nested_null_map.push_back(0);
offset++;
}
dest_offsets.push_back(offset);
}
}
}
};
struct RangeOneImpl {
static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeInt32>()}; }
static Status execute_impl(FunctionContext* context, Block& block,
const ColumnNumbers& arguments, size_t result,
size_t input_rows_count) {
auto start_column = ColumnInt32::create(input_rows_count, 0);
auto step_column = ColumnInt32::create(input_rows_count, 1);
block.insert({std::move(start_column), std::make_shared<DataTypeInt32>(), "start_column"});
block.insert({std::move(step_column), std::make_shared<DataTypeInt32>(), "step_column"});
ColumnNumbers temp_arguments = {block.columns() - 2, arguments[0], block.columns() - 1};
return RangeImplUtil::range_execute(block, temp_arguments, result, input_rows_count);
}
};
struct RangeTwoImpl {
static DataTypes get_variadic_argument_types() {
return {std::make_shared<DataTypeInt32>(), std::make_shared<DataTypeInt32>()};
}
static Status execute_impl(FunctionContext* context, Block& block,
const ColumnNumbers& arguments, size_t result,
size_t input_rows_count) {
auto step_column = ColumnInt32::create(input_rows_count, 1);
block.insert({std::move(step_column), std::make_shared<DataTypeInt32>(), "step_column"});
ColumnNumbers temp_arguments = {arguments[0], arguments[1], block.columns() - 1};
return RangeImplUtil::range_execute(block, temp_arguments, result, input_rows_count);
}
};
struct RangeThreeImpl {
static DataTypes get_variadic_argument_types() {
return {std::make_shared<DataTypeInt32>(), std::make_shared<DataTypeInt32>(),
std::make_shared<DataTypeInt32>()};
}
static Status execute_impl(FunctionContext* context, Block& block,
const ColumnNumbers& arguments, size_t result,
size_t input_rows_count) {
return RangeImplUtil::range_execute(block, arguments, result, input_rows_count);
}
};
void register_function_array_range(SimpleFunctionFactory& factory) {
factory.register_function<FunctionArrayRange<RangeOneImpl>>();
factory.register_function<FunctionArrayRange<RangeTwoImpl>>();
factory.register_function<FunctionArrayRange<RangeThreeImpl>>();
}
} // namespace doris::vectorized

View File

@ -36,6 +36,7 @@ void register_function_array_intersect(SimpleFunctionFactory&);
void register_function_array_slice(SimpleFunctionFactory&);
void register_function_array_difference(SimpleFunctionFactory&);
void register_function_array_enumerate(SimpleFunctionFactory&);
void register_function_array_range(SimpleFunctionFactory&);
void register_function_array(SimpleFunctionFactory& factory) {
register_function_array_element(factory);
@ -52,6 +53,7 @@ void register_function_array(SimpleFunctionFactory& factory) {
register_function_array_slice(factory);
register_function_array_difference(factory);
register_function_array_enumerate(factory);
register_function_array_range(factory);
}
} // namespace doris::vectorized

View File

@ -0,0 +1,76 @@
---
{
"title": "array_range",
"language": "en"
}
---
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
## array_range
### description
#### Syntax
```
ARRAY<Int> array_range(Int end)
ARRAY<Int> array_range(Int start, Int end)
ARRAY<Int> array_range(Int start, Int end, Int step)
```
The parameters are all positive integers.
start default value is 0, and step default value is 1.
Return the array which numbers from start to end - 1 by step.
### notice
`Only supported in vectorized engine`
### example
```
mysql> set enable_vectorized_engine=true;
mysql> select array_range(10);
+--------------------------------+
| array_range(10) |
+--------------------------------+
| [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] |
+--------------------------------+
mysql> select array_range(10,20);
+------------------------------------------+
| array_range(10, 20) |
+------------------------------------------+
| [10, 11, 12, 13, 14, 15, 16, 17, 18, 19] |
+------------------------------------------+
mysql> select array_range(0,20,2);
+-------------------------------------+
| array_range(0, 20, 2) |
+-------------------------------------+
| [0, 2, 4, 6, 8, 10, 12, 14, 16, 18] |
+-------------------------------------+
```
### keywords
ARRAY, RANGE, ARRAY_RANGE

View File

@ -257,6 +257,7 @@
"sql-manual/sql-functions/array-functions/countequal",
"sql-manual/sql-functions/array-functions/element_at",
"sql-manual/sql-functions/array-functions/array_avg",
"sql-manual/sql-functions/array-functions/array_range",
"sql-manual/sql-functions/array-functions/size",
"sql-manual/sql-functions/array-functions/array_distinct",
"sql-manual/sql-functions/array-functions/array_difference",

View File

@ -0,0 +1,74 @@
---
{
"title": "array_range",
"language": "zh-CN"
}
---
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
## array_range
### description
#### Syntax
```
ARRAY<Int> array_range(Int end)
ARRAY<Int> array_range(Int start, Int end)
ARRAY<Int> array_range(Int start, Int end, Int step)
```
参数均为正整数 start 默认为 0, step 默认为 1。
最终返回一个数组,从start 到 end - 1, 步长为 step。
### notice
`仅支持向量化引擎中使用`
### example
```
mysql> set enable_vectorized_engine=true;
mysql> select array_range(10);
+--------------------------------+
| array_range(10) |
+--------------------------------+
| [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] |
+--------------------------------+
mysql> select array_range(10,20);
+------------------------------------------+
| array_range(10, 20) |
+------------------------------------------+
| [10, 11, 12, 13, 14, 15, 16, 17, 18, 19] |
+------------------------------------------+
mysql> select array_range(0,20,2);
+-------------------------------------+
| array_range(0, 20, 2) |
+-------------------------------------+
| [0, 2, 4, 6, 8, 10, 12, 14, 16, 18] |
+-------------------------------------+
```
### keywords
ARRAY, RANGE, ARRAY_RANGE

View File

@ -436,6 +436,9 @@ visible_functions = [
[['array_slice', '%element_slice%'], 'ARRAY_VARCHAR', ['ARRAY_VARCHAR', 'BIGINT', 'BIGINT'], '', '', '', 'vec', ''],
[['array_slice', '%element_slice%'], 'ARRAY_STRING', ['ARRAY_STRING', 'BIGINT', 'BIGINT'], '', '', '', 'vec', ''],
[['array_range'], 'ARRAY_INT', ['INT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
[['array_range'], 'ARRAY_INT', ['INT', 'INT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
[['array_range'], 'ARRAY_INT', ['INT', 'INT', 'INT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
# reverse function for string builtin
[['reverse'], 'VARCHAR', ['VARCHAR'],
'_ZN5doris15StringFunctions7reverseEPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', 'vec', ''],