[Vectorized](function) support bitmap_from_array function (#14259)

This commit is contained in:
zhangstar333
2022-11-15 01:55:51 +08:00
committed by GitHub
parent 37fdd011b4
commit 93e5d8e660
7 changed files with 198 additions and 6 deletions

View File

@ -21,9 +21,12 @@
#include "gutil/strings/numbers.h"
#include "gutil/strings/split.h"
#include "util/string_parser.hpp"
#include "vec/columns/column.h"
#include "vec/columns/column_array.h"
#include "vec/columns/columns_number.h"
#include "vec/data_types/data_type_array.h"
#include "vec/data_types/data_type_number.h"
#include "vec/data_types/data_type_string.h"
#include "vec/functions/function_always_not_nullable.h"
#include "vec/functions/function_bitmap_min_or_max.h"
#include "vec/functions/function_const.h"
@ -175,6 +178,8 @@ struct ToBitmapWithCheck {
};
struct BitmapFromString {
using ArgumentType = DataTypeString;
static constexpr auto name = "bitmap_from_string";
static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
@ -199,6 +204,42 @@ struct BitmapFromString {
}
};
// Implementation of bitmap_from_array: turns each row of an integer array
// column into a BitmapValue, or marks the row NULL when it holds an element
// that cannot be stored in a bitmap.
struct BitmapFromArray {
    using ArgumentType = DataTypeArray;
    static constexpr auto name = "bitmap_from_array";

    // Converts every array row into one entry of `res`.
    //
    // offset_column_data: end offset (exclusive) of each row inside the flattened
    //                     nested column; one entry per row.
    // nested_column:      flattened element column; cast to ColumnType without a
    //                     runtime check, so the caller must pass the matching type.
    // nested_null_map:    per-element null flags, indexed like the nested column.
    // res:                output; exactly one BitmapValue is appended per row
    //                     (a default-constructed one for rejected rows).
    // null_map:           output; null_map[i] is set to 1 when row i is rejected.
    //                     NOTE(review): this function never writes 0, so it
    //                     presumably relies on the caller passing a zero-filled
    //                     map of at least `size` entries -- confirm.
    //
    // A row is rejected when any of its elements is negative or flagged as null.
    // Always returns Status::OK().
    template <typename ColumnType>
    static Status vector(const ColumnArray::Offsets64& offset_column_data,
                         const IColumn& nested_column, const NullMap& nested_null_map,
                         std::vector<BitmapValue>& res, NullMap& null_map) {
        const auto& nested_column_data = static_cast<const ColumnType&>(nested_column).get_data();
        auto size = offset_column_data.size();
        res.reserve(size);
        // Scratch buffer reused across rows to avoid one allocation per row.
        std::vector<uint64_t> bits;
        for (size_t i = 0; i < size; ++i) {
            auto curr_offset = offset_column_data[i];
            // NOTE(review): for i == 0 this reads index -1; presumably the offsets
            // container is left-padded so that this yields 0 -- confirm.
            auto prev_offset = offset_column_data[i - 1];
            // Reset the buffer for every row. Clearing only after a valid row would
            // let the elements collected before an invalid element leak into the
            // bitmap of the next valid row.
            bits.clear();
            for (auto j = prev_offset; j < curr_offset; ++j) {
                auto data = nested_column_data[j];
                // invalid value: negative or NULL element, the whole row becomes NULL
                if (UNLIKELY(data < 0) || UNLIKELY(nested_null_map[j])) {
                    // Keep `res` aligned with the row index by appending a placeholder.
                    res.emplace_back();
                    null_map[i] = 1;
                    break;
                } else {
                    bits.push_back(data);
                }
            }
            // every element of the row was valid
            if (!null_map[i]) {
                res.emplace_back(bits);
            }
        }
        return Status::OK();
    }
};
template <typename Impl>
class FunctionBitmapAlwaysNull : public IFunction {
public:
@ -227,12 +268,39 @@ public:
ColumnPtr argument_column =
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
const ColumnString* str_column = check_and_get_column<ColumnString>(argument_column.get());
const ColumnString::Chars& data = str_column->get_chars();
const ColumnString::Offsets& offsets = str_column->get_offsets();
Impl::vector(data, offsets, res, null_map);
if constexpr (std::is_same_v<typename Impl::ArgumentType, DataTypeString>) {
const auto& str_column = static_cast<const ColumnString&>(*argument_column);
const ColumnString::Chars& data = str_column.get_chars();
const ColumnString::Offsets& offsets = str_column.get_offsets();
Impl::vector(data, offsets, res, null_map);
} else if constexpr (std::is_same_v<typename Impl::ArgumentType, DataTypeArray>) {
auto argument_type = remove_nullable(
assert_cast<const DataTypeArray&>(*block.get_by_position(arguments[0]).type)
.get_nested_type());
const auto& array_column = static_cast<const ColumnArray&>(*argument_column);
const auto& offset_column_data = array_column.get_offsets();
const auto& nested_nullable_column =
static_cast<const ColumnNullable&>(array_column.get_data());
const auto& nested_column = nested_nullable_column.get_nested_column();
const auto& nested_null_map = nested_nullable_column.get_null_map_column().get_data();
if (check_column<ColumnInt8>(nested_column)) {
Impl::template vector<ColumnInt8>(offset_column_data, nested_column,
nested_null_map, res, null_map);
} else if (check_column<ColumnInt16>(nested_column)) {
Impl::template vector<ColumnInt16>(offset_column_data, nested_column,
nested_null_map, res, null_map);
} else if (check_column<ColumnInt32>(nested_column)) {
Impl::template vector<ColumnInt32>(offset_column_data, nested_column,
nested_null_map, res, null_map);
} else if (check_column<ColumnInt64>(nested_column)) {
Impl::template vector<ColumnInt64>(offset_column_data, nested_column,
nested_null_map, res, null_map);
}
} else {
return Status::RuntimeError("Illegal column {} of argument of function {}",
block.get_by_position(arguments[0]).column->get_name(),
get_name());
}
block.get_by_position(result).column =
ColumnNullable::create(std::move(res_data_column), std::move(res_null_map));
return Status::OK();
@ -695,6 +763,7 @@ using FunctionToBitmap = FunctionAlwaysNotNullable<ToBitmap>;
using FunctionToBitmapWithCheck = FunctionAlwaysNotNullable<ToBitmapWithCheck, true>;
using FunctionBitmapFromString = FunctionBitmapAlwaysNull<BitmapFromString>;
using FunctionBitmapFromArray = FunctionBitmapAlwaysNull<BitmapFromArray>;
using FunctionBitmapHash = FunctionAlwaysNotNullable<BitmapHash<32>>;
using FunctionBitmapHash64 = FunctionAlwaysNotNullable<BitmapHash<64>>;
@ -724,6 +793,7 @@ void register_function_bitmap(SimpleFunctionFactory& factory) {
factory.register_function<FunctionToBitmap>();
factory.register_function<FunctionToBitmapWithCheck>();
factory.register_function<FunctionBitmapFromString>();
factory.register_function<FunctionBitmapFromArray>();
factory.register_function<FunctionBitmapHash>();
factory.register_function<FunctionBitmapHash64>();
factory.register_function<FunctionBitmapCount>();

View File

@ -0,0 +1,53 @@
---
{
"title": "bitmap_from_array",
"language": "en"
}
---
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
## bitmap_from_array
### description
#### Syntax
`BITMAP BITMAP_FROM_ARRAY(ARRAY input)`
Convert a TINYINT/SMALLINT/INT/BIGINT array to a BITMAP.
When the input array contains a NULL or a negative element, the result is NULL.
### example
```
mysql> select *, bitmap_to_string(bitmap_from_array(c_array)) from array_test;
+------+-----------------------+------------------------------------------------+
| id | c_array | bitmap_to_string(bitmap_from_array(`c_array`)) |
+------+-----------------------+------------------------------------------------+
| 1 | [NULL] | NULL |
| 2 | [1, 2, 3, NULL] | NULL |
| 2 | [1, 2, 3, -10] | NULL |
| 3 | [1, 2, 3, 4, 5, 6, 7] | 1,2,3,4,5,6,7 |
| 4 | [100, 200, 300, 300] | 100,200,300 |
+------+-----------------------+------------------------------------------------+
5 rows in set (0.02 sec)
```
### keywords
BITMAP_FROM_ARRAY,BITMAP

View File

@ -463,6 +463,7 @@
"sql-manual/sql-functions/bitmap-functions/bitmap_from_string",
"sql-manual/sql-functions/bitmap-functions/bitmap_to_string",
"sql-manual/sql-functions/bitmap-functions/bitmap_to_array",
"sql-manual/sql-functions/bitmap-functions/bitmap_from_array",
"sql-manual/sql-functions/bitmap-functions/bitmap_empty",
"sql-manual/sql-functions/bitmap-functions/bitmap_or",
"sql-manual/sql-functions/bitmap-functions/bitmap_and",

View File

@ -0,0 +1,53 @@
---
{
"title": "bitmap_from_array",
"language": "zh-CN"
}
---
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
## bitmap_from_array
### description
#### Syntax
`BITMAP BITMAP_FROM_ARRAY(ARRAY input)`
将一个TINYINT/SMALLINT/INT/BIGINT类型的数组转化为一个BITMAP
当输入数组中包含NULL或负数元素时,结果返回NULL
### example
```
mysql> select *, bitmap_to_string(bitmap_from_array(c_array)) from array_test;
+------+-----------------------+------------------------------------------------+
| id | c_array | bitmap_to_string(bitmap_from_array(`c_array`)) |
+------+-----------------------+------------------------------------------------+
| 1 | [NULL] | NULL |
| 2 | [1, 2, 3, NULL] | NULL |
| 2 | [1, 2, 3, -10] | NULL |
| 3 | [1, 2, 3, 4, 5, 6, 7] | 1,2,3,4,5,6,7 |
| 4 | [100, 200, 300, 300] | 100,200,300 |
+------+-----------------------+------------------------------------------------+
5 rows in set (0.02 sec)
```
### keywords
BITMAP_FROM_ARRAY,BITMAP

View File

@ -2610,6 +2610,10 @@ visible_functions = [
[['bitmap_from_string'], 'BITMAP', ['STRING'],
'_ZN5doris15BitmapFunctions18bitmap_from_stringEPN9doris_udf15FunctionContextERKNS1_9StringValE',
'', '', 'vec', 'ALWAYS_NULLABLE'],
[['bitmap_from_array'], 'BITMAP', ['ARRAY_TINYINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
[['bitmap_from_array'], 'BITMAP', ['ARRAY_SMALLINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
[['bitmap_from_array'], 'BITMAP', ['ARRAY_INT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
[['bitmap_from_array'], 'BITMAP', ['ARRAY_BIGINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
[['bitmap_contains'], 'BOOLEAN', ['BITMAP','BIGINT'],
'_ZN5doris15BitmapFunctions15bitmap_containsEPN9doris_udf15FunctionContextERKNS1_9StringValERKNS1_9BigIntValE',
'', '', 'vec', ''],

View File

@ -290,3 +290,12 @@
8 []
9 [9]
-- !select --
[1, 2, 3] 1,2,3
[4] 4
[]
[1, 2, 3, 4, 5, 4, 3, 2, 1] 1,2,3,4,5
[]
[1, 2, 3, 4, 5, 4, 3, 2, 1] 1,2,3,4,5
[8, 9, NULL, 10, NULL] \N

View File

@ -105,4 +105,6 @@ suite("test_array_functions") {
qt_select "SELECT k1, array_range(k1) from ${tableName2} ORDER BY k1"
qt_select "SELECT k1, array_range(k1,k2) from ${tableName2} ORDER BY k1"
qt_select "SELECT k1, array_range(k1,k2,k3) from ${tableName2} ORDER BY k1"
qt_select "select k2, bitmap_to_string(bitmap_from_array(k2)) from ${tableName} order by k1;"
}