[Function] add BE bitmap function bitmap_subset_in_range (#6917)

Add bitmap function bitmap_subset_in_range.
This function returns the subset of a bitmap whose elements fall within the specified range (range_end itself is excluded).
This commit is contained in:
qiye
2021-11-01 11:05:19 +08:00
committed by GitHub
parent db1c281be5
commit 65ded82778
9 changed files with 221 additions and 0 deletions

View File

@ -620,6 +620,25 @@ BigIntVal BitmapFunctions::bitmap_max(FunctionContext* ctx, const StringVal& src
}
}
// Returns the serialized subset of `src` whose elements lie in the half-open
// range [range_start, range_end).
//
// Returns NULL when any argument is NULL, when range_start >= range_end, or
// when either bound is negative.
StringVal BitmapFunctions::bitmap_subset_in_range(FunctionContext* ctx, const StringVal& src,
                                                  const BigIntVal& range_start,
                                                  const BigIntVal& range_end) {
    if (src.is_null || range_start.is_null || range_end.is_null) {
        return StringVal::null();
    }
    if (range_start.val >= range_end.val || range_start.val < 0 || range_end.val < 0) {
        return StringVal::null();
    }

    BitmapValue ret_bitmap;
    if (src.len == 0) {
        // With len == 0, src.ptr is interpreted as a BitmapValue object rather than
        // serialized bytes. The range filter must still be applied on this path;
        // copying the whole object would return elements outside the range.
        auto* bitmap = reinterpret_cast<BitmapValue*>(src.ptr);
        bitmap->sub_range(range_start.val, range_end.val, &ret_bitmap);
    } else {
        // Otherwise src.ptr holds a serialized bitmap that is decoded first.
        BitmapValue bitmap(reinterpret_cast<char*>(src.ptr));
        bitmap.sub_range(range_start.val, range_end.val, &ret_bitmap);
    }
    return serialize(ctx, &ret_bitmap);
}
template void BitmapFunctions::bitmap_update_int<TinyIntVal>(FunctionContext* ctx,
const TinyIntVal& src, StringVal* dst);
template void BitmapFunctions::bitmap_update_int<SmallIntVal>(FunctionContext* ctx,

View File

@ -97,6 +97,8 @@ public:
template <typename T>
static BigIntVal bitmap_intersect_finalize(FunctionContext* ctx, const StringVal& src);
static BigIntVal bitmap_max(FunctionContext* ctx, const StringVal& str);
// Returns the subset of the bitmap `src` whose elements lie in
// [range_start, range_end). The result is NULL if any argument is NULL, if
// range_start >= range_end, or if either bound is negative.
static StringVal bitmap_subset_in_range(FunctionContext* ctx, const StringVal& src,
const BigIntVal& range_start, const BigIntVal& range_end);
};
} // namespace doris
#endif //DORIS_BE_SRC_QUERY_EXPRS_BITMAP_FUNCTION_H

View File

@ -1451,6 +1451,26 @@ public:
}
}
/**
 * Adds every element of _bitmap that lies in [range_start, range_end) to
 * ret_bitmap and returns the number of elements added.
 *
 * The scan stops at the first element that is not below range_end, so it
 * presumably relies on _bitmap iterating in ascending order.
 * NOTE(review): only _bitmap is scanned here; confirm whether values can be
 * held in another representation that this loop would miss.
 */
int64_t sub_range(const int64_t& range_start, const int64_t& range_end, BitmapValue* ret_bitmap) {
    int64_t matched = 0;
    for (auto cursor = _bitmap.begin(); cursor != _bitmap.end(); ++cursor) {
        const auto value = *cursor;
        // Not yet inside the range: keep scanning.
        if (value < range_start) {
            continue;
        }
        // Reached or passed the exclusive upper bound: nothing further to collect.
        if (!(value < range_end)) {
            break;
        }
        ret_bitmap->add(value);
        ++matched;
    }
    return matched;
}
private:
void _convert_to_smaller_type() {
if (_type == BITMAP) {

View File

@ -552,6 +552,67 @@ TEST_F(BitmapFunctionsTest, bitmap_max) {
ASSERT_EQ(BigIntVal(1024), result);
}
TEST_F(BitmapFunctionsTest, bitmap_subset_in_range) {
    // Calls the function under test and reports whether the result is NULL.
    auto yields_null = [&](const StringVal& src, const BigIntVal& start,
                           const BigIntVal& end) -> bool {
        StringVal subset = BitmapFunctions::bitmap_subset_in_range(ctx, src, start, end);
        return subset.is_null;
    };
    // Calls the function under test and returns the cardinality of the result.
    auto count_in_range = [&](const StringVal& src, const BigIntVal& start,
                              const BigIntVal& end) -> BigIntVal {
        StringVal subset = BitmapFunctions::bitmap_subset_in_range(ctx, src, start, end);
        return BitmapFunctions::bitmap_count(ctx, subset);
    };

    // NULL bitmap input yields NULL.
    ASSERT_TRUE(yields_null(StringVal::null(), BigIntVal(1), BigIntVal(3)));

    // An empty bitmap yields an empty subset.
    BitmapValue empty_bitmap;
    StringVal empty_src = convert_bitmap_to_string(ctx, empty_bitmap);
    ASSERT_EQ(BigIntVal(0), count_in_range(empty_src, BigIntVal(1), BigIntVal(3)));

    // Regular ranges over a populated bitmap (note 208 is inserted out of order).
    BitmapValue populated({0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,208,23,24,25,26,27,28,29,30,31,32,33,100,200,500});
    StringVal bitmap_src = convert_bitmap_to_string(ctx, populated);
    ASSERT_EQ(BigIntVal(5), count_in_range(bitmap_src, BigIntVal(30), BigIntVal(200)));
    ASSERT_EQ(BigIntVal(1), count_in_range(bitmap_src, BigIntVal(0), BigIntVal(1)));
    ASSERT_EQ(BigIntVal(4), count_in_range(bitmap_src, BigIntVal(11), BigIntVal(15)));
    ASSERT_EQ(BigIntVal(27), count_in_range(bitmap_src, BigIntVal(11), DecimalV2Value::MAX_INT64));

    // Abnormal arguments all yield NULL.
    // start >= end
    ASSERT_TRUE(yields_null(bitmap_src, BigIntVal(30), BigIntVal(20)));
    ASSERT_TRUE(yields_null(bitmap_src, BigIntVal(20), BigIntVal(20)));
    // negative bounds
    ASSERT_TRUE(yields_null(bitmap_src, BigIntVal(-10), BigIntVal(20)));
    ASSERT_TRUE(yields_null(bitmap_src, BigIntVal(10), BigIntVal(-20)));
    ASSERT_TRUE(yields_null(bitmap_src, BigIntVal(-10), BigIntVal(-20)));
    // NULL bounds
    ASSERT_TRUE(yields_null(bitmap_src, BigIntVal::null(), BigIntVal(20)));
    ASSERT_TRUE(yields_null(bitmap_src, BigIntVal(10), BigIntVal::null()));
    ASSERT_TRUE(yields_null(bitmap_src, BigIntVal::null(), BigIntVal::null()));
}
} // namespace doris

View File

@ -418,6 +418,7 @@ module.exports = [
"bitmap_not",
"bitmap_and_not",
"bitmap_and_not_count",
"bitmap_subset_in_range",
"bitmap_to_string",
"bitmap_union",
"bitmap_xor",

View File

@ -422,6 +422,7 @@ module.exports = [
"bitmap_not",
"bitmap_and_not",
"bitmap_and_not_count",
"bitmap_subset_in_range",
"bitmap_to_string",
"bitmap_union",
"bitmap_xor",

View File

@ -0,0 +1,57 @@
---
{
"title": "bitmap_subset_in_range",
"language": "en"
}
---
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
# bitmap_subset_in_range
## Description
### Syntax
`BITMAP BITMAP_SUBSET_IN_RANGE(BITMAP src, BIGINT range_start, BIGINT range_end)`
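Returns the subset of `src` whose elements fall within the range [range_start, range_end); `range_end` itself is not included. Returns NULL if any argument is NULL, if `range_start` >= `range_end`, or if either bound is negative.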
## example
```
mysql> select bitmap_to_string(bitmap_subset_in_range(bitmap_from_string('1,2,3,4,5'), 0, 9)) value;
+-----------+
| value |
+-----------+
| 1,2,3,4,5 |
+-----------+
mysql> select bitmap_to_string(bitmap_subset_in_range(bitmap_from_string('1,2,3,4,5'), 2, 3)) value;
+-------+
| value |
+-------+
| 2 |
+-------+
```
## keyword
BITMAP_SUBSET_IN_RANGE,BITMAP_SUBSET,BITMAP

View File

@ -0,0 +1,57 @@
---
{
"title": "bitmap_subset_in_range",
"language": "zh-CN"
}
---
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
# bitmap_subset_in_range
## Description
### Syntax
`BITMAP BITMAP_SUBSET_IN_RANGE(BITMAP src, BIGINT range_start, BIGINT range_end)`
返回 BITMAP 中位于 [range_start, range_end) 范围内的子集(不包含 range_end)。
## example
```
mysql> select bitmap_to_string(bitmap_subset_in_range(bitmap_from_string('1,2,3,4,5'), 0, 9)) value;
+-----------+
| value |
+-----------+
| 1,2,3,4,5 |
+-----------+
mysql> select bitmap_to_string(bitmap_subset_in_range(bitmap_from_string('1,2,3,4,5'), 2, 3)) value;
+-------+
| value |
+-------+
| 2 |
+-------+
```
## keyword
BITMAP_SUBSET_IN_RANGE,BITMAP_SUBSET,BITMAP

View File

@ -1211,6 +1211,9 @@ visible_functions = [
[['bitmap_max'], 'BIGINT', ['BITMAP'],
'_ZN5doris15BitmapFunctions10bitmap_maxEPN9doris_udf15FunctionContextERKNS1_9StringValE',
'', '', '', ''],
# bitmap_subset_in_range(BITMAP, BIGINT, BIGINT) -> BITMAP
# NOTE(review): the seventh field is 'vec' here but '' in the bitmap_max entry
# above -- confirm this difference is intended.
[['bitmap_subset_in_range'], 'BITMAP', ['BITMAP', 'BIGINT', 'BIGINT'],
'_ZN5doris15BitmapFunctions22bitmap_subset_in_rangeEPN9doris_udf15FunctionContextERKNS1_9StringValERKNS1_9BigIntValES9_',
'', '', 'vec', ''],
# hash functions
[['murmur_hash3_32'], 'INT', ['VARCHAR', '...'],