[Function] Add bitmap function bitmap_subset_limit (#6980)

Add bitmap function bitmap_subset_limit.
This function returns the subset of a bitmap that starts at a given value and contains at most a given number of elements.
This commit is contained in:
pengxiangyu
2021-11-04 12:14:47 +08:00
committed by GitHub
parent d19a971582
commit 599ecb1f30
10 changed files with 239 additions and 1 deletions

View File

@ -671,6 +671,25 @@ StringVal BitmapFunctions::bitmap_subset_in_range(FunctionContext* ctx, const St
return serialize(ctx, &ret_bitmap);
}
// Builds the subset of `src` made of at most `cardinality_limit` values that are
// not less than `range_start`, and returns it serialized.
//
// Returns a NULL StringVal when any argument is NULL or when `range_start` or
// `cardinality_limit` is negative.
StringVal BitmapFunctions::bitmap_subset_limit(FunctionContext* ctx, const StringVal& src,
                                               const BigIntVal& range_start,
                                               const BigIntVal& cardinality_limit) {
    if (src.is_null || range_start.is_null || cardinality_limit.is_null) {
        return StringVal::null();
    }
    if (range_start.val < 0 || cardinality_limit.val < 0) {
        return StringVal::null();
    }
    BitmapValue ret_bitmap;
    if (src.len == 0) {
        // A zero length means `ptr` refers to a live BitmapValue object rather than
        // to serialized bytes. The limits must still be applied to it; copying the
        // object wholesale would return the full bitmap and ignore both arguments.
        auto* bitmap = reinterpret_cast<BitmapValue*>(src.ptr);
        bitmap->sub_limit(range_start.val, cardinality_limit.val, &ret_bitmap);
    } else {
        // Otherwise `ptr` holds a serialized bitmap that has to be decoded first.
        BitmapValue bitmap(reinterpret_cast<char*>(src.ptr));
        bitmap.sub_limit(range_start.val, cardinality_limit.val, &ret_bitmap);
    }
    return serialize(ctx, &ret_bitmap);
}
template void BitmapFunctions::bitmap_update_int<TinyIntVal>(FunctionContext* ctx,
const TinyIntVal& src, StringVal* dst);
template void BitmapFunctions::bitmap_update_int<SmallIntVal>(FunctionContext* ctx,

View File

@ -106,6 +106,8 @@ public:
static BigIntVal bitmap_max(FunctionContext* ctx, const StringVal& str);
static StringVal bitmap_subset_in_range(FunctionContext* ctx, const StringVal& src,
const BigIntVal& range_start, const BigIntVal& range_end);
// Returns a serialized bitmap intended to hold at most `cardinality_limit` values
// of `src` that are not less than `range_start`.
// The result is NULL when any argument is NULL or when `range_start` or
// `cardinality_limit` is negative.
static StringVal bitmap_subset_limit(FunctionContext* ctx, const StringVal& src,
const BigIntVal& range_start, const BigIntVal& cardinality_limit);
};
} // namespace doris
#endif //DORIS_BE_SRC_QUERY_EXPRS_BITMAP_FUNCTION_H

View File

@ -1470,6 +1470,27 @@ public:
return count;
}
/**
 * Collect into ret_bitmap the first values of this bitmap that are not less
 * than range_start, stopping once cardinality_limit values have been added.
 *
 * @param range_start inclusive lower bound of the values to collect; a negative
 *        value is treated as 0 so that it selects from the first stored value
 * @param cardinality_limit maximum number of values to add; nothing is added
 *        when it is zero or negative
 * @param ret_bitmap output bitmap that receives the selected values (values are
 *        added to whatever it already contains)
 * @return the number of values added, which may be less than cardinality_limit
 *
 * NOTE(review): only _bitmap is iterated here. If a BitmapValue can keep its
 * content outside _bitmap (e.g. a single-value representation), such content
 * is not seen by this function -- confirm against the class definition.
 */
int64_t sub_limit(const int64_t& range_start, const int64_t& cardinality_limit, BitmapValue* ret_bitmap) {
    if (cardinality_limit <= 0) {
        // Nothing can be selected; skip the scan entirely.
        return 0;
    }
    // Compare in the unsigned domain explicitly. Comparing the stored values with a
    // negative signed bound would convert the bound to a huge unsigned number and
    // wrongly skip every element.
    const uint64_t lower_bound = range_start < 0 ? 0 : static_cast<uint64_t>(range_start);
    int64_t count = 0;
    for (auto it = _bitmap.begin(); it != _bitmap.end() && count < cardinality_limit; ++it) {
        if (*it < lower_bound) {
            continue;
        }
        ret_bitmap->add(*it);
        ++count;
    }
    return count;
}
private:
void _convert_to_smaller_type() {

View File

@ -754,6 +754,61 @@ TEST_F(BitmapFunctionsTest, bitmap_subset_in_range) {
}
// Checks bitmap_subset_limit through the cardinality of its result for NULL,
// empty and populated inputs, and checks that invalid arguments yield NULL.
TEST_F(BitmapFunctionsTest, bitmap_subset_limit) {
    // Cardinality of the subset produced for plain integer bounds.
    auto subset_count = [&](const StringVal& input, int64_t start, int64_t limit) {
        StringVal subset = BitmapFunctions::bitmap_subset_limit(ctx, input, BigIntVal(start),
                                                                BigIntVal(limit));
        return BitmapFunctions::bitmap_count(ctx, subset);
    };
    // Whether the call produces a NULL result for the given (possibly NULL) arguments.
    auto yields_null = [&](const StringVal& input, const BigIntVal& start, const BigIntVal& limit) {
        return BitmapFunctions::bitmap_subset_limit(ctx, input, start, limit).is_null;
    };

    // NULL source bitmap
    ASSERT_TRUE(yields_null(StringVal::null(), BigIntVal(1), BigIntVal(3)));

    // empty source bitmap
    BitmapValue no_values;
    StringVal empty_src = convert_bitmap_to_string(ctx, no_values);
    ASSERT_EQ(BigIntVal(0), subset_count(empty_src, 10, 20));

    // populated source bitmap
    BitmapValue values({0,1,2,3,4,5,6,7,45,47,49,43,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,100,200,500});
    StringVal bitmap_src = convert_bitmap_to_string(ctx, values);
    ASSERT_EQ(BigIntVal(10), subset_count(bitmap_src, 4, 10));
    ASSERT_EQ(BigIntVal(1), subset_count(bitmap_src, 0, 1));
    // only 7 values are >= 35, so the limit of 10 is not reached
    ASSERT_EQ(BigIntVal(7), subset_count(bitmap_src, 35, 10));
    // a huge limit returns every value >= 31
    ASSERT_EQ(BigIntVal(10), subset_count(bitmap_src, 31, DecimalV2Value::MAX_INT64));

    // negative range_start and/or cardinality_limit
    ASSERT_TRUE(yields_null(bitmap_src, BigIntVal(-10), BigIntVal(20)));
    ASSERT_TRUE(yields_null(bitmap_src, BigIntVal(10), BigIntVal(-20)));
    ASSERT_TRUE(yields_null(bitmap_src, BigIntVal(-10), BigIntVal(-20)));

    // NULL range_start and/or cardinality_limit
    ASSERT_TRUE(yields_null(bitmap_src, BigIntVal::null(), BigIntVal(20)));
    ASSERT_TRUE(yields_null(bitmap_src, BigIntVal(10), BigIntVal::null()));
    ASSERT_TRUE(yields_null(bitmap_src, BigIntVal::null(), BigIntVal::null()));
}
} // namespace doris
int main(int argc, char** argv) {

View File

@ -307,6 +307,25 @@ TEST(BitmapValueTest, bitmap_to_string) {
ASSERT_STREQ("1,2", empty.to_string().c_str());
}
// Table-driven check of BitmapValue::sub_limit: each case gives the start value,
// the limit, the expected returned count and the expected content of the subset.
TEST(BitmapValueTest, sub_limit) {
    struct SubLimitCase {
        int64_t start;
        int64_t limit;
        int64_t expected_count;
        const char* expected_values;
    };
    const SubLimitCase cases[] = {
            {0, 5, 5, "1,2,3,5,6"},
            // fewer than `limit` values are available from 6 upwards
            {6, 10, 6, "6,7,8,9,10,11"},
            {5, 3, 3, "5,6,7"},
            // 4 is absent from the source, so the subset skips from 3 to 5
            {2, 5, 5, "2,3,5,6,7"},
    };

    BitmapValue source({1,2,3,10,11,5,6,7,8,9});
    for (const auto& c : cases) {
        BitmapValue subset;
        ASSERT_EQ(c.expected_count, source.sub_limit(c.start, c.limit, &subset));
        ASSERT_STREQ(c.expected_values, subset.to_string().c_str());
    }
}
TEST(BitmapValueTest, bitmap_single_convert) {
BitmapValue bitmap;
ASSERT_STREQ("", bitmap.to_string().c_str());

View File

@ -423,6 +423,7 @@ module.exports = [
"bitmap_and_not",
"bitmap_and_not_count",
"bitmap_subset_in_range",
"bitmap_subset_limit",
"bitmap_to_string",
"bitmap_union",
"bitmap_xor",

View File

@ -427,6 +427,7 @@ module.exports = [
"bitmap_and_not",
"bitmap_and_not_count",
"bitmap_subset_in_range",
"bitmap_subset_limit",
"bitmap_to_string",
"bitmap_union",
"bitmap_xor",

View File

@ -0,0 +1,59 @@
---
{
"title": "bitmap_subset_limit",
"language": "en"
}
---
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
# bitmap_subset_limit
## Description
### Syntax
`BITMAP BITMAP_SUBSET_LIMIT(BITMAP src, BIGINT range_start, BIGINT cardinality_limit)`
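Returns a subset of the BITMAP `src` containing at most `cardinality_limit` elements, taken in ascending order starting from the first element that is not less than `range_start`. Returns NULL if any argument is NULL, or if `range_start` or `cardinality_limit` is negative.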
range_start: inclusive lower bound of the values in the subset
cardinality_limit: maximum number of elements in the subset
## example
```
mysql> select bitmap_to_string(bitmap_subset_limit(bitmap_from_string('1,2,3,4,5'), 0, 3)) value;
+-----------+
| value |
+-----------+
| 1,2,3 |
+-----------+
mysql> select bitmap_to_string(bitmap_subset_limit(bitmap_from_string('1,2,3,4,5'), 4, 3)) value;
+-------+
| value |
+-------+
| 4,5 |
+-------+
```
## keyword
BITMAP_SUBSET_LIMIT,BITMAP_SUBSET,BITMAP

View File

@ -0,0 +1,59 @@
---
{
"title": "bitmap_subset_limit",
"language": "zh-CN"
}
---
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
# bitmap_subset_limit
## Description
### Syntax
`BITMAP BITMAP_SUBSET_LIMIT(BITMAP src, BIGINT range_start, BIGINT cardinality_limit)`
生成 src 的子 BITMAP, 从不小于 range_start 的位置开始,大小限制为 cardinality_limit 。
range_start:范围起始点(含)
cardinality_limit:子BITMAP基数上限
## example
```
mysql> select bitmap_to_string(bitmap_subset_limit(bitmap_from_string('1,2,3,4,5'), 0, 3)) value;
+-----------+
| value |
+-----------+
| 1,2,3 |
+-----------+
mysql> select bitmap_to_string(bitmap_subset_limit(bitmap_from_string('1,2,3,4,5'), 4, 3)) value;
+-------+
| value |
+-------+
| 4,5 |
+-------+
```
## keyword
BITMAP_SUBSET_LIMIT,BITMAP_SUBSET,BITMAP

View File

@ -1220,13 +1220,15 @@ visible_functions = [
[['bitmap_subset_in_range'], 'BITMAP', ['BITMAP', 'BIGINT', 'BIGINT'],
'_ZN5doris15BitmapFunctions22bitmap_subset_in_rangeEPN9doris_udf15FunctionContextERKNS1_9StringValERKNS1_9BigIntValES9_',
'', '', 'vec', ''],
[['bitmap_subset_limit'], 'BITMAP', ['BITMAP', 'BIGINT', 'BIGINT'],
'_ZN5doris15BitmapFunctions19bitmap_subset_limitEPN9doris_udf15FunctionContextERKNS1_9StringValERKNS1_9BigIntValES9_',
'', '', 'vec', ''],
[['bitmap_and_count'], 'BIGINT', ['BITMAP','BITMAP'],
'_ZN5doris15BitmapFunctions16bitmap_and_countEPN9doris_udf15FunctionContextERKNS1_9StringValES6_',
'', '', '', ''],
[['bitmap_or_count'], 'BIGINT', ['BITMAP','BITMAP'],
'_ZN5doris15BitmapFunctions15bitmap_or_countEPN9doris_udf15FunctionContextERKNS1_9StringValES6_',
'', '', '', ''],
# hash functions
[['murmur_hash3_32'], 'INT', ['VARCHAR', '...'],
'_ZN5doris13HashFunctions15murmur_hash3_32EPN9doris_udf15FunctionContextEiPKNS1_9StringValE',