From 65ded82778a7613f7755cb5d7428e63fe65cd251 Mon Sep 17 00:00:00 2001 From: qiye Date: Mon, 1 Nov 2021 11:05:19 +0800 Subject: [PATCH] [Function] add BE bitmap function bitmap_subset_in_range (#6917) Add bitmap function bitmap_subset_in_range. This function will return subset in specified range (not include the range_end). --- be/src/exprs/bitmap_function.cpp | 19 ++++++ be/src/exprs/bitmap_function.h | 2 + be/src/util/bitmap_value.h | 20 ++++++ be/test/exprs/bitmap_function_test.cpp | 61 +++++++++++++++++++ docs/.vuepress/sidebar/en.js | 1 + docs/.vuepress/sidebar/zh-CN.js | 1 + .../bitmap_subset_in_range.md | 57 +++++++++++++++++ .../bitmap_subset_in_range.md | 57 +++++++++++++++++ gensrc/script/doris_builtins_functions.py | 3 + 9 files changed, 221 insertions(+) create mode 100644 docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_subset_in_range.md create mode 100644 docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_subset_in_range.md diff --git a/be/src/exprs/bitmap_function.cpp b/be/src/exprs/bitmap_function.cpp index 7d03984e19..7051eb0e24 100644 --- a/be/src/exprs/bitmap_function.cpp +++ b/be/src/exprs/bitmap_function.cpp @@ -620,6 +620,25 @@ BigIntVal BitmapFunctions::bitmap_max(FunctionContext* ctx, const StringVal& src } } +StringVal BitmapFunctions::bitmap_subset_in_range(FunctionContext* ctx, const StringVal& src, + const BigIntVal& range_start, const BigIntVal& range_end) { + if (src.is_null || range_start.is_null || range_end.is_null) { + return StringVal::null(); + } + if (range_start.val >= range_end.val || range_start.val < 0 || range_end.val < 0) { + return StringVal::null(); + } + BitmapValue ret_bitmap; + if (src.len == 0) { + ret_bitmap = *reinterpret_cast(src.ptr); + } else { + BitmapValue bitmap = BitmapValue((char*)src.ptr); + bitmap.sub_range(range_start.val, range_end.val, &ret_bitmap); + } + + return serialize(ctx, &ret_bitmap); +} + template void BitmapFunctions::bitmap_update_int(FunctionContext* ctx, const TinyIntVal& src, StringVal* dst); template void BitmapFunctions::bitmap_update_int(FunctionContext* ctx, diff --git a/be/src/exprs/bitmap_function.h b/be/src/exprs/bitmap_function.h index a07af1c99d..0679bab841 100644 --- a/be/src/exprs/bitmap_function.h +++ b/be/src/exprs/bitmap_function.h @@ -97,6 +97,8 @@ public: template static BigIntVal bitmap_intersect_finalize(FunctionContext* ctx, const StringVal& src); static BigIntVal bitmap_max(FunctionContext* ctx, const StringVal& str); + static StringVal bitmap_subset_in_range(FunctionContext* ctx, const StringVal& src, + const BigIntVal& range_start, const BigIntVal& range_end); }; } // namespace doris #endif //DORIS_BE_SRC_QUERY_EXPRS_BITMAP_FUNCTION_H diff --git a/be/src/util/bitmap_value.h b/be/src/util/bitmap_value.h index 6f9a2fadfa..af1670ea53 100644 --- a/be/src/util/bitmap_value.h +++ b/be/src/util/bitmap_value.h @@ -1451,6 +1451,26 @@ public: } } + /** + * Return new set with specified range (not include the range_end) + */ + int64_t sub_range(const int64_t& range_start, const int64_t& range_end, BitmapValue* ret_bitmap) { + int64_t count = 0; + for (auto it = _bitmap.begin(); it != _bitmap.end(); ++it) { + if (*it < range_start) { + continue; + } + if (*it < range_end) { + ret_bitmap->add(*it); + ++count; + } else { + break; + } + } + return count; + } + + private: void _convert_to_smaller_type() { if (_type == BITMAP) { diff --git a/be/test/exprs/bitmap_function_test.cpp b/be/test/exprs/bitmap_function_test.cpp index a4e2fb8b68..2b6511fbf7 100644 --- a/be/test/exprs/bitmap_function_test.cpp +++ b/be/test/exprs/bitmap_function_test.cpp @@ -552,6 +552,67 @@ TEST_F(BitmapFunctionsTest, bitmap_max) { ASSERT_EQ(BigIntVal(1024), result); } +TEST_F(BitmapFunctionsTest, bitmap_subset_in_range) { + // null + StringVal res = BitmapFunctions::bitmap_subset_in_range(ctx, StringVal::null(), BigIntVal(1), BigIntVal(3)); + ASSERT_TRUE(res.is_null); + + // empty + BitmapValue bitmap0; + StringVal empty_str = convert_bitmap_to_string(ctx, bitmap0); + res = BitmapFunctions::bitmap_subset_in_range(ctx, empty_str, BigIntVal(1), BigIntVal(3)); + BigIntVal result = BitmapFunctions::bitmap_count(ctx, res); + ASSERT_EQ(BigIntVal(0), result); + + // normal + BitmapValue bitmap1({0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,208,23,24,25,26,27,28,29,30,31,32,33,100,200,500}); + + StringVal bitmap_src = convert_bitmap_to_string(ctx, bitmap1); + res = BitmapFunctions::bitmap_subset_in_range(ctx, bitmap_src, BigIntVal(30), BigIntVal(200)); + result = BitmapFunctions::bitmap_count(ctx, res); + ASSERT_EQ(BigIntVal(5), result); + + res = BitmapFunctions::bitmap_subset_in_range(ctx, bitmap_src, BigIntVal(0), BigIntVal(1)); + result = BitmapFunctions::bitmap_count(ctx, res); + ASSERT_EQ(BigIntVal(1), result); + + res = BitmapFunctions::bitmap_subset_in_range(ctx, bitmap_src, BigIntVal(11), BigIntVal(15)); + result = BitmapFunctions::bitmap_count(ctx, res); + ASSERT_EQ(BigIntVal(4), result); + + res = BitmapFunctions::bitmap_subset_in_range(ctx, bitmap_src, BigIntVal(11), DecimalV2Value::MAX_INT64); + result = BitmapFunctions::bitmap_count(ctx, res); + ASSERT_EQ(BigIntVal(27), result); + + // innormal + // start >= end + res = BitmapFunctions::bitmap_subset_in_range(ctx, bitmap_src, BigIntVal(30), BigIntVal(20)); + ASSERT_TRUE(res.is_null); + + res = BitmapFunctions::bitmap_subset_in_range(ctx, bitmap_src, BigIntVal(20), BigIntVal(20)); + ASSERT_TRUE(res.is_null); + + // negative range + res = BitmapFunctions::bitmap_subset_in_range(ctx, bitmap_src, BigIntVal(-10), BigIntVal(20)); + ASSERT_TRUE(res.is_null); + + res = BitmapFunctions::bitmap_subset_in_range(ctx, bitmap_src, BigIntVal(10), BigIntVal(-20)); + ASSERT_TRUE(res.is_null); + + res = BitmapFunctions::bitmap_subset_in_range(ctx, bitmap_src, BigIntVal(-10), BigIntVal(-20)); + ASSERT_TRUE(res.is_null); + + // null range + res = BitmapFunctions::bitmap_subset_in_range(ctx, bitmap_src, BigIntVal::null(), BigIntVal(20)); + ASSERT_TRUE(res.is_null); + + res = BitmapFunctions::bitmap_subset_in_range(ctx, bitmap_src, BigIntVal(10), BigIntVal::null()); + ASSERT_TRUE(res.is_null); + + res = BitmapFunctions::bitmap_subset_in_range(ctx, bitmap_src, BigIntVal::null(), BigIntVal::null()); + ASSERT_TRUE(res.is_null); + +} } // namespace doris diff --git a/docs/.vuepress/sidebar/en.js b/docs/.vuepress/sidebar/en.js index 8ddd4f653b..158d3615e0 100644 --- a/docs/.vuepress/sidebar/en.js +++ b/docs/.vuepress/sidebar/en.js @@ -418,6 +418,7 @@ module.exports = [ "bitmap_not", "bitmap_and_not", "bitmap_and_not_count", + "bitmap_subset_in_range", "bitmap_to_string", "bitmap_union", "bitmap_xor", diff --git a/docs/.vuepress/sidebar/zh-CN.js b/docs/.vuepress/sidebar/zh-CN.js index 67530d47da..7f678ca4fd 100644 --- a/docs/.vuepress/sidebar/zh-CN.js +++ b/docs/.vuepress/sidebar/zh-CN.js @@ -422,6 +422,7 @@ module.exports = [ "bitmap_not", "bitmap_and_not", "bitmap_and_not_count", + "bitmap_subset_in_range", "bitmap_to_string", "bitmap_union", "bitmap_xor", diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_subset_in_range.md b/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_subset_in_range.md new file mode 100644 index 0000000000..c72d65216f --- /dev/null +++ b/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_subset_in_range.md @@ -0,0 +1,57 @@ +--- +{ + "title": "bitmap_subset_in_range", + "language": "en" +} +--- + + + +# bitmap_subset_in_range + +## Description + +### Syntax + +`BITMAP BITMAP_SUBSET_IN_RANGE(BITMAP src, BIGINT range_start, BIGINT range_end)` + +Return subset in specified range (not include the range_end). + +## example + +``` +mysql> select bitmap_to_string(bitmap_subset_in_range(bitmap_from_string('1,2,3,4,5'), 0, 9)) value; ++-----------+ +| value | ++-----------+ +| 1,2,3,4,5 | ++-----------+ + +mysql> select bitmap_to_string(bitmap_subset_in_range(bitmap_from_string('1,2,3,4,5'), 2, 3)) value; ++-------+ +| value | ++-------+ +| 2 | ++-------+ +``` + +## keyword + + BITMAP_SUBSET_IN_RANGE,BITMAP_SUBSET,BITMAP diff --git a/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_subset_in_range.md b/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_subset_in_range.md new file mode 100644 index 0000000000..5fb673b1ad --- /dev/null +++ b/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_subset_in_range.md @@ -0,0 +1,57 @@ +--- +{ + "title": "bitmap_subset_in_range", + "language": "zh-CN" +} +--- + + + +# bitmap_subset_in_range + +## Description + +### Syntax + +`BITMAP BITMAP_SUBSET_IN_RANGE(BITMAP src, BIGINT range_start, BIGINT range_end)` + +返回 BITMAP 指定范围内的子集(不包括范围结束)。 + +## example + +``` +mysql> select bitmap_to_string(bitmap_subset_in_range(bitmap_from_string('1,2,3,4,5'), 0, 9)) value; ++-----------+ +| value | ++-----------+ +| 1,2,3,4,5 | ++-----------+ + +mysql> select bitmap_to_string(bitmap_subset_in_range(bitmap_from_string('1,2,3,4,5'), 2, 3)) value; ++-------+ +| value | ++-------+ +| 2 | ++-------+ +``` + +## keyword + + BITMAP_SUBSET_IN_RANGE,BITMAP_SUBSET,BITMAP diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index c67ef16c67..651ea96926 100755 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -1211,6 +1211,9 @@ visible_functions = [ [['bitmap_max'], 'BIGINT', ['BITMAP'], '_ZN5doris15BitmapFunctions10bitmap_maxEPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', '', ''], + [['bitmap_subset_in_range'], 'BITMAP', ['BITMAP', 'BIGINT', 'BIGINT'], + '_ZN5doris15BitmapFunctions22bitmap_subset_in_rangeEPN9doris_udf15FunctionContextERKNS1_9StringValERKNS1_9BigIntValES9_', + '', '', 'vec', ''], # hash functions [['murmur_hash3_32'], 'INT', ['VARCHAR', '...'],