diff --git a/be/src/exprs/bitmap_function.cpp b/be/src/exprs/bitmap_function.cpp
index 6abbee447f..ccab0c558c 100644
--- a/be/src/exprs/bitmap_function.cpp
+++ b/be/src/exprs/bitmap_function.cpp
@@ -671,6 +671,32 @@ StringVal BitmapFunctions::bitmap_subset_in_range(FunctionContext* ctx, const St
     return serialize(ctx, &ret_bitmap);
 }
 
+// Builds the subset of `src` that holds at most `cardinality_limit` values, all of
+// them >= `range_start`, and returns it via serialize().
+// Returns NULL when any argument is NULL or when `range_start` or
+// `cardinality_limit` is negative.
+StringVal BitmapFunctions::bitmap_subset_limit(FunctionContext* ctx, const StringVal& src,
+        const BigIntVal& range_start, const BigIntVal& cardinality_limit) {
+    if (src.is_null || range_start.is_null || cardinality_limit.is_null) {
+        return StringVal::null();
+    }
+    if (range_start.val < 0 || cardinality_limit.val < 0) {
+        return StringVal::null();
+    }
+    BitmapValue ret_bitmap;
+    if (src.len == 0) {
+        // len == 0: src.ptr is read as a BitmapValue object, not as serialized bytes.
+        // Apply the same range/limit here; copying the whole object would ignore both arguments.
+        auto* bitmap = reinterpret_cast<BitmapValue*>(src.ptr);
+        bitmap->sub_limit(range_start.val, cardinality_limit.val, &ret_bitmap);
+    } else {
+        BitmapValue bitmap = BitmapValue((char*)src.ptr);
+        bitmap.sub_limit(range_start.val, cardinality_limit.val, &ret_bitmap);
+    }
+
+    return serialize(ctx, &ret_bitmap);
+}
+
 template void BitmapFunctions::bitmap_update_int(FunctionContext* ctx,
                                                  const TinyIntVal& src, StringVal* dst);
 template void BitmapFunctions::bitmap_update_int(FunctionContext* ctx,
diff --git a/be/src/exprs/bitmap_function.h b/be/src/exprs/bitmap_function.h
index 89cf0d9a0b..5b166a0470 100644
--- a/be/src/exprs/bitmap_function.h
+++ b/be/src/exprs/bitmap_function.h
@@ -106,6 +106,9 @@ public:
     static BigIntVal bitmap_max(FunctionContext* ctx, const StringVal& str);
     static StringVal bitmap_subset_in_range(FunctionContext* ctx, const StringVal& src,
         const BigIntVal& range_start, const BigIntVal& range_end);
+    // Subset of src: at most cardinality_limit values that are >= range_start (NULL on NULL or negative arguments).
+    static StringVal bitmap_subset_limit(FunctionContext* ctx, const StringVal& src,
+        const BigIntVal& range_start, const BigIntVal& cardinality_limit);
 };
 } // namespace doris
 #endif //DORIS_BE_SRC_QUERY_EXPRS_BITMAP_FUNCTION_H
diff --git a/be/src/util/bitmap_value.h b/be/src/util/bitmap_value.h
index af1670ea53..9bf6f6516b 100644
--- a/be/src/util/bitmap_value.h
+++ b/be/src/util/bitmap_value.h
@@ -1470,6 +1470,27 
@@ public:
         return count;
     }
 
+    /**
+     * Add to ret_bitmap the values of _bitmap that are >= range_start, in iteration order, stopping after cardinality_limit values.
+     * @param range_start inclusive lower bound; compared as unsigned only when positive, so a negative bound skips nothing instead of wrapping to a huge value
+     * @param cardinality_limit maximum number of values to add; ret_bitmap is only appended to (via add()), it is not cleared first
+     * @return the number of values added, which may be less than cardinality_limit
+     */
+    int64_t sub_limit(const int64_t& range_start, const int64_t& cardinality_limit, BitmapValue* ret_bitmap) {
+        int64_t count = 0;
+        for (auto it = _bitmap.begin(); it != _bitmap.end(); ++it) { // NOTE(review): scans _bitmap only; confirm other storage forms (cf. _convert_to_smaller_type) are covered
+            if (range_start > 0 && *it < static_cast<uint64_t>(range_start)) { // assumes *it is an unsigned 64-bit value; TODO confirm
+                continue; // still below the lower bound
+            }
+            if (count < cardinality_limit) {
+                ret_bitmap->add(*it);
+                ++count;
+            } else {
+                break; // limit reached; later values are not needed
+            }
+        }
+        return count;
+    }
 
 private:
     void _convert_to_smaller_type() {
diff --git a/be/test/exprs/bitmap_function_test.cpp b/be/test/exprs/bitmap_function_test.cpp
index 437985812a..c753383e71 100644
--- a/be/test/exprs/bitmap_function_test.cpp
+++ b/be/test/exprs/bitmap_function_test.cpp
@@ -754,6 +754,61 @@ TEST_F(BitmapFunctionsTest, bitmap_subset_in_range) {
 
 }
 
+TEST_F(BitmapFunctionsTest, bitmap_subset_limit) {
+    // null
+    StringVal res = BitmapFunctions::bitmap_subset_limit(ctx, StringVal::null(), BigIntVal(1), BigIntVal(3));
+    ASSERT_TRUE(res.is_null);
+
+    // empty
+    BitmapValue bitmap0;
+    StringVal empty_str = convert_bitmap_to_string(ctx, bitmap0);
+    res = BitmapFunctions::bitmap_subset_limit(ctx, empty_str, BigIntVal(10), BigIntVal(20));
+    BigIntVal result = BitmapFunctions::bitmap_count(ctx, res);
+    ASSERT_EQ(BigIntVal(0), result);
+
+    // normal
+    BitmapValue bitmap1({0,1,2,3,4,5,6,7,45,47,49,43,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,100,200,500});
+
+    StringVal bitmap_src = convert_bitmap_to_string(ctx, bitmap1);
+    res = BitmapFunctions::bitmap_subset_limit(ctx, bitmap_src, BigIntVal(4), BigIntVal(10));
+    result = BitmapFunctions::bitmap_count(ctx, res);
+    ASSERT_EQ(BigIntVal(10), result);
+
+    res = BitmapFunctions::bitmap_subset_limit(ctx, bitmap_src, BigIntVal(0), BigIntVal(1));
+    result = BitmapFunctions::bitmap_count(ctx, res);
+    
ASSERT_EQ(BigIntVal(1), result);
+
+    res = BitmapFunctions::bitmap_subset_limit(ctx, bitmap_src, BigIntVal(35), BigIntVal(10));
+    result = BitmapFunctions::bitmap_count(ctx, res);
+    ASSERT_EQ(BigIntVal(7), result);
+
+    res = BitmapFunctions::bitmap_subset_limit(ctx, bitmap_src, BigIntVal(31), BigIntVal(DecimalV2Value::MAX_INT64));
+    result = BitmapFunctions::bitmap_count(ctx, res);
+    ASSERT_EQ(BigIntVal(10), result);
+
+    // invalid arguments
+    // a negative range_start and/or cardinality_limit yields NULL
+    res = BitmapFunctions::bitmap_subset_limit(ctx, bitmap_src, BigIntVal(-10), BigIntVal(20));
+    ASSERT_TRUE(res.is_null);
+
+    res = BitmapFunctions::bitmap_subset_limit(ctx, bitmap_src, BigIntVal(10), BigIntVal(-20));
+    ASSERT_TRUE(res.is_null);
+
+    res = BitmapFunctions::bitmap_subset_limit(ctx, bitmap_src, BigIntVal(-10), BigIntVal(-20));
+    ASSERT_TRUE(res.is_null);
+
+    // a NULL range_start and/or cardinality_limit yields NULL
+    res = BitmapFunctions::bitmap_subset_limit(ctx, bitmap_src, BigIntVal::null(), BigIntVal(20));
+    ASSERT_TRUE(res.is_null);
+
+    res = BitmapFunctions::bitmap_subset_limit(ctx, bitmap_src, BigIntVal(10), BigIntVal::null());
+    ASSERT_TRUE(res.is_null);
+
+    res = BitmapFunctions::bitmap_subset_limit(ctx, bitmap_src, BigIntVal::null(), BigIntVal::null());
+    ASSERT_TRUE(res.is_null);
+
+}
+
 } // namespace doris
 
 int main(int argc, char** argv) {
diff --git a/be/test/util/bitmap_value_test.cpp b/be/test/util/bitmap_value_test.cpp
index dc4a0f33a2..997320510c 100644
--- a/be/test/util/bitmap_value_test.cpp
+++ b/be/test/util/bitmap_value_test.cpp
@@ -307,6 +307,25 @@ TEST(BitmapValueTest, bitmap_to_string) {
     ASSERT_STREQ("1,2", empty.to_string().c_str());
 }
 
+TEST(BitmapValueTest, sub_limit) {
+    BitmapValue bitmap({1,2,3,10,11,5,6,7,8,9});
+    BitmapValue ret_bitmap1;
+    ASSERT_EQ(5, bitmap.sub_limit(0, 5, &ret_bitmap1));
+    ASSERT_STREQ("1,2,3,5,6", ret_bitmap1.to_string().c_str());
+
+    BitmapValue ret_bitmap2;
+    ASSERT_EQ(6, bitmap.sub_limit(6, 10, &ret_bitmap2));
+    ASSERT_STREQ("6,7,8,9,10,11", ret_bitmap2.to_string().c_str());
+ + BitmapValue ret_bitmap3; + ASSERT_EQ(3, bitmap.sub_limit(5, 3, &ret_bitmap3)); + ASSERT_STREQ("5,6,7", ret_bitmap3.to_string().c_str()); + + BitmapValue ret_bitmap4; + ASSERT_EQ(5, bitmap.sub_limit(2, 5, &ret_bitmap4)); + ASSERT_STREQ("2,3,5,6,7", ret_bitmap4.to_string().c_str()); +} + TEST(BitmapValueTest, bitmap_single_convert) { BitmapValue bitmap; ASSERT_STREQ("", bitmap.to_string().c_str()); diff --git a/docs/.vuepress/sidebar/en.js b/docs/.vuepress/sidebar/en.js index e6c4e3b774..39be10bed0 100644 --- a/docs/.vuepress/sidebar/en.js +++ b/docs/.vuepress/sidebar/en.js @@ -423,6 +423,7 @@ module.exports = [ "bitmap_and_not", "bitmap_and_not_count", "bitmap_subset_in_range", + "bitmap_subset_limit", "bitmap_to_string", "bitmap_union", "bitmap_xor", diff --git a/docs/.vuepress/sidebar/zh-CN.js b/docs/.vuepress/sidebar/zh-CN.js index a21315d2f9..837c04eda4 100644 --- a/docs/.vuepress/sidebar/zh-CN.js +++ b/docs/.vuepress/sidebar/zh-CN.js @@ -427,6 +427,7 @@ module.exports = [ "bitmap_and_not", "bitmap_and_not_count", "bitmap_subset_in_range", + "bitmap_subset_limit", "bitmap_to_string", "bitmap_union", "bitmap_xor", diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_subset_limit.md b/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_subset_limit.md new file mode 100644 index 0000000000..0e4948ade7 --- /dev/null +++ b/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_subset_limit.md @@ -0,0 +1,59 @@ +--- +{ + "title": "bitmap_subset_limit", + "language": "en" +} +--- + + + +# bitmap_subset_limit + +## Description + +### Syntax + +`BITMAP BITMAP_SUBSET_LIMIT(BITMAP src, BIGINT range_start, BIGINT cardinality_limit)` + +Create a subset of the BITMAP src: starting from the first element that is not less than range_start, keep at most cardinality_limit elements. Returns NULL if any argument is NULL, or if range_start or cardinality_limit is negative. +range_start: inclusive lower bound for the element values +cardinality_limit: maximum number of elements in the subset + +## example + +``` +mysql> select bitmap_to_string(bitmap_subset_limit(bitmap_from_string('1,2,3,4,5'), 0, 3)) value; ++-----------+ +| 
value | ++-----------+ +| 1,2,3 | ++-----------+ + +mysql> select bitmap_to_string(bitmap_subset_limit(bitmap_from_string('1,2,3,4,5'), 4, 3)) value; ++-------+ +| value | ++-------+ +| 4,5 | ++-------+ +``` + +## keyword + + BITMAP_SUBSET_LIMIT,BITMAP_SUBSET,BITMAP diff --git a/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_subset_limit.md b/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_subset_limit.md new file mode 100644 index 0000000000..be905b5637 --- /dev/null +++ b/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_subset_limit.md @@ -0,0 +1,59 @@ +--- +{ + "title": "bitmap_subset_limit", + "language": "zh-CN" +} +--- + + + +# bitmap_subset_limit + +## Description + +### Syntax + +`BITMAP BITMAP_SUBSET_LIMIT(BITMAP src, BIGINT range_start, BIGINT cardinality_limit)` + +生成 src 的子 BITMAP,从不小于 range_start 的位置开始,大小限制为 cardinality_limit。 +range_start:范围起始点(含) +cardinality_limit:子 BITMAP 基数上限 + +## example + +``` +mysql> select bitmap_to_string(bitmap_subset_limit(bitmap_from_string('1,2,3,4,5'), 0, 3)) value; ++-----------+ +| value | ++-----------+ +| 1,2,3 | ++-----------+ + +mysql> select bitmap_to_string(bitmap_subset_limit(bitmap_from_string('1,2,3,4,5'), 4, 3)) value; ++-------+ +| value | ++-------+ +| 4,5 | ++-------+ +``` + +## keyword + + BITMAP_SUBSET_LIMIT,BITMAP_SUBSET,BITMAP diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index 87d0e0c2a5..6c990b5cff 100755 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -1220,13 +1220,15 @@ visible_functions = [ [['bitmap_subset_in_range'], 'BITMAP', ['BITMAP', 'BIGINT', 'BIGINT'], '_ZN5doris15BitmapFunctions22bitmap_subset_in_rangeEPN9doris_udf15FunctionContextERKNS1_9StringValERKNS1_9BigIntValES9_', '', '', 'vec', ''], + [['bitmap_subset_limit'], 'BITMAP', ['BITMAP', 'BIGINT', 'BIGINT'], + 
'_ZN5doris15BitmapFunctions19bitmap_subset_limitEPN9doris_udf15FunctionContextERKNS1_9StringValERKNS1_9BigIntValES9_', + '', '', 'vec', ''], [['bitmap_and_count'], 'BIGINT', ['BITMAP','BITMAP'], '_ZN5doris15BitmapFunctions16bitmap_and_countEPN9doris_udf15FunctionContextERKNS1_9StringValES6_', '', '', '', ''], [['bitmap_or_count'], 'BIGINT', ['BITMAP','BITMAP'], '_ZN5doris15BitmapFunctions15bitmap_or_countEPN9doris_udf15FunctionContextERKNS1_9StringValES6_', '', '', '', ''], - # hash functions [['murmur_hash3_32'], 'INT', ['VARCHAR', '...'], '_ZN5doris13HashFunctions15murmur_hash3_32EPN9doris_udf15FunctionContextEiPKNS1_9StringValE',