Optimize the bitmap count functions: use the roaring cardinality methods, which are faster than the current implementation (#7151)

This commit is contained in:
Zhengguo Yang
2021-11-24 14:42:48 +08:00
committed by GitHub
parent b6a9207a25
commit c9e578032b
3 changed files with 417 additions and 75 deletions

View File

@ -372,7 +372,7 @@ TEST_F(BitmapFunctionsTest, bitmap_xor_count) {
BigIntVal result = BitmapFunctions::bitmap_xor_count(ctx, bitmap_src, bitmap_dst);
BigIntVal expected(4);
ASSERT_EQ(expected, result);
ASSERT_EQ(expected.val, result.val);
}
{
BitmapValue bitmap1({1, 2, 3});
@ -384,7 +384,7 @@ TEST_F(BitmapFunctionsTest, bitmap_xor_count) {
BigIntVal result = BitmapFunctions::bitmap_xor_count(ctx, bitmap_src, bitmap_dst);
BigIntVal expected(0);
ASSERT_EQ(expected, result);
ASSERT_EQ(expected.val, result.val);
}
{
BitmapValue bitmap1({1, 2, 3});
@ -396,7 +396,46 @@ TEST_F(BitmapFunctionsTest, bitmap_xor_count) {
BigIntVal result = BitmapFunctions::bitmap_xor_count(ctx, bitmap_src, bitmap_dst);
BigIntVal expected(6);
ASSERT_EQ(expected, result);
ASSERT_EQ(expected.val, result.val);
}
}
// Exercises bitmap_xor_count with operands that contain values far beyond the
// 32-bit range, mixed with small values.
TEST_F(BitmapFunctionsTest, bitmap_xor_count_64) {
    // One shared element: the four remaining elements each belong to one side only.
    {
        BitmapValue lhs({14001230000000000501ull, 2, 1404560000000000503ull});
        BitmapValue rhs({1404560000000000503ull, 4, 14111000000000000505ull});
        StringVal lhs_str = convert_bitmap_to_string(ctx, lhs);
        StringVal rhs_str = convert_bitmap_to_string(ctx, rhs);
        BigIntVal xor_count = BitmapFunctions::bitmap_xor_count(ctx, lhs_str, rhs_str);
        ASSERT_EQ(4, xor_count.val);
    }

    // Identical operands: nothing is exclusive to either side.
    {
        BitmapValue lhs({14123400000000000501ull, 2, 1498760000000000503ull});
        BitmapValue rhs({14123400000000000501ull, 2, 1498760000000000503ull});
        StringVal lhs_str = convert_bitmap_to_string(ctx, lhs);
        StringVal rhs_str = convert_bitmap_to_string(ctx, rhs);
        BigIntVal xor_count = BitmapFunctions::bitmap_xor_count(ctx, lhs_str, rhs_str);
        ASSERT_EQ(0, xor_count.val);
    }

    // Disjoint operands: every element of both sides is counted.
    {
        BitmapValue lhs({15000000000000000501ull, 2, 1200000000000000503ull});
        BitmapValue rhs({13000000000000000504ull, 5, 1160000000000000506ull});
        StringVal lhs_str = convert_bitmap_to_string(ctx, lhs);
        StringVal rhs_str = convert_bitmap_to_string(ctx, rhs);
        BigIntVal xor_count = BitmapFunctions::bitmap_xor_count(ctx, lhs_str, rhs_str);
        ASSERT_EQ(6, xor_count.val);
    }
}
@ -406,24 +445,52 @@ TEST_F(BitmapFunctionsTest, bitmap_and_count) {
StringVal bitmap_src1 = convert_bitmap_to_string(ctx, bitmap1);
StringVal bitmap_src2 = convert_bitmap_to_string(ctx, bitmap2);
BigIntVal result = BitmapFunctions::bitmap_and_count(ctx, bitmap_src1, bitmap_src2);
ASSERT_EQ(BigIntVal(0), result);
ASSERT_EQ(0, result.val);
result = BitmapFunctions::bitmap_and_count(ctx, bitmap_src1, StringVal::null());
ASSERT_EQ(BigIntVal(0), result);
ASSERT_EQ(0, result.val);
bitmap1 = BitmapValue({0, 1, 2,std::numeric_limits<uint64_t>::min()});
bitmap2 = BitmapValue({0, 1, 2,std::numeric_limits<uint64_t>::max()});
bitmap1 = BitmapValue({0, 1, 2, std::numeric_limits<uint64_t>::min()});
bitmap2 = BitmapValue({0, 1, 2, std::numeric_limits<uint64_t>::max()});
bitmap_src1 = convert_bitmap_to_string(ctx, bitmap1);
bitmap_src2 = convert_bitmap_to_string(ctx, bitmap2);
result = BitmapFunctions::bitmap_and_count(ctx, bitmap_src1, bitmap_src2);
ASSERT_EQ(BigIntVal(3), result);
ASSERT_EQ(3, result.val);
bitmap1 = BitmapValue({1, 2, 3});
bitmap2 = BitmapValue({3, 4, 5});
bitmap_src1 = convert_bitmap_to_string(ctx, bitmap1);
bitmap_src2 = convert_bitmap_to_string(ctx, bitmap2);
result = BitmapFunctions::bitmap_and_count(ctx, bitmap_src1, bitmap_src2);
ASSERT_EQ(BigIntVal(1), result);
ASSERT_EQ(1, result.val);
}
// Exercises bitmap_and_count with operands that contain values far beyond the
// 32-bit range, mixed with small values.
TEST_F(BitmapFunctionsTest, bitmap_and_count_64) {
    // Intersecting with an empty bitmap, or with a NULL argument, yields 0.
    {
        BitmapValue lhs({14333000000000000501ull, 2, 1454100000000000503ull});
        BitmapValue rhs;
        StringVal lhs_str = convert_bitmap_to_string(ctx, lhs);
        StringVal rhs_str = convert_bitmap_to_string(ctx, rhs);

        BigIntVal and_count = BitmapFunctions::bitmap_and_count(ctx, lhs_str, rhs_str);
        ASSERT_EQ(0, and_count.val);

        and_count = BitmapFunctions::bitmap_and_count(ctx, lhs_str, StringVal::null());
        ASSERT_EQ(0, and_count.val);
    }

    // Three shared elements; the uint64_t min/max extremes differ between the sides.
    {
        BitmapValue lhs({11598000000000000501ull, 2, 1923400000000000503ull,
                         std::numeric_limits<uint64_t>::min()});
        BitmapValue rhs({11598000000000000501ull, 2, 1923400000000000503ull,
                         std::numeric_limits<uint64_t>::max()});
        StringVal lhs_str = convert_bitmap_to_string(ctx, lhs);
        StringVal rhs_str = convert_bitmap_to_string(ctx, rhs);

        BigIntVal and_count = BitmapFunctions::bitmap_and_count(ctx, lhs_str, rhs_str);
        ASSERT_EQ(3, and_count.val);
    }

    // Exactly one shared element.
    {
        BitmapValue lhs({15555500000000000501ull, 2, 1400000000000000503ull});
        BitmapValue rhs({1400000000000000503ull, 5, 1400324000000000506ull});
        StringVal lhs_str = convert_bitmap_to_string(ctx, lhs);
        StringVal rhs_str = convert_bitmap_to_string(ctx, rhs);

        BigIntVal and_count = BitmapFunctions::bitmap_and_count(ctx, lhs_str, rhs_str);
        ASSERT_EQ(1, and_count.val);
    }
}
TEST_F(BitmapFunctionsTest, bitmap_or_count) {
@ -432,24 +499,52 @@ TEST_F(BitmapFunctionsTest, bitmap_or_count) {
StringVal bitmap_src1 = convert_bitmap_to_string(ctx, bitmap1);
StringVal bitmap_src2 = convert_bitmap_to_string(ctx, bitmap2);
BigIntVal result = BitmapFunctions::bitmap_or_count(ctx, bitmap_src1, bitmap_src2);
ASSERT_EQ(BigIntVal(3), result);
ASSERT_EQ(3, result.val);
result = BitmapFunctions::bitmap_or_count(ctx, bitmap_src1, StringVal::null());
ASSERT_EQ(BigIntVal(0), result);
ASSERT_EQ(0, result.val);
bitmap1 = BitmapValue({0, 1, 2, std::numeric_limits<uint64_t>::min()});
bitmap2 = BitmapValue({0, 1, 2, std::numeric_limits<uint64_t>::max()});
bitmap_src1 = convert_bitmap_to_string(ctx, bitmap1);
bitmap_src2 = convert_bitmap_to_string(ctx, bitmap2);
result = BitmapFunctions::bitmap_or_count(ctx, bitmap_src1, bitmap_src2);
ASSERT_EQ(BigIntVal(4), result);
ASSERT_EQ(4, result.val);
bitmap1 = BitmapValue({1, 2, 3});
bitmap2 = BitmapValue({3, 4, 5});
bitmap_src1 = convert_bitmap_to_string(ctx, bitmap1);
bitmap_src2 = convert_bitmap_to_string(ctx, bitmap2);
result = BitmapFunctions::bitmap_or_count(ctx, bitmap_src1, bitmap_src2);
ASSERT_EQ(BigIntVal(5), result);
ASSERT_EQ(5, result.val);
}
// Exercises bitmap_or_count with operands that contain values far beyond the
// 32-bit range, mixed with small values.
TEST_F(BitmapFunctionsTest, bitmap_or_count_64) {
    // Union with an empty bitmap keeps all three elements; a NULL argument yields 0.
    {
        BitmapValue lhs({14087600000000000501ull, 2, 1234500000000000503ull});
        BitmapValue rhs;
        StringVal lhs_str = convert_bitmap_to_string(ctx, lhs);
        StringVal rhs_str = convert_bitmap_to_string(ctx, rhs);

        BigIntVal or_count = BitmapFunctions::bitmap_or_count(ctx, lhs_str, rhs_str);
        ASSERT_EQ(3, or_count.val);

        or_count = BitmapFunctions::bitmap_or_count(ctx, lhs_str, StringVal::null());
        ASSERT_EQ(0, or_count.val);
    }

    // Three shared elements plus the uint64_t min on one side and max on the other.
    {
        BitmapValue lhs({11870000000000000501ull, 2, 1378900000000000503ull,
                         std::numeric_limits<uint64_t>::min()});
        BitmapValue rhs({11870000000000000501ull, 2, 1378900000000000503ull,
                         std::numeric_limits<uint64_t>::max()});
        StringVal lhs_str = convert_bitmap_to_string(ctx, lhs);
        StringVal rhs_str = convert_bitmap_to_string(ctx, rhs);

        BigIntVal or_count = BitmapFunctions::bitmap_or_count(ctx, lhs_str, rhs_str);
        ASSERT_EQ(5, or_count.val);
    }

    // One shared element and two exclusive elements per side.
    {
        BitmapValue lhs({17870000000000000501ull, 2, 1400000000000000503ull});
        BitmapValue rhs({1400000000000000503ull, 5, 1678900000000000503ull});
        StringVal lhs_str = convert_bitmap_to_string(ctx, lhs);
        StringVal rhs_str = convert_bitmap_to_string(ctx, rhs);

        BigIntVal or_count = BitmapFunctions::bitmap_or_count(ctx, lhs_str, rhs_str);
        ASSERT_EQ(5, or_count.val);
    }
}
TEST_F(BitmapFunctionsTest, bitmap_not) {
@ -552,7 +647,7 @@ TEST_F(BitmapFunctionsTest, bitmap_and_not_count) {
BigIntVal result = BitmapFunctions::bitmap_and_not_count(ctx, bitmap_src, bitmap_dst);
BigIntVal expected(2);
ASSERT_EQ(expected, result);
ASSERT_EQ(expected.val, result.val);
}
{
BitmapValue bitmap1({1, 2, 3});
@ -564,7 +659,7 @@ TEST_F(BitmapFunctionsTest, bitmap_and_not_count) {
BigIntVal result = BitmapFunctions::bitmap_and_not_count(ctx, bitmap_src, bitmap_dst);
BigIntVal expected(0);
ASSERT_EQ(expected, result);
ASSERT_EQ(expected.val, result.val);
}
{
BitmapValue bitmap1({1, 2, 3});
@ -576,7 +671,7 @@ TEST_F(BitmapFunctionsTest, bitmap_and_not_count) {
BigIntVal result = BitmapFunctions::bitmap_and_not_count(ctx, bitmap_src, bitmap_dst);
BigIntVal expected(3);
ASSERT_EQ(expected, result);
ASSERT_EQ(expected.val, result.val);
}
}
@ -610,33 +705,35 @@ TEST_F(BitmapFunctionsTest, bitmap_has_any) {
}
TEST_F(BitmapFunctionsTest, bitmap_has_all) {
BitmapValue bitmap1({1, 4, 5, std::numeric_limits<uint64_t>::max(), std::numeric_limits<uint64_t>::min()});
BitmapValue bitmap2({4, std::numeric_limits<uint64_t>::max(), std::numeric_limits<uint64_t>::min()});
BitmapValue bitmap1(
{1, 4, 5, std::numeric_limits<uint64_t>::max(), std::numeric_limits<uint64_t>::min()});
BitmapValue bitmap2(
{4, std::numeric_limits<uint64_t>::max(), std::numeric_limits<uint64_t>::min()});
StringVal string_val1 = convert_bitmap_to_string(ctx, bitmap1);
StringVal string_val2 = convert_bitmap_to_string(ctx, bitmap2);
BooleanVal result = BitmapFunctions::bitmap_has_all(ctx, string_val1, string_val2);
ASSERT_EQ(BooleanVal{true}, result);
ASSERT_EQ(BooleanVal {true}, result);
bitmap1 = BitmapValue({0, 1, 2});
bitmap2 = BitmapValue({0, 1, 2, std::numeric_limits<uint64_t>::max()});
string_val1 = convert_bitmap_to_string(ctx, bitmap1);
string_val2 = convert_bitmap_to_string(ctx, bitmap2);
result = BitmapFunctions::bitmap_has_all(ctx, string_val1, string_val2);
ASSERT_EQ(BooleanVal{false}, result);
ASSERT_EQ(BooleanVal {false}, result);
bitmap1 = BitmapValue();
bitmap2 = BitmapValue({0, 1, 2});
string_val1 = convert_bitmap_to_string(ctx, bitmap1);
string_val2 = convert_bitmap_to_string(ctx, bitmap2);
result = BitmapFunctions::bitmap_has_all(ctx, string_val1, string_val2);
ASSERT_EQ(BooleanVal{false}, result);
ASSERT_EQ(BooleanVal {false}, result);
bitmap1 = BitmapValue();
bitmap2 = BitmapValue();
string_val1 = convert_bitmap_to_string(ctx, bitmap1);
string_val2 = convert_bitmap_to_string(ctx, bitmap2);
result = BitmapFunctions::bitmap_has_all(ctx, string_val1, string_val2);
ASSERT_EQ(BooleanVal{true}, result);
ASSERT_EQ(BooleanVal {true}, result);
bitmap1 = BitmapValue();
string_val1 = convert_bitmap_to_string(ctx, bitmap1);
@ -694,7 +791,8 @@ TEST_F(BitmapFunctionsTest, bitmap_max) {
TEST_F(BitmapFunctionsTest, bitmap_subset_in_range) {
// null
StringVal res = BitmapFunctions::bitmap_subset_in_range(ctx, StringVal::null(), BigIntVal(1), BigIntVal(3));
StringVal res = BitmapFunctions::bitmap_subset_in_range(ctx, StringVal::null(), BigIntVal(1),
BigIntVal(3));
ASSERT_TRUE(res.is_null);
// empty
@ -705,7 +803,9 @@ TEST_F(BitmapFunctionsTest, bitmap_subset_in_range) {
ASSERT_EQ(BigIntVal(0), result);
// normal
BitmapValue bitmap1({0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,208,23,24,25,26,27,28,29,30,31,32,33,100,200,500});
BitmapValue bitmap1({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 208, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32, 33, 100, 200, 500});
StringVal bitmap_src = convert_bitmap_to_string(ctx, bitmap1);
res = BitmapFunctions::bitmap_subset_in_range(ctx, bitmap_src, BigIntVal(30), BigIntVal(200));
@ -720,18 +820,19 @@ TEST_F(BitmapFunctionsTest, bitmap_subset_in_range) {
result = BitmapFunctions::bitmap_count(ctx, res);
ASSERT_EQ(BigIntVal(4), result);
res = BitmapFunctions::bitmap_subset_in_range(ctx, bitmap_src, BigIntVal(11), DecimalV2Value::MAX_INT64);
res = BitmapFunctions::bitmap_subset_in_range(ctx, bitmap_src, BigIntVal(11),
DecimalV2Value::MAX_INT64);
result = BitmapFunctions::bitmap_count(ctx, res);
ASSERT_EQ(BigIntVal(27), result);
// abnormal
// start >= end
// start >= end
res = BitmapFunctions::bitmap_subset_in_range(ctx, bitmap_src, BigIntVal(30), BigIntVal(20));
ASSERT_TRUE(res.is_null);
res = BitmapFunctions::bitmap_subset_in_range(ctx, bitmap_src, BigIntVal(20), BigIntVal(20));
ASSERT_TRUE(res.is_null);
// negative range
res = BitmapFunctions::bitmap_subset_in_range(ctx, bitmap_src, BigIntVal(-10), BigIntVal(20));
ASSERT_TRUE(res.is_null);
@ -743,35 +844,40 @@ TEST_F(BitmapFunctionsTest, bitmap_subset_in_range) {
ASSERT_TRUE(res.is_null);
// null range
res = BitmapFunctions::bitmap_subset_in_range(ctx, bitmap_src, BigIntVal::null(), BigIntVal(20));
res = BitmapFunctions::bitmap_subset_in_range(ctx, bitmap_src, BigIntVal::null(),
BigIntVal(20));
ASSERT_TRUE(res.is_null);
res = BitmapFunctions::bitmap_subset_in_range(ctx, bitmap_src, BigIntVal(10), BigIntVal::null());
res = BitmapFunctions::bitmap_subset_in_range(ctx, bitmap_src, BigIntVal(10),
BigIntVal::null());
ASSERT_TRUE(res.is_null);
res = BitmapFunctions::bitmap_subset_in_range(ctx, bitmap_src, BigIntVal::null(), BigIntVal::null());
res = BitmapFunctions::bitmap_subset_in_range(ctx, bitmap_src, BigIntVal::null(),
BigIntVal::null());
ASSERT_TRUE(res.is_null);
}
TEST_F(BitmapFunctionsTest, sub_bitmap) {
// normal
BitmapValue bitmap1({0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,100,200,500});
BitmapValue bitmap1({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
26, 27, 28, 29, 30, 31, 32, 33, 100, 200, 500});
StringVal bitmap_src = convert_bitmap_to_string(ctx, bitmap1);
StringVal res = BitmapFunctions::sub_bitmap(ctx, bitmap_src, BigIntVal(30), BigIntVal(6));
BitmapValue bitmap2({30,31,32,33,100,200});
BitmapValue bitmap2({30, 31, 32, 33, 100, 200});
ASSERT_EQ(res, convert_bitmap_to_string(ctx, bitmap2));
res = BitmapFunctions::sub_bitmap(ctx, bitmap_src, BigIntVal(30), BigIntVal(100));
BitmapValue bitmap3({30,31,32,33,100,200,500});
BitmapValue bitmap3({30, 31, 32, 33, 100, 200, 500});
ASSERT_EQ(res, convert_bitmap_to_string(ctx, bitmap3));
res = BitmapFunctions::sub_bitmap(ctx, bitmap_src, BigIntVal(30), BigIntVal(INT64_MAX));
BitmapValue bitmap4({30,31,32,33,100,200,500});
BitmapValue bitmap4({30, 31, 32, 33, 100, 200, 500});
ASSERT_EQ(res, convert_bitmap_to_string(ctx, bitmap4));
res = BitmapFunctions::sub_bitmap(ctx, bitmap_src, BigIntVal(0), BigIntVal(2));
BitmapValue bitmap5({0,1});
BitmapValue bitmap5({0, 1});
ASSERT_EQ(res, convert_bitmap_to_string(ctx, bitmap5));
res = BitmapFunctions::sub_bitmap(ctx, bitmap_src, BigIntVal(-1), BigIntVal(2));
@ -779,7 +885,7 @@ TEST_F(BitmapFunctionsTest, sub_bitmap) {
ASSERT_EQ(res, convert_bitmap_to_string(ctx, bitmap6));
res = BitmapFunctions::sub_bitmap(ctx, bitmap_src, BigIntVal(-7), BigIntVal(6));
BitmapValue bitmap7({30,31,32,33,100,200});
BitmapValue bitmap7({30, 31, 32, 33, 100, 200});
ASSERT_EQ(res, convert_bitmap_to_string(ctx, bitmap7));
// null
@ -813,7 +919,8 @@ TEST_F(BitmapFunctionsTest, sub_bitmap) {
TEST_F(BitmapFunctionsTest, bitmap_subset_limit) {
// null
StringVal res = BitmapFunctions::bitmap_subset_limit(ctx, StringVal::null(), BigIntVal(1), BigIntVal(3));
StringVal res = BitmapFunctions::bitmap_subset_limit(ctx, StringVal::null(), BigIntVal(1),
BigIntVal(3));
ASSERT_TRUE(res.is_null);
// empty
@ -824,7 +931,9 @@ TEST_F(BitmapFunctionsTest, bitmap_subset_limit) {
ASSERT_EQ(BigIntVal(0), result);
// normal
BitmapValue bitmap1({0,1,2,3,4,5,6,7,45,47,49,43,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,100,200,500});
BitmapValue bitmap1({0, 1, 2, 3, 4, 5, 6, 7, 45, 47, 49, 43, 8, 9,
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 100, 200, 500});
StringVal bitmap_src = convert_bitmap_to_string(ctx, bitmap1);
res = BitmapFunctions::bitmap_subset_limit(ctx, bitmap_src, BigIntVal(4), BigIntVal(10));
@ -839,7 +948,8 @@ TEST_F(BitmapFunctionsTest, bitmap_subset_limit) {
result = BitmapFunctions::bitmap_count(ctx, res);
ASSERT_EQ(BigIntVal(7), result);
res = BitmapFunctions::bitmap_subset_limit(ctx, bitmap_src, BigIntVal(31), DecimalV2Value::MAX_INT64);
res = BitmapFunctions::bitmap_subset_limit(ctx, bitmap_src, BigIntVal(31),
DecimalV2Value::MAX_INT64);
result = BitmapFunctions::bitmap_count(ctx, res);
ASSERT_EQ(BigIntVal(10), result);
@ -861,9 +971,9 @@ TEST_F(BitmapFunctionsTest, bitmap_subset_limit) {
res = BitmapFunctions::bitmap_subset_limit(ctx, bitmap_src, BigIntVal(10), BigIntVal::null());
ASSERT_TRUE(res.is_null);
res = BitmapFunctions::bitmap_subset_limit(ctx, bitmap_src, BigIntVal::null(), BigIntVal::null());
res = BitmapFunctions::bitmap_subset_limit(ctx, bitmap_src, BigIntVal::null(),
BigIntVal::null());
ASSERT_TRUE(res.is_null);
}
} // namespace doris