roaringbitmap type optimization and default value adaption

This commit is contained in:
qijiax 2024-07-05 05:17:14 +00:00 committed by ob-robot
parent e036c7f2d1
commit dcefc03971
13 changed files with 140 additions and 76 deletions

View File

@ -1597,9 +1597,6 @@ DEF_ENUMSET_INNER_FUNCS(ObSetInnerType, set_inner, ObString);
// 2. CS_FUNCS: lob with same content and different lobids will have different crc & hash,
// but error occur in farm, not used?
// Todo: @xiyu
// Check CS_FUNCS of roaringbitmap may not be used?
#define DEF_TEXT_FUNCS(OBJTYPE, TYPE, VTYPE) \
DEF_TEXT_PRINT_FUNCS(OBJTYPE); \
DEF_STRING_CS_FUNCS(OBJTYPE); \
@ -1609,7 +1606,6 @@ DEF_TEXT_FUNCS(ObTinyTextType, string, ObString);
DEF_TEXT_FUNCS(ObTextType, string, ObString);
DEF_TEXT_FUNCS(ObMediumTextType, string, ObString);
DEF_TEXT_FUNCS(ObLongTextType, string, ObString);
DEF_TEXT_FUNCS(ObRoaringBitmapType, string, ObString);
#define DEF_GEO_CS_FUNCS(OBJTYPE) \
@ -3576,6 +3572,13 @@ inline int64_t obj_val_get_serialize_size<ObCollectionSQLType>(const ObObj &obj)
DEF_UDT_CS_FUNCS(ObCollectionSQLType);
// Obj function set for the roaringbitmap type, assembled from three existing macro groups:
// DEF_HEX_STRING_PRINT_FUNCS, DEF_STRING_CS_FUNCS and DEF_TEXT_SERIALIZE_FUNCS.
// OBJTYPE is passed to all three; TYPE and VTYPE are forwarded only to DEF_TEXT_SERIALIZE_FUNCS.
#define DEF_ROARINGBITMAP_FUNCS(OBJTYPE, TYPE, VTYPE) \
DEF_HEX_STRING_PRINT_FUNCS(OBJTYPE); \
DEF_STRING_CS_FUNCS(OBJTYPE); \
DEF_TEXT_SERIALIZE_FUNCS(OBJTYPE, TYPE, VTYPE)
// Instantiate the function set for ObRoaringBitmapType with string / ObString.
DEF_ROARINGBITMAP_FUNCS(ObRoaringBitmapType, string, ObString);
}
}

View File

@ -129,14 +129,11 @@ int ObRbUtils::check_get_bin_type(const ObString &rb_bin, ObRbBinType &bin_type)
return ret;
}
int ObRbUtils::get_cardinality(ObIAllocator &allocator, ObString &rb_bin, uint64_t &cardinality)
int ObRbUtils::get_cardinality(ObIAllocator &allocator, const ObString &rb_bin, ObRbBinType bin_type, uint64_t &cardinality)
{
int ret = OB_SUCCESS;
ObRbBinType bin_type;
uint32_t offset = RB_VERSION_SIZE + RB_BIN_TYPE_SIZE;
if (OB_FAIL(ObRbUtils::check_get_bin_type(rb_bin, bin_type))) {
LOG_WARN("invalid roaringbitmap binary string", K(ret));
} else if (bin_type == ObRbBinType::EMPTY) {
if (bin_type == ObRbBinType::EMPTY) {
cardinality = 0;
} else if (bin_type == ObRbBinType::SINGLE_32 || bin_type == ObRbBinType::SINGLE_64) {
cardinality = 1;
@ -207,6 +204,54 @@ int ObRbUtils::rb_serialize(ObIAllocator &allocator, ObString &res_rb_bin, ObRoa
return ret;
}
// Produces the roaringbitmap binary to store for an input roaringbitmap binary.
//
// @param allocator   passed to get_cardinality / rb_deserialize / rb_serialize and used for the
//                    rebuilt buffer in the BITMAP_64 -> BITMAP_32 case
// @param rb_bin      input roaringbitmap binary; only read by this function
// @param res_rb_bin  output; either a rebuilt binary or a shallow copy of rb_bin
// @return OB_SUCCESS, or the error code of the first step that failed
//
// Three outcomes, chosen from the binary type and cardinality:
//   1. BITMAP_32 / BITMAP_64 with cardinality <= MAX_BITMAP_SET_VALUES, SET_32 with fewer than
//      two values, and every SET_64 binary are round-tripped through rb_deserialize + rb_serialize;
//   2. any other BITMAP_64 binary whose header holds buckets == 1 and high32 == 0 is rewritten
//      as BITMAP_32 by dropping those two header fields;
//   3. everything else is passed through unchanged.
int ObRbUtils::build_binary(ObIAllocator &allocator, ObString &rb_bin, ObString &res_rb_bin)
{
  int ret = OB_SUCCESS;
  ObRbBinType bin_type;
  uint64_t cardinality = 0;
  if (OB_FAIL(check_get_bin_type(rb_bin, bin_type))) {
    LOG_WARN("invalid roaringbitmap binary string", K(ret));
  } else if (OB_FAIL(get_cardinality(allocator, rb_bin, bin_type, cardinality))) {
    LOG_WARN("failed to get cardinality from roaringbitmap binary", K(ret));
  } else {
    const bool is_bitmap_bin = (ObRbBinType::BITMAP_32 == bin_type || ObRbBinType::BITMAP_64 == bin_type);
    const bool is_small_bitmap = is_bitmap_bin && cardinality <= MAX_BITMAP_SET_VALUES;
    const bool is_tiny_set_32 = (ObRbBinType::SET_32 == bin_type && cardinality < 2);
    const bool is_set_64 = (ObRbBinType::SET_64 == bin_type);
    if (is_small_bitmap || is_tiny_set_32 || is_set_64) {
      // Round trip: deserialize -> optimize -> serialize.
      ObRoaringBitmap *rb = nullptr;
      if (OB_FAIL(rb_deserialize(allocator, rb_bin, rb))) {
        LOG_WARN("failed to deserialize roaringbitmap", K(ret));
      } else if (OB_FAIL(rb_serialize(allocator, res_rb_bin, rb))) {
        LOG_WARN("failed to serialize roaringbitmap", K(ret));
      }
      // Called on success and on failure alike.
      rb_destroy(rb);
    } else if (ObRbBinType::BITMAP_64 == bin_type) {
      // Right after the version and bin-type bytes this branch reads a uint64 (buckets)
      // followed by a uint32 (high32).
      // NOTE(review): these loads go through reinterpret_cast on the raw buffer and no length
      // check is visible here — presumably check_get_bin_type / get_cardinality already
      // validated the header; confirm, and confirm unaligned loads are acceptable.
      const uint32_t header_len = RB_VERSION_SIZE + RB_BIN_TYPE_SIZE;
      const uint64_t buckets = *reinterpret_cast<uint64_t*>(rb_bin.ptr() + header_len);
      const uint32_t high32 = *reinterpret_cast<uint32_t*>(rb_bin.ptr() + header_len + sizeof(uint64_t));
      const uint32_t offset = static_cast<uint32_t>(header_len + sizeof(uint64_t) + sizeof(uint32_t));
      if (1 == buckets && 0 == high32) {
        // BITMAP_32 is enough: emit version + BITMAP_32 type + the bytes that follow high32.
        const ObRbBinType narrowed_type = ObRbBinType::BITMAP_32;
        ObStringBuffer res_buf(&allocator);
        if (OB_FAIL(res_buf.append(rb_bin.ptr(), RB_VERSION_SIZE))) {
          LOG_WARN("failed to append version", K(ret), K(rb_bin));
        } else if (OB_FAIL(res_buf.append(reinterpret_cast<const char*>(&narrowed_type), RB_BIN_TYPE_SIZE))) {
          LOG_WARN("failed to append bin_type", K(ret));
        } else if (OB_FAIL(res_buf.append(rb_bin.ptr() + offset, rb_bin.length() - offset))) {
          LOG_WARN("failed to append roaing binary", K(ret), K(rb_bin));
        } else {
          // NOTE(review): res_rb_bin points into res_buf's storage, which is still referenced
          // after res_buf leaves scope — confirm the buffer outlives the ObStringBuffer object.
          res_rb_bin.assign_ptr(res_buf.ptr(), res_buf.length());
        }
      } else {
        res_rb_bin = rb_bin;
      }
    } else {
      // Nothing to compact: hand back the input as is.
      res_rb_bin = rb_bin;
    }
  }
  return ret;
}
int ObRbUtils::binary_format_convert(ObIAllocator &allocator, const ObString &rb_bin, ObString &binary_str)
{
int ret = OB_SUCCESS;

View File

@ -28,12 +28,13 @@ public:
// binary operation
static int check_get_bin_type(const ObString &rb_bin, ObRbBinType &bin_type);
static int get_cardinality(ObIAllocator &allocator, ObString &rb_bin, uint64_t &cardinality);
static int get_cardinality(ObIAllocator &allocator, const ObString &rb_bin, ObRbBinType bin_type, uint64_t &cardinality);
// common
static void rb_destroy(ObRoaringBitmap *&rb);
static int rb_deserialize(ObIAllocator &allocator, const ObString &rb_bin, ObRoaringBitmap *&rb);
static int rb_serialize(ObIAllocator &allocator, ObString &res_rb_bin, ObRoaringBitmap *&rb);
static int build_binary(ObIAllocator &allocator, ObString &rb_bin, ObString &res_rb_bin);
static int binary_format_convert(ObIAllocator &allocator, const ObString &rb_bin, ObString &roaring_bin);
static int rb_from_string(ObIAllocator &allocator, ObString &rb_str, ObRoaringBitmap *&rb);
static int rb_to_string(ObIAllocator &allocator, ObString &rb_bin, ObString &res_rb_str);

View File

@ -131,7 +131,7 @@ int ObRoaringBitmap::value_remove(uint64_t value) {
break;
}
case ObRbType::SET: {
if (OB_FAIL(set_.erase_refactored(value))) {
if (set_.exist_refactored(value) == OB_HASH_EXIST && OB_FAIL(set_.erase_refactored(value))) {
LOG_WARN("failed to erase value from the set", K(ret), K(value));
}
break;
@ -630,12 +630,32 @@ int ObRoaringBitmap::convert_bitmap_to_smaller_type() {
int ret = OB_SUCCESS;
if (is_bitmap_type()) {
uint64_t cardinality = roaring::api::roaring64_bitmap_get_cardinality(bitmap_);
if (cardinality > 1) {
//do nothing
} else if (cardinality == 1) {
set_single(roaring64_bitmap_minimum(bitmap_));
} else if (cardinality == 0) {
if (cardinality == 0) {
set_empty();
} else if (cardinality == 1) {
set_single(roaring::api::roaring64_bitmap_minimum(bitmap_));
} else if (cardinality <= MAX_BITMAP_SET_VALUES) {
roaring::api::roaring64_iterator_t* it = nullptr;
if (OB_ISNULL(it = roaring::api::roaring64_iterator_create(bitmap_))) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("failed to create bitmap iterator", K(ret));
} else if (OB_FAIL(set_.create(MAX_BITMAP_SET_VALUES))) {
LOG_WARN("failed to create set", K(ret));
} else if (OB_FALSE_IT(type_ = ObRbType::SET)) {
} else {
do {
if (OB_FAIL(set_.set_refactored(roaring::api::roaring64_iterator_value(it)))) {
LOG_WARN("failed to set value to the set", K(ret), K(roaring::api::roaring64_iterator_value(it)));
}
} while (roaring::api::roaring64_iterator_advance(it) && OB_SUCC(ret));
}
if (OB_NOT_NULL(it)) {
roaring::api::roaring64_iterator_free(it);
}
if (OB_NOT_NULL(bitmap_)) {
roaring::api::roaring64_bitmap_free(bitmap_);
bitmap_ = nullptr;
}
}
}
return ret;

View File

@ -517,6 +517,16 @@ int ObTableColumns::fill_row_cells(const ObTableSchema &table_schema,
cur_row_.cells_[cell_idx].set_varchar(ObString(static_cast<int32_t>(pos), buf));
cur_row_.cells_[cell_idx].set_collation_type(ObCharset::get_system_collation());
}
} else if (ob_is_roaringbitmap(def_obj.get_type())) {
if (min_data_version_ < DATA_VERSION_4_3_2_0) {
ret = OB_NOT_SUPPORTED;
LOG_USER_ERROR(OB_NOT_SUPPORTED, "roaringbitmap type in data version less than 4.3.2.0");
} else if (OB_FAIL(def_obj.print_varchar_literal(buf, buf_len, pos, TZ_INFO(session_)))) {
LOG_WARN("fail to print varchar literal", K(ret), K(def_obj), K(buf_len), K(pos), K(buf));
} else {
cur_row_.cells_[cell_idx].set_varchar(ObString(static_cast<int32_t>(pos), buf));
cur_row_.cells_[cell_idx].set_collation_type(ObCharset::get_system_collation());
}
} else if (column_schema.is_default_expr_v2_column()) {
cur_row_.cells_[cell_idx].set_varchar(column_schema.get_cur_default_value().get_string());
cur_row_.cells_[cell_idx].set_collation_type(ObCharset::get_system_collation());

View File

@ -10675,18 +10675,18 @@ static int string_roaringbitmap(const ObObjType expect_type, ObObjCastParams &pa
LOG_WARN("invalid allocator", K(ret));
} else {
ObIAllocator &temp_allocator = *params.allocator_v2_;
ObString in_str;
ObRbBinType bin_type;
ObString in_str = nullptr;
ObString out_str = nullptr;
if (OB_FAIL(in.get_string(in_str))) {
LOG_WARN("fail to get string", K(ret));
} else if (OB_FAIL(ObRbUtils::check_get_bin_type(in_str, bin_type))){
LOG_WARN("invalid roaringbitmap binary string", K(ret));
} else if (OB_FAIL(ObRbUtils::build_binary(temp_allocator, in_str, out_str))) {
LOG_WARN("failed to build rb binary", K(ret));
} else {
sql::ObTextStringObObjResult text_result(ObRoaringBitmapType, &params, &out, true /*has_lob_header*/);
if (OB_FAIL(text_result.init(in_str.length(), params.allocator_v2_))) {
if (OB_FAIL(text_result.init(out_str.length(), params.allocator_v2_))) {
LOG_WARN("init lob result failed");
} else if (OB_FAIL(text_result.append(in_str.ptr(), in_str.length()))) {
LOG_WARN("failed to append realdata", K(ret), K(in_str), K(text_result));
} else if (OB_FAIL(text_result.append(out_str.ptr(), out_str.length()))) {
LOG_WARN("failed to append realdata", K(ret), K(out_str), K(text_result));
} else {
text_result.set_result();
}

View File

@ -361,10 +361,6 @@ int ObSchemaPrinter::print_table_definition_columns(const ObTableSchema &table_s
if (OB_SUCC(ret) && OB_FAIL(databuff_printf(buf, buf_len, pos, "'%s'", to_cstring(ObHexEscapeSqlStr(out_str))))) {
SHARE_SCHEMA_LOG(WARN, "fail to print default value of string tc", K(ret));
}
} else if (ob_is_roaringbitmap_tc(default_value.get_type())) {
if (OB_FAIL(print_roaringbitmap_default_value(table_schema, default_value, buf, buf_len, pos))) {
SHARE_SCHEMA_LOG(WARN, "fail to print default value of roaringbitmap", K(ret));
}
} else if (OB_FAIL(default_value.print_varchar_literal(buf, buf_len, pos, tz_info))) {
SHARE_SCHEMA_LOG(WARN, "fail to print sql literal", K(ret));
}
@ -5731,27 +5727,6 @@ int ObSchemaPrinter::print_table_definition_lob_params(const ObTableSchema &tabl
return ret;
}
int ObSchemaPrinter::print_roaringbitmap_default_value(const ObTableSchema &table_schema,
ObObj &default_value,
char* buf,
const int64_t& buf_len,
int64_t& pos) const
{
int ret = OB_SUCCESS;
ObString out_str = default_value.get_string();
const char *HEXCHARS = "0123456789ABCDEF";
for (int i = 0; OB_SUCC(ret) && i < out_str.length(); ++i) {
if (i == 0 && OB_FAIL(databuff_printf(buf, buf_len, pos, " 0x"))) {
SHARE_SCHEMA_LOG(WARN, "fail to print default value", K(ret));
} else if (OB_FAIL(databuff_printf(buf, buf_len, pos, "%c%c",
HEXCHARS[*(out_str.ptr() + i) >> 4 & 0xF],
HEXCHARS[*(out_str.ptr() + i) & 0xF]))) {
SHARE_SCHEMA_LOG(WARN, "fail to print default value hex", K(ret));
}
}
return ret;
}
} // end namespace schema
} //end of namespace share
} // end namespace oceanbase

View File

@ -501,11 +501,6 @@ public:
const int64_t& buf_len,
int64_t& pos) const;
int print_roaringbitmap_default_value(const ObTableSchema &table_schema,
ObObj &default_value,
char* buf,
const int64_t& buf_len,
int64_t& pos) const;
private:
static bool is_subpartition_valid_in_mysql(const ObTableSchema &table_schema)
{

View File

@ -4165,18 +4165,18 @@ CAST_FUNC_NAME(string, roaringbitmap)
ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx);
common::ObArenaAllocator &temp_allocator = tmp_alloc_g.get_allocator();
ObString in_str = child_res->get_string();
ObRbBinType bin_type;
ObString out_str = nullptr;
if (OB_FAIL(ObTextStringHelper::read_real_string_data(temp_allocator, *child_res,
expr.args_[0]->datum_meta_, expr.args_[0]->obj_meta_.has_lob_header(), in_str))) {
LOG_WARN("failed to get real data.", K(ret), K(in_str));
} else if (OB_FAIL(ObRbUtils::check_get_bin_type(in_str, bin_type))) {
LOG_WARN("invalid roaringbitmap binary string", K(ret));
} else if (OB_FAIL(ObRbUtils::build_binary(temp_allocator, in_str, out_str))) {
LOG_WARN("failed to build rb binary", K(ret));
} else {
ObTextStringDatumResult text_result(ObRoaringBitmapType, &expr, &ctx, &res_datum);
if (OB_FAIL(text_result.init(in_str.length()))) {
if (OB_FAIL(text_result.init(out_str.length()))) {
LOG_WARN("Lob: init lob result failed");
} else if (OB_FAIL(text_result.append(in_str.ptr(), in_str.length()))) {
LOG_WARN("failed to append realdata", K(ret), K(in_str), K(text_result));
} else if (OB_FAIL(text_result.append(out_str.ptr(), out_str.length()))) {
LOG_WARN("failed to append realdata", K(ret), K(out_str), K(text_result));
} else {
text_result.set_result();
}

View File

@ -66,31 +66,27 @@ int ObExprRbBuildVarbinary::eval_rb_build_varbinary(const ObExpr &expr,
ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx);
common::ObArenaAllocator &tmp_allocator = tmp_alloc_g.get_allocator();
lib::ObMallocHookAttrGuard malloc_guard(lib::ObMemAttr(ObRbExprHelper::get_tenant_id(ctx.exec_ctx_.get_my_session()), "ROARINGBITMAP"));
ObDatum *datum = NULL;
ObExpr *rb_arg = expr.args_[0];
bool is_null_result = false;
ObString rb_bin;
bool is_rb_null = false;
ObString rb_bin = nullptr;
ObString res_rb_bin = nullptr;
// get roaring string
if (OB_FAIL(expr.args_[0]->eval(ctx, datum))) {
LOG_WARN("failed to eval argument", K(ret));
} else if (datum->is_null()) {
if (OB_FAIL(ObRbExprHelper::get_input_roaringbitmap_bin(ctx, rb_arg, rb_bin, is_rb_null))) {
LOG_WARN("fail to get input roaringbitmap", K(ret));
} else if (is_rb_null || rb_bin == nullptr) {
is_null_result = true;
} else {
rb_bin = datum->get_string();
ObRbBinType bin_type;
if (OB_FAIL(ObTextStringHelper::read_real_string_data(tmp_allocator, *datum,
expr.args_[0]->datum_meta_, expr.args_[0]->obj_meta_.has_lob_header(), rb_bin))) {
LOG_WARN("fail to get real string data", K(ret), K(rb_bin));
} else if (OB_FAIL(ObRbUtils::check_get_bin_type(rb_bin, bin_type))) {
LOG_WARN("invalid roaringbitmap binary", K(ret), K(rb_bin));
}
} else if (OB_FAIL(ObRbUtils::build_binary(tmp_allocator, rb_bin, res_rb_bin))) {
LOG_WARN("failed to build rb binary", K(ret));
}
if (OB_FAIL(ret)) {
} else if (is_null_result) {
res.set_null();
} else if (OB_FAIL(ObRbExprHelper::pack_rb_res(expr, ctx, res, rb_bin))) {
} else if (OB_FAIL(ObRbExprHelper::pack_rb_res(expr, ctx, res, res_rb_bin))) {
LOG_WARN("fail to pack roaringbitmap res", K(ret));
}
return ret;
}

View File

@ -63,12 +63,15 @@ int ObExprRbCardinality::eval_rb_cardinality(const ObExpr &expr, ObEvalCtx &ctx,
ObExpr *rb_arg = expr.args_[0];
bool is_rb_null = false;
ObString rb_bin;
ObRbBinType bin_type;
uint64_t cardinality = 0;
if (OB_FAIL(ObRbExprHelper::get_input_roaringbitmap_bin(ctx, rb_arg, rb_bin, is_rb_null))) {
LOG_WARN("fail to get input roaringbitmap", K(ret));
} else if (is_rb_null || rb_bin == nullptr) {
res.set_null();
} else if (OB_FAIL(ObRbUtils::get_cardinality(tmp_allocator, rb_bin, cardinality))){
} else if (OB_FAIL(ObRbUtils::check_get_bin_type(rb_bin, bin_type))) {
LOG_WARN("invalid roaringbitmap binary string", K(ret));
} else if (OB_FAIL(ObRbUtils::get_cardinality(tmp_allocator, rb_bin, bin_type, cardinality))){
LOG_WARN("failed to get cardinality from roaringbitmap binary", K(ret));
} else {
res.set_uint(cardinality);

View File

@ -5918,6 +5918,16 @@ BINARY opt_string_length_i_v2
$$->param_num_ = 0;
$$->sql_str_off_ = @1.first_column;
}
| ROARINGBITMAP
{
malloc_terminal_node($$, result->malloc_pool_, T_CAST_ARGUMENT);
$$->value_ = 0;
$$->int16_values_[OB_NODE_CAST_TYPE_IDX] = T_ROARINGBITMAP; /* data type */
$$->int16_values_[OB_NODE_CAST_COLL_IDX] = BINARY_COLLATION; /* data collation */
$$->int32_values_[OB_NODE_CAST_C_LEN_IDX] = 0; /* length */
$$->param_num_ = 0;
$$->sql_str_off_ = @1.first_column;
}
;
opt_integer:

View File

@ -189,10 +189,16 @@ TEST_F(TestRoaringBitmap, optimize)
}
ASSERT_EQ(rb->get_type(), ObRbType::BITMAP);
ASSERT_EQ(rb->get_cardinality(), 33);
// remove 1 value, remain 32 values
ASSERT_EQ(rb->value_remove(300), OB_SUCCESS);
ASSERT_EQ(rb->get_cardinality(), 32);
ASSERT_FALSE(rb->is_contains(300));
rb->optimize();
ASSERT_EQ(rb->get_type(), ObRbType::SET);
// add 1 value, remain 33 values
ASSERT_EQ(rb->value_add(300), OB_SUCCESS);
ASSERT_EQ(rb->get_cardinality(), 33);
ASSERT_EQ(rb->get_type(), ObRbType::BITMAP);
// remove 32 value, remain 1 value
for (int i = 0; i < MAX_BITMAP_SET_VALUES; i++) {
ASSERT_EQ(rb->value_remove(300 + i), OB_SUCCESS);