From c76fda0bef4cdef034203b09db0109fd03c7e83d Mon Sep 17 00:00:00 2001 From: qijiax Date: Wed, 28 Aug 2024 03:45:02 +0000 Subject: [PATCH] [FEAT MERGE] Roaringbitmap type phase-2, performance optimization --- deps/oblib/src/common/object/ob_object.cpp | 10 +- deps/oblib/src/lib/CMakeLists.txt | 1 + .../oblib/src/lib/roaringbitmap/ob_rb_bin.cpp | 766 ++++++++++++++++-- deps/oblib/src/lib/roaringbitmap/ob_rb_bin.h | 41 +- .../lib/roaringbitmap/ob_rb_memory_mgr.cpp | 184 +++++ .../src/lib/roaringbitmap/ob_rb_memory_mgr.h | 57 ++ .../src/lib/roaringbitmap/ob_rb_utils.cpp | 735 ++++++++++++----- .../oblib/src/lib/roaringbitmap/ob_rb_utils.h | 32 +- .../lib/roaringbitmap/ob_roaringbitmap.cpp | 583 ++++++------- .../src/lib/roaringbitmap/ob_roaringbitmap.h | 12 +- mittest/mtlenv/mock_tenant_module_env.h | 2 + src/observer/ob_server.cpp | 7 + src/observer/omt/ob_multi_tenant.cpp | 2 + src/share/rc/ob_tenant_base.h | 2 + .../aggregate/ob_aggregate_processor.cpp | 38 +- .../engine/expr/ob_expr_rb_build_empty.cpp | 10 +- .../expr/ob_expr_rb_build_varbinary.cpp | 13 +- src/sql/engine/expr/ob_expr_rb_calc.cpp | 75 +- .../expr/ob_expr_rb_calc_cardinality.cpp | 30 +- .../engine/expr/ob_expr_rb_cardinality.cpp | 5 +- .../engine/expr/ob_expr_rb_func_helper.cpp | 46 +- src/sql/engine/expr/ob_expr_rb_is_empty.cpp | 20 +- src/sql/engine/expr/ob_expr_rb_to_string.cpp | 21 +- .../engine/expr/ob_expr_rb_to_varbinary.cpp | 20 +- unittest/share/test_roaringbitmap.cpp | 16 +- 25 files changed, 1958 insertions(+), 770 deletions(-) create mode 100644 deps/oblib/src/lib/roaringbitmap/ob_rb_memory_mgr.cpp create mode 100644 deps/oblib/src/lib/roaringbitmap/ob_rb_memory_mgr.h diff --git a/deps/oblib/src/common/object/ob_object.cpp b/deps/oblib/src/common/object/ob_object.cpp index 94ee08f13d..f8575f15fc 100644 --- a/deps/oblib/src/common/object/ob_object.cpp +++ b/deps/oblib/src/common/object/ob_object.cpp @@ -1311,13 +1311,19 @@ int ObObj::build_not_strict_default_value(int16_t precision) case 
ObTextType: case ObMediumTextType: case ObLongTextType: - case ObGeometryType: - case ObRoaringBitmapType:{ + case ObGeometryType:{ ObString null_str; set_string(data_type, null_str); meta_.set_inrow(); } break; + case ObRoaringBitmapType:{ + // empty string is illegal in roaringbitmap, 0x01000 corresponding to an empty roaringbitmap + ObString empty_str = ObString(2, "\x01\x00"); + set_string(data_type, empty_str); + meta_.set_inrow(); + } + break; case ObJsonType: { set_json_value(data_type, OB_JSON_NULL, 2); } diff --git a/deps/oblib/src/lib/CMakeLists.txt b/deps/oblib/src/lib/CMakeLists.txt index 250affc1f6..4424571a65 100644 --- a/deps/oblib/src/lib/CMakeLists.txt +++ b/deps/oblib/src/lib/CMakeLists.txt @@ -121,6 +121,7 @@ ob_set_subtarget(oblib_lib roaringbitmap roaringbitmap/ob_roaringbitmap.cpp roaringbitmap/ob_rb_utils.cpp roaringbitmap/ob_rb_bin.cpp + roaringbitmap/ob_rb_memory_mgr.cpp ) ob_set_subtarget(oblib_lib encode diff --git a/deps/oblib/src/lib/roaringbitmap/ob_rb_bin.cpp b/deps/oblib/src/lib/roaringbitmap/ob_rb_bin.cpp index 65bfa23768..e74746a175 100644 --- a/deps/oblib/src/lib/roaringbitmap/ob_rb_bin.cpp +++ b/deps/oblib/src/lib/roaringbitmap/ob_rb_bin.cpp @@ -13,24 +13,22 @@ #define USING_LOG_PREFIX LIB #include "lib/ob_errno.h" #include "lib/roaringbitmap/ob_rb_bin.h" +#include "roaring/roaring_array.h" namespace oceanbase { namespace common { -static const uint32_t ROARING_SERIAL_COOKIE_NO_RUNCONTAINER = 12346; -static const uint32_t ROARING_SERIAL_COOKIE = 12347; -static const uint32_t ROARING_NO_OFFSET_THRESHOLD = 4; -static const uint32_t ROARING_DEFAULT_MAX_SIZE = 4096; -static const uint32_t ROARING_BITSET_CONTAINER_SIZE_IN_WORDS = (1 << 16) / 64; - int ObRoaringBin::init() { int ret = OB_SUCCESS; size_t read_bytes = 0; - char * buf = roaring_bin_.ptr(); - uint32_t cookie = 0; + char *buf = roaring_bin_.ptr(); + int32_t cookie = 0; - if (roaring_bin_ == nullptr || roaring_bin_.empty()) { + if (OB_ISNULL(allocator_)) { + ret = 
OB_ERR_UNEXPECTED; + LOG_WARN("allocator is null", K(ret)); + } else if (roaring_bin_ == nullptr || roaring_bin_.empty()) { ret = OB_INVALID_DATA; LOG_WARN("roaringbitmap binary is empty", K(ret)); } @@ -76,11 +74,13 @@ int ObRoaringBin::init() } else { cookie = *reinterpret_cast(buf); buf += sizeof(int32_t); - if ((cookie & 0xFFFF) != ROARING_SERIAL_COOKIE && cookie != ROARING_SERIAL_COOKIE_NO_RUNCONTAINER) { + if ((cookie & 0xFFFF) != roaring::internal::SERIAL_COOKIE + && cookie != roaring::internal::SERIAL_COOKIE_NO_RUNCONTAINER) { ret = OB_INVALID_DATA; LOG_WARN("invalid cookie from roaring binary", K(ret), K(cookie)); - } else if ((cookie & 0xFFFF) == ROARING_SERIAL_COOKIE) { + } else if ((cookie & 0xFFFF) == roaring::internal::SERIAL_COOKIE) { size_ = (cookie >> 16) + 1; + hasrun_ = true; } else if (OB_FALSE_IT(read_bytes += sizeof(int32_t))){ } else if (read_bytes > roaring_bin_.length()) { ret = OB_INVALID_DATA; @@ -98,18 +98,15 @@ int ObRoaringBin::init() } // get run container bitmap if (OB_FAIL(ret)) { - } else { - hasrun_ = (cookie & 0xFFFF) == ROARING_SERIAL_COOKIE; - if (hasrun_) { - int32_t s = (size_ + 7) / 8; - read_bytes += s; - if(read_bytes > roaring_bin_.length()) { - ret = OB_INVALID_DATA; - LOG_WARN("ran out of bytes while reading run bitmap", K(ret), K(read_bytes), K(roaring_bin_.length())); - } else { - bitmapOfRunContainers_ = buf; - buf += s; - } + } else if (hasrun_) { + int32_t run_bitmap_size = (size_ + 7) / 8; + read_bytes += run_bitmap_size; + if (read_bytes > roaring_bin_.length()) { + ret = OB_INVALID_DATA; + LOG_WARN("ran out of bytes while reading run bitmap", K(ret), K(read_bytes), K(roaring_bin_.length())); + } else { + run_bitmap_ = buf; + buf += run_bitmap_size; } } // get keyscards @@ -118,31 +115,41 @@ int ObRoaringBin::init() } else if (read_bytes > roaring_bin_.length()) { ret = OB_INVALID_DATA; LOG_WARN("ran out of bytes while reading keycards", K(ret), K(read_bytes), K(roaring_bin_.length())); - } else if 
(OB_ISNULL(keyscards_ = static_cast(allocator_->alloc(size_ * 2 * sizeof(uint16_t))))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("failed to alloc memory for keyscards", K(ret), K(size_ * 2 * sizeof(uint16_t))); } else { - MEMCPY(keyscards_, buf, size_ * 2 * sizeof(uint16_t)); + keyscards_ = (uint16_t *)buf; buf += size_ * 2 * sizeof(uint16_t); + if ((uintptr_t)keyscards_ % sizeof(uint16_t) != 0) { + uint16_t * tmp_buf = nullptr; + if (OB_ISNULL(tmp_buf = static_cast(allocator_->alloc(size_ * 2 * sizeof(uint16_t))))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc memory for tmpbuf", K(ret), K(size_ * 2 * sizeof(uint16_t))); + } else { + MEMCPY(tmp_buf, keyscards_, size_ * 2 * sizeof(uint16_t)); + keyscards_ = tmp_buf; + } + } } // get offsets if (OB_FAIL(ret) || size_ == 0) { - } else if ((!hasrun_) || (size_ >= ROARING_NO_OFFSET_THRESHOLD)) { + } else if ((!hasrun_) || (size_ >= roaring::internal::NO_OFFSET_THRESHOLD)) { // has offsets read_bytes += size_ * sizeof(uint32_t); if (read_bytes > roaring_bin_.length()) { ret = OB_INVALID_DATA; LOG_WARN("ran out of bytes while reading offsets", K(ret), K(read_bytes), K(roaring_bin_.length())); - } else if ((uintptr_t)buf % sizeof(uint32_t) == 0) { - // use buffer directly + } else { offsets_ = (uint32_t *)buf; buf += size_ * sizeof(uint32_t); - } else if (OB_ISNULL(offsets_ = static_cast(allocator_->alloc(size_ * sizeof(uint32_t))))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("failed to alloc memory for offsets_", K(ret), K(size_ * sizeof(uint32_t))); - } else { - MEMCPY(offsets_, buf, size_ * sizeof(uint32_t)); - buf += size_ * sizeof(uint32_t); + if ((uintptr_t)offsets_ % sizeof(uint32_t) != 0) { + uint32_t * tmp_buf = nullptr; + if (OB_ISNULL(tmp_buf = static_cast(allocator_->alloc(size_ * sizeof(uint32_t))))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc memory for tmpbuf", K(ret), K(size_ * sizeof(uint32_t))); + } else { + MEMCPY(tmp_buf, offsets_, size_ * 
sizeof(uint32_t)); + offsets_ = tmp_buf; + } + } } // check binary length if (OB_FAIL(ret)) { @@ -150,7 +157,7 @@ int ObRoaringBin::init() // the last container size_t offset = offsets_[size_ - 1]; size_t container_size = 0; - if (OB_FAIL(get_container_size(size_ - 1, container_size))) { + if (OB_FAIL(get_container_size_at_index(size_ - 1, container_size))) { LOG_WARN("failed to get container size", K(ret), K(size_)); } else if (offset + container_size > roaring_bin_.length()) { ret = OB_INVALID_DATA; @@ -167,7 +174,7 @@ int ObRoaringBin::init() for (int32_t k = 0; OB_SUCC(ret) && k < size_; ++k) { offsets_[k] = read_bytes; size_t container_size = 0; - if (OB_FAIL(get_container_size(k, container_size))) { + if (OB_FAIL(get_container_size_at_index(k, container_size))) { LOG_WARN("failed to get container size", K(ret), K(k)); } else { read_bytes += container_size; @@ -183,6 +190,9 @@ int ObRoaringBin::init() } } + if (OB_SUCC(ret)) { + inited_ = true; + } return ret; } @@ -190,50 +200,464 @@ int ObRoaringBin::get_cardinality(uint64_t &cardinality) { int ret = OB_SUCCESS; cardinality = 0; - if (size_ == 0) { - // do nothing - } else if (OB_ISNULL(keyscards_)) { + if (!this->is_inited()) { ret = OB_NOT_INIT; - LOG_WARN("ObRoaringBin is not init", K(ret)); + LOG_WARN("ObRoaringBin is not inited", K(ret)); + } else if (size_ == 0) { + // do nothing } else { for (int i = 0; i < size_; ++i) { - cardinality += keyscards_[2 * i + 1] + 1; + cardinality += this->get_card_at_index(i); } } return ret; } -int ObRoaringBin::get_container_size(uint32_t n, size_t &container_size) +int ObRoaringBin::contains(uint32_t value, bool &is_contains) { int ret = OB_SUCCESS; - uint32_t this_card = keyscards_[2 * n + 1] + 1; - size_t offset = offsets_[n]; - bool is_bitmap = (this_card > ROARING_DEFAULT_MAX_SIZE); - bool is_run = false; - if (hasrun_ && (bitmapOfRunContainers_[n / 8] & (1 << (n % 8))) != 0) { - is_bitmap = false; - is_run = true; - } - if (is_bitmap) { - // bitmap container - 
container_size = ROARING_BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); - } else if (is_run) { - // run container - if (offset + sizeof(uint16_t) > roaring_bin_.length()) { - ret = OB_INVALID_DATA; - LOG_WARN("ran out of bytes while reading a run container (header)", K(ret), K(offset), K(n), K(roaring_bin_.length())); + is_contains = false; + uint16_t key = static_cast(value >> 16); + uint16_t lowvalue = static_cast(value & 0xFFFF); + int32_t idx = 0; + if (!this->is_inited()) { + ret = OB_NOT_INIT; + LOG_WARN("ObRoaringBin is not inited", K(ret)); + } else if (OB_FALSE_IT(idx = this->key_advance_until(-1, key))){ + } else if (key == this->get_key_at_index(idx)) { + uint8_t container_type = 0; + roaring::api::container_s *container = nullptr; + if (OB_FAIL(this->get_container_at_index(idx, container_type, container))) { + LOG_WARN("failed to get container at index", K(ret), K(idx)); } else { - uint16_t n_runs = *reinterpret_cast(roaring_bin_.ptr() + offset); - container_size = sizeof(uint16_t) + n_runs * 2 * sizeof(uint16_t); + is_contains = roaring::internal::container_contains(container, lowvalue, container_type); + } + if (OB_NOT_NULL(container)) { + roaring::internal::container_free(container, container_type); } - } else { - // array container - container_size = this_card * sizeof(uint16_t); } return ret; } +int ObRoaringBin::calc_and_cardinality(ObRoaringBin *rb, uint64_t &cardinality) +{ + int ret = OB_SUCCESS; + cardinality = 0; + if (!this->is_inited() || !rb->is_inited()) { + ret = OB_NOT_INIT; + LOG_WARN("ObRoaringBin is not inited", K(ret)); + } else { + int32_t l_idx = 0; + int32_t r_idx = 0; + while (OB_SUCC(ret) && l_idx < size_ && r_idx < rb->size_) { + uint16_t l_key = this->get_key_at_index(l_idx); + uint16_t r_key = rb->get_key_at_index(r_idx); + if (l_key < r_key) { + l_idx = this->key_advance_until(l_idx, r_key); + } else if (l_key > r_key) { + r_idx = rb->key_advance_until(r_idx, l_key); + } else { + // l_key == r_key + int container_card = 
0; + uint8_t l_container_type = 0; + uint8_t r_container_type = 0; + roaring::api::container_s *l_container = nullptr; + roaring::api::container_s *r_container = nullptr; + if (OB_FAIL(this->get_container_at_index(l_idx, l_container_type, l_container))) { + LOG_WARN("failed to get container at index from left ObRoaringBin", K(ret), K(l_idx)); + } else if (OB_FAIL(rb->get_container_at_index(r_idx, r_container_type, r_container))) { + LOG_WARN("failed to get container at index from right ObRoaringBin", K(ret), K(r_idx)); + } else { + container_card = roaring::internal::container_and_cardinality(l_container, l_container_type, r_container, r_container_type); + cardinality += container_card; + } + l_idx++; + r_idx++; + if (OB_NOT_NULL(l_container)) { + roaring::internal::container_free(l_container, l_container_type); + } + if (OB_NOT_NULL(r_container)) { + roaring::internal::container_free(r_container, r_container_type); + } + } + } // end while + } + return ret; +} + +int ObRoaringBin::calc_and(ObRoaringBin *rb, ObStringBuffer &res_buf, uint64_t &res_card, uint32_t high32) +{ + int ret = OB_SUCCESS; + int32_t need_size = 0; + roaring_bitmap_t *res_bitmap = nullptr; + res_card = 0; + if (!this->is_inited() || !rb->is_inited()) { + ret = OB_NOT_INIT; + LOG_WARN("ObRoaringBin is not inited", K(ret)); + } else if (OB_FALSE_IT(need_size = this->size_ > rb->size_ ? 
this->size_ : rb->size_)) { + } else { + ROARING_TRY_CATCH(res_bitmap = roaring::api::roaring_bitmap_create_with_capacity(need_size)); + if (OB_SUCC(ret) && OB_ISNULL(res_bitmap)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to create roaring_bitmap", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else { + int32_t l_idx = 0; + int32_t r_idx = 0; + while (OB_SUCC(ret) && l_idx < size_ && r_idx < rb->size_) { + uint16_t l_key = this->get_key_at_index(l_idx); + uint16_t r_key = rb->get_key_at_index(r_idx); + if (l_key < r_key) { + l_idx = this->key_advance_until(l_idx, r_key); + } else if (l_key > r_key) { + r_idx = rb->key_advance_until(r_idx, l_key); + } else { + // l_key == r_key + uint8_t l_container_type = 0; + uint8_t r_container_type = 0; + uint8_t res_container_type = 0; + roaring::api::container_s *l_container = nullptr; + roaring::api::container_s *r_container = nullptr; + roaring::api::container_s *res_container = nullptr; + int res_container_card = 0; + if (OB_FAIL(this->get_container_at_index(l_idx, l_container_type, l_container))) { + LOG_WARN("failed to get container at index from left ObRoaringBin", K(ret), K(l_idx)); + } else if (OB_FAIL(rb->get_container_at_index(r_idx, r_container_type, r_container))) { + LOG_WARN("failed to get container at index from right ObRoaringBin", K(ret), K(r_idx)); + } else { + ROARING_TRY_CATCH(res_container = roaring::internal::container_and(l_container, l_container_type, r_container, r_container_type, &res_container_type)); + } + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(res_container)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to calculate container and", K(ret), K(l_container), K(l_container_type), K(r_container_type), K(r_container)); + } else if (OB_FALSE_IT(res_container_card = roaring::internal::container_get_cardinality(res_container, res_container_type))) { + } else if (res_container_card > 0) { + ROARING_TRY_CATCH(roaring::internal::ra_append(&res_bitmap->high_low_container, l_key, 
res_container, res_container_type)); + if (OB_SUCC(ret)) { + res_card += res_container_card; + } + } else { + roaring::internal::container_free(res_container, res_container_type); + } + l_idx++; + r_idx++; + if (OB_NOT_NULL(l_container)) { + roaring::internal::container_free(l_container, l_container_type); + } + if (OB_NOT_NULL(r_container)) { + roaring::internal::container_free(r_container, r_container_type); + } + } + } // end while + // append high32 and serialized roaring_bitmap to res_buf + if (OB_SUCC(ret) && res_card > 0) { + uint64_t serial_size = 0; + ROARING_TRY_CATCH(serial_size = static_cast(roaring::api::roaring_bitmap_portable_size_in_bytes(res_bitmap))); + if (OB_FAIL(ret)) { + } else if (OB_FAIL(res_buf.reserve(sizeof(uint32_t) + serial_size))) { + LOG_WARN("failed to reserve buffer", K(ret), K(serial_size)); + } else if (OB_FAIL(res_buf.append(reinterpret_cast(&high32), sizeof(uint32_t)))) { + LOG_WARN("fail to append high32", K(ret), K(high32)); + } else if (serial_size != roaring::api::roaring_bitmap_portable_serialize(res_bitmap, res_buf.ptr() + res_buf.length())) { + ret = OB_SERIALIZE_ERROR; + LOG_WARN("serialize size not match", K(ret), K(serial_size)); + } else if (OB_FAIL(res_buf.set_length(res_buf.length() + serial_size))) { + LOG_WARN("failed to set buffer length", K(ret)); + } + } + } + if (OB_NOT_NULL(res_bitmap)) { + roaring::api::roaring_bitmap_free(res_bitmap); + } + return ret; +} + +int ObRoaringBin::calc_andnot(ObRoaringBin *rb, ObStringBuffer &res_buf, uint64_t &res_card, uint32_t high32) +{ + int ret = OB_SUCCESS; + roaring_bitmap_t *res_bitmap = nullptr; + res_card = 0; + if (!this->is_inited() || !rb->is_inited()) { + ret = OB_NOT_INIT; + LOG_WARN("ObRoaringBin is not inited", K(ret)); + } else { + ROARING_TRY_CATCH(res_bitmap = roaring::api::roaring_bitmap_create_with_capacity(this->size_)); + if (OB_SUCC(ret) && OB_ISNULL(res_bitmap)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to create roaring_bitmap", K(ret)); 
+ } + } + if (OB_FAIL(ret)) { + } else { + int32_t l_idx = 0; + int32_t r_idx = 0; + while (OB_SUCC(ret) && l_idx < size_ && r_idx < rb->size_) { + uint16_t l_key = this->get_key_at_index(l_idx); + uint16_t r_key = rb->get_key_at_index(r_idx); + if (l_key == r_key) { + uint8_t l_container_type = 0; + uint8_t r_container_type = 0; + uint8_t res_container_type = 0; + roaring::api::container_s *l_container = nullptr; + roaring::api::container_s *r_container = nullptr; + roaring::api::container_s *res_container = nullptr; + int res_container_card = 0; + if (OB_FAIL(this->get_container_at_index(l_idx, l_container_type, l_container))) { + LOG_WARN("failed to get container at index from left ObRoaringBin", K(ret), K(l_idx)); + } else if (OB_FAIL(rb->get_container_at_index(r_idx, r_container_type, r_container))) { + LOG_WARN("failed to get container at index from right ObRoaringBin", K(ret), K(r_idx)); + } else { + ROARING_TRY_CATCH(res_container = roaring::internal::container_andnot(l_container, l_container_type, r_container, r_container_type, &res_container_type)); + } + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(res_container)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to calculate container andnot", K(ret), K(l_container), K(l_container_type), K(r_container_type), K(r_container)); + } else if (OB_FALSE_IT(res_container_card = roaring::internal::container_get_cardinality(res_container, res_container_type))) { + } else if (res_container_card > 0) { + ROARING_TRY_CATCH(roaring::internal::ra_append(&res_bitmap->high_low_container, l_key, res_container, res_container_type)); + if (OB_SUCC(ret)) { + res_card += res_container_card; + } + } else { + roaring::internal::container_free(res_container, res_container_type); + } + l_idx++; + r_idx++; + if (OB_NOT_NULL(l_container)) { + roaring::internal::container_free(l_container, l_container_type); + } + if (OB_NOT_NULL(r_container)) { + roaring::internal::container_free(r_container, r_container_type); + } + } else { 
+ uint8_t res_container_type = 0; + roaring::api::container_s *res_container = nullptr; + int res_container_card = 0; + if (OB_FAIL(this->get_container_at_index(l_idx, res_container_type, res_container))) { + LOG_WARN("failed to get container at index from left ObRoaringBin", K(ret), K(l_idx)); + } else if (OB_FALSE_IT(res_container_card = roaring::internal::container_get_cardinality(res_container, res_container_type))) { + } else if (res_container_card > 0) { + ROARING_TRY_CATCH(roaring::internal::ra_append(&res_bitmap->high_low_container, l_key, res_container, res_container_type)); + if (OB_SUCC(ret)) { + res_card += res_container_card; + } + } else if (OB_NOT_NULL(res_container)) { + roaring::internal::container_free(res_container, res_container_type); + } + l_idx++; + } + } // end while + while (OB_SUCC(ret) && l_idx < size_) { + uint16_t l_key = this->get_key_at_index(l_idx); + uint8_t res_container_type = 0; + roaring::api::container_s *res_container = nullptr; + int res_container_card = 0; + if (OB_FAIL(this->get_container_at_index(l_idx, res_container_type, res_container))) { + LOG_WARN("failed to get container at index from left ObRoaringBin", K(ret), K(l_idx)); + } else if (OB_FALSE_IT(res_container_card = roaring::internal::container_get_cardinality(res_container, res_container_type))) { + } else if (res_container_card > 0) { + ROARING_TRY_CATCH(roaring::internal::ra_append(&res_bitmap->high_low_container, l_key, res_container, res_container_type)); + if (OB_SUCC(ret)) { + res_card += res_container_card; + } + } else if (OB_NOT_NULL(res_container)) { + roaring::internal::container_free(res_container, res_container_type); + } + l_idx++; + } // end while + // append high32 and serialized roaring_bitmap to res_buf + if (OB_SUCC(ret) && res_card > 0) { + uint64_t serial_size = 0; + ROARING_TRY_CATCH(serial_size = static_cast(roaring::api::roaring_bitmap_portable_size_in_bytes(res_bitmap))); + if (OB_FAIL(ret)) { + } else if 
(OB_FAIL(res_buf.reserve(sizeof(uint32_t) + serial_size))) { + LOG_WARN("failed to reserve buffer", K(ret), K(serial_size)); + } else if (OB_FAIL(res_buf.append(reinterpret_cast(&high32), sizeof(uint32_t)))) { + LOG_WARN("fail to append high32", K(ret), K(high32)); + } else if (serial_size != roaring::api::roaring_bitmap_portable_serialize(res_bitmap, res_buf.ptr() + res_buf.length())) { + ret = OB_SERIALIZE_ERROR; + LOG_WARN("serialize size not match", K(ret), K(serial_size)); + } else if (OB_FAIL(res_buf.set_length(res_buf.length() + serial_size))) { + LOG_WARN("failed to set buffer length", K(ret)); + } + } + } + if (OB_NOT_NULL(res_bitmap)) { + roaring::api::roaring_bitmap_free(res_bitmap); + } + return ret; +} + +int ObRoaringBin::get_container_size_at_index(uint16_t idx, size_t &container_size) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(keyscards_) || OB_ISNULL(offsets_) + || (hasrun_ && OB_ISNULL(run_bitmap_))) { + ret = OB_NOT_INIT; + LOG_WARN("ObRoaringBin is not inited", K(ret)); + } else if (idx >= size_) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(idx), K(size_)); + } else { + int32_t this_card = this->get_card_at_index(idx); + size_t offset = offsets_[idx]; + bool is_bitmap = (this_card > roaring::internal::DEFAULT_MAX_SIZE); + bool is_run = false; + if (is_run_at_index(idx)) { + is_bitmap = false; + is_run = true; + } + if (is_bitmap) { + // bitmap container + container_size = roaring::internal::BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); + } else if (is_run) { + // run container + if (offset + sizeof(uint16_t) > roaring_bin_.length()) { + ret = OB_INVALID_DATA; + LOG_WARN("ran out of bytes while reading a run container (header)", K(ret), K(offset), K(idx), K(roaring_bin_.length())); + } else { + uint16_t n_runs = *reinterpret_cast(roaring_bin_.ptr() + offset); + container_size = sizeof(uint16_t) + n_runs * 2 * sizeof(uint16_t); + } + } else { + // array container + container_size = this_card * sizeof(uint16_t); + } 
+ } + return ret; +} + +int ObRoaringBin::get_container_at_index(uint16_t idx, uint8_t &container_type, container_s *&container) +{ + int ret = OB_SUCCESS; + if (!this->is_inited()) { + ret = OB_NOT_INIT; + LOG_WARN("ObRoaringBin is not inited", K(ret)); + } else if (idx >= size_) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(idx), K(size_)); + } else { + size_t read_bytes = offsets_[idx]; + char *buf = roaring_bin_.ptr() + offsets_[idx]; + int32_t container_card = this->get_card_at_index(idx); + bool is_bitmap = (container_card > roaring::internal::DEFAULT_MAX_SIZE); + bool is_run = false; + if (is_run_at_index(idx)) { + is_bitmap = false; + is_run = true; + } + if (is_bitmap) { + // bitmap container + size_t container_size = roaring::internal::BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); + read_bytes += container_size; + if (read_bytes > roaring_bin_.length()) { + ret = OB_INVALID_DATA; + LOG_WARN("ran out of bytes while reading a bitmap container", K(ret), K(read_bytes), K(idx), K(container_size), K(offsets_[idx]), K(roaring_bin_.length())); + } else { + roaring::internal::bitset_container_t * container_ptr = nullptr; + ROARING_TRY_CATCH(container_ptr = roaring::internal::bitset_container_create()); + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(container_ptr)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc memory for container", K(ret)); + } else { + roaring::internal::bitset_container_read(container_card, container_ptr, buf); + container = container_ptr; + container_type = BITSET_CONTAINER_TYPE; + } + } + } else if (is_run) { + // run container + read_bytes += sizeof(uint16_t); + if(read_bytes > roaring_bin_.length()) { + ret = OB_INVALID_DATA; + LOG_WARN("ran out of bytes while reading a run container (header)", K(ret), K(read_bytes), K(idx), K(roaring_bin_.length())); + } else { + uint16_t n_runs = *reinterpret_cast(buf); + size_t container_size = n_runs * sizeof(roaring::internal::rle16_t); + read_bytes += 
container_size; + if (read_bytes > roaring_bin_.length()) { + ret = OB_INVALID_DATA; + LOG_WARN("ran out of bytes while reading a run container", K(ret), K(read_bytes), K(idx), K(container_size), K(offsets_[idx]), K(roaring_bin_.length())); + } else { + roaring::internal::run_container_t * container_ptr = nullptr; + ROARING_TRY_CATCH(container_ptr = roaring::internal::run_container_create()); + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(container_ptr)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc memory for container", K(ret)); + } else { + roaring::internal::run_container_read(container_card, container_ptr, buf); + container = container_ptr; + container_type = RUN_CONTAINER_TYPE; + } + } + } + } else { + // array container + size_t container_size = container_card * sizeof(uint16_t); + read_bytes += container_size; + if (read_bytes > roaring_bin_.length()) { + ret = OB_INVALID_DATA; + LOG_WARN("ran out of bytes while reading an array container", K(ret), K(read_bytes), K(idx), K(roaring_bin_.length())); + } else { + roaring::internal::array_container_t * container_ptr = nullptr; + ROARING_TRY_CATCH(container_ptr = roaring::internal::array_container_create()); + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(container_ptr)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc memory for container", K(ret)); + } else { + roaring::internal::array_container_read(container_card, container_ptr, buf); + container = container_ptr; + container_type = ARRAY_CONTAINER_TYPE; + } + } + } + } + return ret; +} + +int32_t ObRoaringBin::key_advance_until(int32_t idx, uint16_t min) +{ + int32_t res_idx = 0; + int32_t lower = idx + 1; + if ((lower >= size_) || (this->get_card_at_index(lower) >= min)) { + res_idx = lower; + } else { + int32_t spansize = 1; + while ((lower + spansize < size_) && (this->get_card_at_index(lower + spansize) < min)) { + spansize *= 2; + } + int32_t upper = (lower + spansize < size_) ? 
lower + spansize : size_ - 1; + if (this->get_card_at_index(upper) == min) { + res_idx = upper; + } else if (this->get_card_at_index(upper) < min) { + // means keyscards_ has no item >= min + res_idx = size_; + } else { + lower += (spansize / 2); + int32_t mid = 0; + while (lower + 1 != upper) { + mid = (lower + upper) / 2; + if (this->get_card_at_index(mid) == min) { + return mid; + } else if (this->get_card_at_index(mid) < min) { + lower = mid; + } else { + upper = mid; + } + } + res_idx = upper; + } + } + return res_idx; +} int ObRoaring64Bin::init() { @@ -241,7 +665,10 @@ int ObRoaring64Bin::init() size_t read_bytes = 0; char * buf = roaring_bin_.ptr(); - if (roaring_bin_ == nullptr || roaring_bin_.empty()) { + if (OB_ISNULL(allocator_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("allocator is null", K(ret)); + } else if (roaring_bin_ == nullptr || roaring_bin_.empty()) { ret = OB_INVALID_DATA; LOG_WARN("roaringbitmap binary is empty", K(ret)); } @@ -280,9 +707,6 @@ int ObRoaring64Bin::init() } else if (OB_ISNULL(roaring_bufs_ = static_cast(allocator_->alloc(buckets_ * sizeof(ObRoaringBin *))))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("failed to alloc memory for roaring_bufs", K(ret), K(buckets_ * sizeof(ObRoaringBin *))); - } else if (OB_ISNULL(offsets_ = static_cast(allocator_->alloc(buckets_ * sizeof(uint32_t))))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("failed to alloc memory for offsets", K(ret), K(buckets_ * sizeof(uint32_t))); } else { for (uint64_t bucket = 0; OB_SUCC(ret) && bucket < buckets_; ++bucket) { ObString roaring_bin; @@ -294,7 +718,6 @@ int ObRoaring64Bin::init() } else { high32_[bucket] = *reinterpret_cast(buf); buf += sizeof(uint32_t); - offsets_[bucket] = read_bytes; } // get roaring_buf (32bits) if (OB_FAIL(ret)) { @@ -311,6 +734,9 @@ int ObRoaring64Bin::init() } } + if (OB_SUCC(ret)) { + inited_ = true; + } return ret; } @@ -318,11 +744,11 @@ int ObRoaring64Bin::get_cardinality(uint64_t &cardinality) { int ret = OB_SUCCESS; 
cardinality = 0; - if (buckets_ == 0) { - // do nothing - } else if (OB_ISNULL(roaring_bufs_)) { + if (!this->is_inited()) { ret = OB_NOT_INIT; - LOG_WARN("ObRoaringBin is not init", K(ret)); + LOG_WARN("ObRoaring64Bin is not inited", K(ret)); + } else if (buckets_ == 0) { + // do nothing } else { for (int i = 0; OB_SUCC(ret) && i < buckets_; ++i) { @@ -337,5 +763,197 @@ int ObRoaring64Bin::get_cardinality(uint64_t &cardinality) return ret; } +int ObRoaring64Bin::contains(uint64_t value, bool &is_contains) +{ + int ret = OB_SUCCESS; + is_contains = false; + uint32_t high32 = static_cast(value >> 32); + uint32_t low32 = static_cast(value & 0xFFFFFFFF); + if (!this->is_inited()) { + ret = OB_NOT_INIT; + LOG_WARN("ObRoaring64Bin is not inited", K(ret)); + } else { + for (int i = 0; OB_SUCC(ret) && i < buckets_ && high32 <= high32_[i]; ++i) { + if (high32 == high32_[i] && OB_FAIL(roaring_bufs_[i]->contains(low32, is_contains))) { + LOG_WARN("fail to check value is_contains in ObRoaringBin", K(ret), K(i), K(low32)); + } + } + } + return ret; +} + +int ObRoaring64Bin::calc_and_cardinality(ObRoaring64Bin *rb, uint64_t &cardinality) +{ + int ret = OB_SUCCESS; + cardinality = 0; + if (!this->is_inited() || !rb->is_inited()) { + ret = OB_NOT_INIT; + LOG_WARN("ObRoaring64Bin is not inited", K(ret)); + } else { + uint64_t l_idx = 0; + uint64_t r_idx = 0; + while(OB_SUCC(ret) && l_idx < buckets_ && r_idx < rb->buckets_) { + uint32_t l_high32 = high32_[l_idx]; + uint32_t r_high32 = rb->high32_[r_idx]; + if (l_high32 < r_high32) { + l_idx++; + } else if (l_high32 > r_high32){ + r_idx++; + } else { + // l_high32 == r_high32 + uint64_t rb32_card = 0; + if (OB_FAIL(roaring_bufs_[l_idx]->calc_and_cardinality(rb->roaring_bufs_[r_idx], rb32_card))) { + LOG_WARN("fail to calc and cardinality", K(ret), K(l_idx), K(r_idx)); + } else { + cardinality += rb32_card; + l_idx++; + r_idx++; + } + } + } // end while + } + return ret; +} + +int ObRoaring64Bin::calc_and(ObRoaring64Bin *rb, 
ObStringBuffer &res_buf, uint64_t &res_card)
+{
+  int ret = OB_SUCCESS;
+  res_card = 0;
+  uint64_t buckets = 0;
+  uint64_t buckets_offset = res_buf.length();
+  if (!this->is_inited() || !rb->is_inited()) {
+    ret = OB_NOT_INIT;
+    LOG_WARN("ObRoaring64Bin is not inited", K(ret));
+  } else if (OB_FAIL(res_buf.reserve(roaring_bin_.length() > rb->roaring_bin_.length() ? roaring_bin_.length() : rb->roaring_bin_.length()))) {
+    LOG_WARN("failed to reserve buffer", K(ret), K(roaring_bin_.length()), K(rb->roaring_bin_.length()));
+  } else if (OB_FAIL(res_buf.append(reinterpret_cast<const char*>(&buckets), sizeof(uint64_t)))) {
+    LOG_WARN("fail to append buckets");
+  } else {
+    uint64_t l_idx = 0;
+    uint64_t r_idx = 0;
+    while(OB_SUCC(ret) && l_idx < buckets_ && r_idx < rb->buckets_) {
+      uint32_t l_high32 = high32_[l_idx];
+      uint32_t r_high32 = rb->high32_[r_idx];
+      if (l_high32 < r_high32) {
+        l_idx = this->high32_advance_until(l_idx, r_high32);
+      } else if (l_high32 > r_high32){
+        r_idx = rb->high32_advance_until(r_idx, l_high32);
+      } else {
+        // l_high32 == r_high32
+        uint64_t rb32_card = 0;
+        ObString rb32_bin = nullptr;
+        if (OB_FAIL(roaring_bufs_[l_idx]->calc_and(rb->roaring_bufs_[r_idx], res_buf ,rb32_card, l_high32))) {
+          LOG_WARN("fail to calculate ObRoaringBin andnot", K(ret), K(l_idx), K(r_idx));
+        } else if (rb32_card > 0) {
+          res_card += rb32_card;
+          buckets++;
+        }
+        l_idx++;
+        r_idx++;
+      }
+    } // end while
+  }
+  // modify buckets in res_buf
+  if (OB_SUCC(ret) && buckets > 0) {
+    uint64_t *buckets_ptr = reinterpret_cast<uint64_t *>(res_buf.ptr() + buckets_offset);
+    *buckets_ptr = buckets;
+  }
+  return ret;
+}
+
+/**
+ * Appends the serialized difference (this ANDNOT rb) to res_buf.
+ *
+ * A uint64 bucket count is appended first as a placeholder and patched once the
+ * number of emitted buckets is known. Left buckets whose high32 key does not
+ * occur in rb are copied through as | high32 | 32-bit roaring binary |; buckets
+ * whose key occurs on both sides are delegated to ObRoaringBin::calc_andnot.
+ *
+ * @param rb        right-hand operand, must be non-null and inited
+ * @param res_buf   output buffer, the result is appended at its current end
+ * @param res_card  set to the cardinality of the appended result
+ * @return OB_SUCCESS, OB_INVALID_ARGUMENT (rb is null), OB_NOT_INIT, or the
+ *         error reported by the buffer / per-bucket calculation
+ */
+int ObRoaring64Bin::calc_andnot(ObRoaring64Bin *rb, ObStringBuffer &res_buf, uint64_t &res_card)
+{
+  int ret = OB_SUCCESS;
+  res_card = 0;
+  uint64_t buckets = 0;
+  uint64_t buckets_offset = res_buf.length();
+  if (OB_ISNULL(rb)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("right ObRoaring64Bin is null", K(ret));
+  } else if (!this->is_inited() || !rb->is_inited()) {
+    ret = OB_NOT_INIT;
+    LOG_WARN("ObRoaring64Bin is not inited", K(ret));
+  } else if (OB_FAIL(res_buf.reserve(roaring_bin_.length()))) {
+    LOG_WARN("failed to reserve buffer", K(ret), K(roaring_bin_.length()));
+  } else if (OB_FAIL(res_buf.append(reinterpret_cast<const char*>(&buckets), sizeof(uint64_t)))) {
+    LOG_WARN("fail to append buckets");
+  } else {
+    uint64_t l_idx = 0;
+    uint64_t r_idx = 0;
+    // Walk every left bucket; the right side is only consulted while it still has buckets.
+    while (OB_SUCC(ret) && l_idx < buckets_) {
+      uint32_t l_high32 = high32_[l_idx];
+      if (r_idx < rb->buckets_ && rb->high32_[r_idx] < l_high32) {
+        // The right bucket has no left counterpart, so it removes nothing. Skip forward
+        // on the right only: a later right bucket may still match this left key.
+        r_idx = rb->high32_advance_until(r_idx, l_high32);
+      } else if (r_idx < rb->buckets_ && rb->high32_[r_idx] == l_high32) {
+        uint64_t rb32_card = 0;
+        if (OB_FAIL(roaring_bufs_[l_idx]->calc_andnot(rb->roaring_bufs_[r_idx], res_buf, rb32_card, l_high32))) {
+          LOG_WARN("fail to calculate ObRoaringBin andnot", K(ret), K(l_idx), K(r_idx));
+        } else if (rb32_card > 0) {
+          res_card += rb32_card;
+          buckets++;
+        }
+        l_idx++;
+        r_idx++;
+      } else {
+        // No right bucket carries this key: the left bucket survives unchanged.
+        uint64_t rb32_card = 0;
+        if (OB_FAIL(roaring_bufs_[l_idx]->get_cardinality(rb32_card))) {
+          LOG_WARN("fail to get cardinality of roaring_bin", K(ret), K(l_idx));
+        } else if (OB_FAIL(res_buf.append(reinterpret_cast<const char*>(&l_high32), sizeof(uint32_t)))) {
+          LOG_WARN("fail to append high32", K(ret), K(l_idx), K(l_high32));
+        } else if (OB_FAIL(res_buf.append(roaring_bufs_[l_idx]->get_bin()))) {
+          LOG_WARN("fail to append roaring_bin", K(ret), K(l_idx));
+        } else {
+          res_card += rb32_card;
+          buckets++;
+        }
+        l_idx++;
+      }
+    } // end while
+  }
+  // modify buckets in res_buf
+  if (OB_SUCC(ret) && buckets > 0) {
+    uint64_t *buckets_ptr = reinterpret_cast<uint64_t *>(res_buf.ptr() + buckets_offset);
+    *buckets_ptr = buckets;
+  }
+  return ret;
+}
+
+uint64_t ObRoaring64Bin::high32_advance_until(uint64_t idx, uint32_t min)
+{
+  uint64_t res_idx = 0;
+  uint64_t lower = idx + 1;
+  if ((lower >= buckets_) || (this->high32_[lower] >= min)) {
+    res_idx = lower;
+  } else {
+    uint64_t spansize = 1;
+    while ((lower + spansize <
buckets_) && (this->high32_[lower + spansize] < min)) { + spansize *= 2; + } + uint64_t upper = (lower + spansize < buckets_) ? lower + spansize : buckets_ - 1; + if (this->high32_[upper] == min) { + res_idx = upper; + } else if (this->high32_[upper] < min) { + // means keyscards_ has no item >= min + res_idx = buckets_; + } else { + lower += (spansize / 2); + uint64_t mid = 0; + while (lower + 1 != upper) { + mid = (lower + upper) / 2; + if (this->high32_[mid] == min) { + return mid; + } else if (this->high32_[mid] < min) { + lower = mid; + } else { + upper = mid; + } + } + res_idx = upper; + } + } + return res_idx; +} + } // namespace common } // namespace oceanbase \ No newline at end of file diff --git a/deps/oblib/src/lib/roaringbitmap/ob_rb_bin.h b/deps/oblib/src/lib/roaringbitmap/ob_rb_bin.h index 871ccb6ca2..4e90ac781c 100644 --- a/deps/oblib/src/lib/roaringbitmap/ob_rb_bin.h +++ b/deps/oblib/src/lib/roaringbitmap/ob_rb_bin.h @@ -14,6 +14,7 @@ #define OCEANBASE_LIB_ROARINGBITMAP_OB_RB_BIN_ #include "ob_roaringbitmap.h" +// #include "roaring/containers/containers.h" namespace oceanbase { namespace common { @@ -26,16 +27,29 @@ public: roaring_bin_(roaring_bin), bin_length_(0), size_(0), + keyscards_(nullptr), + offsets_(nullptr), hasrun_(false), - bitmapOfRunContainers_(nullptr) {} + run_bitmap_(nullptr), + inited_(false) {} virtual ~ObRoaringBin() = default; int init(); + inline bool is_inited() {return inited_;} int get_cardinality(uint64_t &cardinality); - size_t get_bin_length() {return bin_length_;} - int get_container_size(uint32_t n, size_t &container_size); - + int contains(uint32_t value, bool &is_contains); + int calc_and_cardinality(ObRoaringBin *rb, uint64_t &cardinality); + int calc_and(ObRoaringBin *rb, ObStringBuffer &res_buf, uint64_t &res_card, uint32_t high32); + int calc_andnot(ObRoaringBin *rb, ObStringBuffer &res_buf, uint64_t &res_card, uint32_t high32); + inline ObString get_bin() { return roaring_bin_; } + inline size_t 
get_bin_length() { return bin_length_; } + inline uint16_t get_key_at_index(uint16_t idx) { return keyscards_[idx * 2]; } + inline int32_t get_card_at_index(uint16_t idx) { return keyscards_[idx * 2 + 1] + 1; } + inline bool is_run_at_index(uint16_t idx) { return hasrun_ && (run_bitmap_[idx / 8] & (1 << (idx % 8))) != 0; } + int get_container_size_at_index(uint16_t idx, size_t &container_size); + int get_container_at_index(uint16_t idx, uint8_t &container_type, container_s *&container); + inline int32_t key_advance_until(int32_t idx, uint16_t min); private: ObIAllocator* allocator_; ObString roaring_bin_; @@ -44,7 +58,8 @@ private: uint16_t *keyscards_; uint32_t *offsets_; bool hasrun_; - char *bitmapOfRunContainers_; + char *run_bitmap_; + bool inited_; }; @@ -54,22 +69,32 @@ public: ObRoaring64Bin(ObIAllocator *allocator, ObString &roaring_bin) : allocator_(allocator), roaring_bin_(roaring_bin), - roaring_bufs_(nullptr) {} + buckets_(0), + high32_(nullptr), + roaring_bufs_(nullptr), + inited_(false) {} virtual ~ObRoaring64Bin() = default; int init(); + inline bool is_inited() {return inited_;} + int get_cardinality(uint64_t &cardinality); + int contains(uint64_t value, bool &is_contains); + int calc_and_cardinality(ObRoaring64Bin *rb, uint64_t &cardinality); + int calc_and(ObRoaring64Bin *rb, ObStringBuffer &res_buf, uint64_t &res_card); + int calc_andnot(ObRoaring64Bin *rb, ObStringBuffer &res_buf, uint64_t &res_card); + inline uint64_t high32_advance_until(uint64_t idx, uint32_t min); private: ObIAllocator* allocator_; ObString roaring_bin_; uint64_t buckets_; uint32_t *high32_; - uint32_t *offsets_; ObRoaringBin **roaring_bufs_; + bool inited_; }; } // namespace common } // namespace oceanbase -#endif // OCEANBASE_LIB_ROARINGBITMAP_OB_ROARINGBITMAP_ \ No newline at end of file +#endif // OCEANBASE_LIB_ROARINGBITMAP_OB_RB_BIN_ \ No newline at end of file diff --git a/deps/oblib/src/lib/roaringbitmap/ob_rb_memory_mgr.cpp 
b/deps/oblib/src/lib/roaringbitmap/ob_rb_memory_mgr.cpp
new file mode 100644
index 0000000000..651cda2482
--- /dev/null
+++ b/deps/oblib/src/lib/roaringbitmap/ob_rb_memory_mgr.cpp
@@ -0,0 +1,214 @@
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#define USING_LOG_PREFIX STORAGE
+
+#include "lib/oblog/ob_log.h"
+#include "ob_rb_memory_mgr.h"
+#include "observer/ob_server.h"
+
+namespace oceanbase
+{
+namespace common
+{
+namespace
+{
+// Bookkeeping header stored directly in front of every pointer handed to CRoaring:
+// | mem_mgr | tenant_id | size | data ... |
+struct RbAllocHeader
+{
+  ObRbMemMgr *mem_mgr_;  // tenant manager that owns the block, nullptr if it came from ob_malloc
+  uint64_t tenant_id_;   // MTL_ID() at allocation time, re-checked on free
+  size_t size_;          // payload size requested by the caller (header excluded)
+};
+
+// Maps a payload pointer returned by roaring_malloc back to its bookkeeping header.
+inline RbAllocHeader *rb_header_of(void *ptr)
+{
+  return reinterpret_cast<RbAllocHeader *>(ptr) - 1;
+}
+} // anonymous namespace
+
+// malloc hook: allocates `size` payload bytes plus a RbAllocHeader in front of them.
+// Returns nullptr for size == 0, throws std::bad_alloc if no memory could be obtained.
+static void *roaring_malloc(size_t size) {
+  void *res_ptr = nullptr;
+  if (size > 0) {
+    void *alloc_ptr = nullptr;
+    ObRbMemMgr *mem_mgr = nullptr;
+    uint64_t tenant_id = MTL_ID();
+    if (size > static_cast<size_t>(-1) - sizeof(RbAllocHeader)) {
+      // size + header would wrap around: fall through to the allocation-failure path
+    } else if (OB_INVALID_TENANT_ID == tenant_id) {
+      // no tenant context: use ob_malloc and leave mem_mgr_ as nullptr
+      alloc_ptr = ob_malloc(size + sizeof(RbAllocHeader), lib::ObLabel("RbMemMgr"));
+    } else if (OB_ISNULL(mem_mgr = MTL(ObRbMemMgr *))) {
+      int ret = OB_ERR_UNEXPECTED;
+      LOG_ERROR("mem_mgr is null", K(tenant_id));
+      ob_abort();
+    } else {
+      alloc_ptr = mem_mgr->alloc(size + sizeof(RbAllocHeader));
+    }
+    if (alloc_ptr == nullptr) {
+      int ret = OB_ALLOCATE_MEMORY_FAILED;
+      LOG_WARN("alloc memory failed", K(size));
+      throw std::bad_alloc();
+    } else {
+      RbAllocHeader *header = static_cast<RbAllocHeader *>(alloc_ptr);
+      header->mem_mgr_ = mem_mgr;
+      header->tenant_id_ = tenant_id;
+      header->size_ = size;
+      res_ptr = header + 1;
+    }
+  }
+  return res_ptr;
+}
+
+// free hook: releases a block through the allocator recorded in its header.
+// Aborts when the current tenant differs from the tenant that allocated the block.
+static void roaring_free(void *ptr) {
+  if (ptr != nullptr) {
+    RbAllocHeader *header = rb_header_of(ptr);
+    uint64_t tenant_id = MTL_ID();
+    uint64_t alloc_tenant_id = header->tenant_id_;
+    if (tenant_id != alloc_tenant_id) {
+      int ret = OB_ERR_UNEXPECTED;
+      LOG_ERROR("tenant ID not match", K(tenant_id), K(alloc_tenant_id));
+      ob_abort();
+    } else if (OB_ISNULL(header->mem_mgr_)) {
+      ob_free(header);
+    } else {
+      header->mem_mgr_->free(header);
+    }
+  }
+  return;
+}
+
+// realloc hook: size == 0 frees ptr and returns nullptr, ptr == nullptr behaves like malloc,
+// otherwise the payload is moved into a freshly allocated block of `size` bytes.
+static void *roaring_realloc(void *ptr, size_t size) {
+  void *res = nullptr;
+  if (0 == size) {
+    roaring_free(ptr);
+  } else if (nullptr == ptr) {
+    res = roaring_malloc(size);
+  } else {
+    const size_t old_size = rb_header_of(ptr)->size_;
+    res = roaring_malloc(size);
+    // copy only what fits: the old payload may be larger than the new one when shrinking
+    MEMCPY(res, ptr, old_size < size ? old_size : size);
+    roaring_free(ptr);
+  }
+  return res;
+}
+
+// calloc hook: zero-initialized array of item_cnt elements of `size` bytes each.
+static void *roaring_calloc(size_t item_cnt, size_t size) {
+  void *res = nullptr;
+  if (item_cnt != 0 && size > static_cast<size_t>(-1) / item_cnt) {
+    // item_cnt * size would wrap around and under-allocate
+    int ret = OB_ALLOCATE_MEMORY_FAILED;
+    LOG_WARN("calloc size overflow", K(item_cnt), K(size));
+    throw std::bad_alloc();
+  } else {
+    const size_t total_size = item_cnt * size;
+    res = roaring_malloc(total_size);
+    if (res != nullptr) {
+      MEMSET(res, 0, total_size);
+    }
+  }
+  return res;
+}
+
+// aligned_malloc hook: over-allocates, rounds the address down to `alignment` and stores the
+// original pointer just below the returned address. The mask assumes a power-of-two alignment.
+static void *roaring_aligned_malloc(size_t alignment, size_t size) {
+  void *res = nullptr;
+  if (size > 0) {
+    size_t offset = alignment - 1 + sizeof(void *);
+    void *orig_ptr = roaring_malloc(size + offset);
+    if (orig_ptr == nullptr) {
+      res = orig_ptr;
+    } else {
+      size_t orig_location = reinterpret_cast<size_t>(orig_ptr);
+      size_t real_location = (orig_location + offset) & ~(alignment - 1);
+      res = reinterpret_cast<void *>(real_location);
+      size_t orig_ptr_stroage = real_location - sizeof(void *);
+      *reinterpret_cast<void **>(orig_ptr_stroage) = orig_ptr;
+    }
+  }
+  return res;
+}
+
+// aligned_free hook: recovers the pointer saved by roaring_aligned_malloc and frees it.
+static void roaring_aligned_free(void *ptr) {
+  if (ptr != nullptr) {
+    size_t orig_ptr_stroage = reinterpret_cast<size_t>(ptr) - sizeof(void *);
+    roaring_free(*reinterpret_cast<void **>(orig_ptr_stroage));
+  }
+  return;
+}
+
+// Registers the hooks above with CRoaring through roaring_init_memory_hook.
+int ObRbMemMgr::init_memory_hook()
+{
+  int ret = OB_SUCCESS;
+  roaring_memory_mgr.malloc = roaring_malloc;
+  roaring_memory_mgr.realloc = roaring_realloc;
+  roaring_memory_mgr.calloc = roaring_calloc;
+  roaring_memory_mgr.free = roaring_free;
+  roaring_memory_mgr.aligned_malloc = roaring_aligned_malloc;
+  roaring_memory_mgr.aligned_free = roaring_aligned_free;
+  // initialize global memory hook
+  roaring_init_memory_hook(roaring_memory_mgr);
+  return ret;
+}
+
+// Initializes the slice allocator for the current tenant; destroy() is called if init fails.
+int ObRbMemMgr::init()
+{
+  int ret = OB_SUCCESS;
+  uint64_t tenant_id = MTL_ID();
+  lib::ObMemAttr mem_attr(tenant_id, "RoaringBitmap");
+  if (IS_INIT) {
+    ret = OB_INIT_TWICE;
+    LOG_WARN("ObRbMemMgr init twice.", K(ret));
+  } else if (OB_FAIL(allocator_.init(OB_MALLOC_BIG_BLOCK_SIZE, block_alloc_, mem_attr))) {
+    LOG_WARN("init allocator failed.", K(ret));
+  } else {
+    allocator_.set_nway(RB_ALLOC_CONCURRENCY);
+    is_inited_ = true;
+  }
+  if (OB_UNLIKELY(!is_inited_)) {
+    destroy();
+  }
+  return ret;
+}
+
+void ObRbMemMgr::destroy()
+{
+  allocator_.destroy();
+  is_inited_ = false;
+}
+
+void *ObRbMemMgr::alloc(size_t size)
+{
+  return allocator_.alloc(size);
+}
+
+void ObRbMemMgr::free(void *ptr)
+{
+  return allocator_.free(ptr);
+}
+
+} // common
+} // oceanbase
diff --git a/deps/oblib/src/lib/roaringbitmap/ob_rb_memory_mgr.h b/deps/oblib/src/lib/roaringbitmap/ob_rb_memory_mgr.h
new file mode 100644
index 0000000000..d849626bd3
--- /dev/null
+++ b/deps/oblib/src/lib/roaringbitmap/ob_rb_memory_mgr.h
@@ -0,0 +1,57 @@
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#ifndef OCEANBASE_LIB_OB_RB_MEMORY_MGR_
+#define OCEANBASE_LIB_OB_RB_MEMORY_MGR_
+
+#include "ob_roaringbitmap.h"
+#include "lib/allocator/ob_vslice_alloc.h"
+#include "lib/allocator/ob_block_alloc_mgr.h"
+
+namespace oceanbase
+{
+namespace common
+{
+static roaring_memory_t roaring_memory_mgr;
+
+class ObRbMemMgr
+{
+public:
+  static int init_memory_hook();
+
+private:
+  static const int64_t RB_ALLOC_CONCURRENCY = 32;
+
+public:
+  ObRbMemMgr() : is_inited_(false), block_alloc_(), allocator_() {}
+  ~ObRbMemMgr() {}
+  static int mtl_init(ObRbMemMgr *&rb_allocator) { return rb_allocator->init(); };
+
+  int init();
+  int start() { return OB_SUCCESS; }
+  void stop() {}
+  void wait() {}
+  void destroy();
+
+  void *alloc(size_t size);
+  void free(void *ptr);
+
+private:
+  bool is_inited_;
+  common::ObBlockAllocMgr block_alloc_;
+  common::ObVSliceAlloc allocator_;
+};
+
+} // common
+} // oceanbase
+
+#endif // OCEANBASE_LIB_OB_RB_MEMORY_MGR_
\ No newline at end of file
diff --git a/deps/oblib/src/lib/roaringbitmap/ob_rb_utils.cpp b/deps/oblib/src/lib/roaringbitmap/ob_rb_utils.cpp
index efb52bb95c..64bde367e3 100644
--- a/deps/oblib/src/lib/roaringbitmap/ob_rb_utils.cpp
+++ b/deps/oblib/src/lib/roaringbitmap/ob_rb_utils.cpp
@@ -19,7 +19,7 @@
 #include "lib/utility/ob_fast_convert.h"
 #include "lib/roaringbitmap/ob_rb_utils.h"
 #include "lib/roaringbitmap/ob_rb_bin.h"
-
+#include "roaring/roaring_array.h"
 
 namespace oceanbase
 {
@@ -28,12 +28,29 @@ namespace
common const uint64_t max_rb_to_string_cardinality = 1000000; -int ObRbUtils::check_get_bin_type(const ObString &rb_bin, ObRbBinType &bin_type) +int ObRbUtils::get_bin_type(const ObString &rb_bin, ObRbBinType &bin_type) { int ret = OB_SUCCESS; + // get_bin_type + if (rb_bin == nullptr || rb_bin.empty()) { + ret = OB_INVALID_DATA; + LOG_WARN("roaringbitmap binary is empty", K(ret), K(rb_bin)); + } else if (rb_bin.length() < RB_VERSION_SIZE + RB_TYPE_SIZE) { + ret = OB_INVALID_DATA; + LOG_WARN("invalid roaringbitmap binary length", K(ret), K(rb_bin.length())); + } else { + bin_type = static_cast(*(rb_bin.ptr() + RB_VERSION_SIZE)); + } + return ret; +} + +int ObRbUtils::check_binary(const ObString &rb_bin) +{ + int ret = OB_SUCCESS; + ObRbBinType bin_type; uint32_t offset = RB_VERSION_SIZE + RB_TYPE_SIZE; // get_bin_type - if (rb_bin == nullptr) { + if (rb_bin == nullptr || rb_bin.empty()) { ret = OB_INVALID_DATA; LOG_WARN("roaringbitmap binary is empty", K(ret), K(rb_bin)); } else if (rb_bin.length() < RB_VERSION_SIZE + RB_TYPE_SIZE) { @@ -43,7 +60,7 @@ int ObRbUtils::check_get_bin_type(const ObString &rb_bin, ObRbBinType &bin_type) ret = OB_INVALID_DATA; LOG_WARN("invalid version from roaringbitmap binary", K(ret), K(*(rb_bin.ptr()))); } else if (*(rb_bin.ptr() + RB_VERSION_SIZE) >= static_cast(ObRbBinType::MAX_TYPE) - || *(rb_bin.ptr() + RB_VERSION_SIZE) < 0) { + || *(rb_bin.ptr() + RB_VERSION_SIZE) < 0) { ret = OB_INVALID_DATA; LOG_WARN("invalid binary type from roaringbitmap binary", K(ret), K(*(rb_bin.ptr() + RB_VERSION_SIZE))); } else { @@ -129,41 +146,378 @@ int ObRbUtils::check_get_bin_type(const ObString &rb_bin, ObRbBinType &bin_type) return ret; } -int ObRbUtils::get_cardinality(ObIAllocator &allocator, const ObString &rb_bin, ObRbBinType bin_type, uint64_t &cardinality) +int ObRbUtils::build_empty_binary(ObIAllocator &allocator, ObString &res_rb_bin) +{ + int ret = OB_SUCCESS; + uint8_t version = BITMAP_VESION_1; + ObRbBinType bin_type = 
ObRbBinType::EMPTY; + ObStringBuffer res_buf(&allocator); + if (OB_FAIL(res_buf.append(reinterpret_cast(&version), RB_VERSION_SIZE))) { + LOG_WARN("failed to append version", K(ret)); + } else if (OB_FAIL(res_buf.append(reinterpret_cast(&bin_type), RB_BIN_TYPE_SIZE))) { + LOG_WARN("failed to append bin_type", K(ret)); + } else { + res_rb_bin.assign_ptr(res_buf.ptr(), res_buf.length()); + } + return ret; +} + +int ObRbUtils::to_roaring64_bin(ObIAllocator &allocator, ObRbBinType rb_type, ObString &rb_bin, ObString &roaring64_bin) { int ret = OB_SUCCESS; uint32_t offset = RB_VERSION_SIZE + RB_BIN_TYPE_SIZE; - if (bin_type == ObRbBinType::EMPTY) { - cardinality = 0; + if (rb_type == ObRbBinType::BITMAP_64) { + roaring64_bin.assign_ptr(rb_bin.ptr() + offset, rb_bin.length() - offset); + } else if (rb_type == ObRbBinType::BITMAP_32) { + uint64_t map_size = 1; + uint32_t high32 = 0; + ObStringBuffer bin_buf(&allocator); + if (OB_FAIL(bin_buf.append(reinterpret_cast(&map_size), sizeof(uint64_t)))) { + LOG_WARN("failed to append map size", K(ret)); + } else if (OB_FAIL(bin_buf.append(reinterpret_cast(&high32), sizeof(uint32_t)))) { + LOG_WARN("failed to append map prefix", K(ret)); + } else if (OB_FAIL(bin_buf.append(rb_bin.ptr() + offset, rb_bin.length() - offset))) { + LOG_WARN("failed to append serialized string", K(ret), K(rb_bin)); + } else { + roaring64_bin.assign_ptr(bin_buf.ptr(), bin_buf.length()); + } + } else { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid ObRbBinType", K(ret), K(rb_type)); + } + return ret; +} + +int ObRbUtils::get_cardinality(ObIAllocator &allocator, const ObString &rb_bin, uint64_t &cardinality) +{ + int ret = OB_SUCCESS; + cardinality = 0; + ObRbBinType bin_type; + uint32_t offset = RB_VERSION_SIZE + RB_BIN_TYPE_SIZE; + if (OB_FAIL(get_bin_type(rb_bin, bin_type))) { + LOG_WARN("failed to get binary type", K(ret)); + } else if (bin_type == ObRbBinType::EMPTY) { + // do nothing } else if (bin_type == ObRbBinType::SINGLE_32 || bin_type == 
ObRbBinType::SINGLE_64) { cardinality = 1; } else if (bin_type == ObRbBinType::SET_32 || bin_type == ObRbBinType::SET_64) { uint8_t value_count = static_cast(*(rb_bin.ptr() + offset)); cardinality = static_cast(value_count); - } else { + } else if (bin_type == ObRbBinType::BITMAP_32) { ObString binary_str; binary_str.assign_ptr(rb_bin.ptr() + offset, rb_bin.length() - offset); - if (bin_type == ObRbBinType::BITMAP_32) { - ObRoaringBin *roaring_bin = NULL; - if (OB_ISNULL(roaring_bin = OB_NEWx(ObRoaringBin, &allocator, &allocator, binary_str))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("failed to alloc memory for ObRoaringBin", K(ret)); - } else if (OB_FAIL(roaring_bin->init())) { - LOG_WARN("failed to get roaring card", K(ret), K(binary_str)); - } else if (OB_FAIL(roaring_bin->get_cardinality(cardinality))) { - LOG_WARN("failed to get roaring card", K(ret), K(binary_str)); + ObRoaringBin *roaring_bin = NULL; + if (OB_ISNULL(roaring_bin = OB_NEWx(ObRoaringBin, &allocator, &allocator, binary_str))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc memory for ObRoaringBin", K(ret)); + } else if (OB_FAIL(roaring_bin->init())) { + LOG_WARN("failed to init ObRoaringBin", K(ret), K(binary_str)); + } else if (OB_FAIL(roaring_bin->get_cardinality(cardinality))) { + LOG_WARN("failed to get roaring card", K(ret), K(binary_str)); + } + } else if (bin_type == ObRbBinType::BITMAP_64) { + ObString binary_str; + binary_str.assign_ptr(rb_bin.ptr() + offset, rb_bin.length() - offset); + ObRoaring64Bin *roaring64_bin = NULL; + if (OB_ISNULL(roaring64_bin = OB_NEWx(ObRoaring64Bin, &allocator, &allocator, binary_str))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc memory for ObRoaring64Bin", K(ret)); + } else if (OB_FAIL(roaring64_bin->init())) { + LOG_WARN("failed to init ObRoaring64Bin", K(ret), K(binary_str)); + } else if (OB_FAIL(roaring64_bin->get_cardinality(cardinality))) { + LOG_WARN("failed to get roaring card", K(ret), K(binary_str)); + 
} + } + return ret; +} + +int ObRbUtils::get_calc_cardinality(ObIAllocator &allocator, ObString &rb1_bin, ObString &rb2_bin, uint64_t &cardinality, ObRbOperation op) +{ + int ret = OB_SUCCESS; + ObRbBinType rb1_type; + ObRbBinType rb2_type; + uint64_t rb1_card = 0; + uint64_t rb2_card = 0; + uint64_t and_card = 0; + if (OB_FAIL(get_bin_type(rb1_bin, rb1_type))) { + LOG_WARN("invalid left roaringbitmap binary string", K(ret)); + } else if (OB_FAIL(get_bin_type(rb2_bin, rb2_type))) { + LOG_WARN("invalid right roaringbitmap binary string", K(ret)); + } else if (op == ObRbOperation::AND) { + rb1_card = 1; // no need to calculate rb1 cardinality + rb2_card = 1; // no need to calculate rb2 cardinality + if (OB_FAIL(get_and_cardinality(allocator, rb1_bin, rb1_type, rb2_bin, rb2_type, and_card, rb1_card, rb2_card))) { + LOG_WARN("failed to calculate and cardinality", K(ret)); + } else { + cardinality = and_card; + } + } else if (op == ObRbOperation::OR) { + if (OB_FAIL(get_and_cardinality(allocator, rb1_bin, rb1_type, rb2_bin, rb2_type, and_card, rb1_card, rb2_card))) { + LOG_WARN("failed to calculate and cardinality", K(ret)); + } else { + cardinality = rb1_card + rb2_card - and_card; + } + } else if (op == ObRbOperation::XOR) { + if (OB_FAIL(get_and_cardinality(allocator, rb1_bin, rb1_type, rb2_bin, rb2_type, and_card, rb1_card, rb2_card))) { + LOG_WARN("failed to calculate and cardinality", K(ret)); + } else { + cardinality = rb1_card + rb2_card - 2 * and_card; + } + } else if (op == ObRbOperation::ANDNOT) { + rb2_card = 1; // no need to calculate rb2 cardinality + if (OB_FAIL(get_and_cardinality(allocator, rb1_bin, rb1_type, rb2_bin, rb2_type, and_card, rb1_card, rb2_card))) { + LOG_WARN("failed to calculate and cardinality", K(ret)); + } else { + cardinality = rb1_card - and_card; + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid RbOperation", K(ret), K(op)); + } + return ret; +} + +int ObRbUtils::get_and_cardinality(ObIAllocator &allocator, + ObString 
&rb1_bin, + ObRbBinType rb1_type, + ObString &rb2_bin, + ObRbBinType rb2_type, + uint64_t &cardinality, + uint64_t &rb1_card, + uint64_t &rb2_card) +{ + int ret = OB_SUCCESS; + if (!is_bitmap_bin(rb1_type) && !is_bitmap_bin(rb2_type)) { + // do deserivalize for both roaringbitmap + ObRoaringBitmap *rb1 = nullptr; + ObRoaringBitmap *rb2 = nullptr; + if (OB_FAIL(rb_deserialize(allocator, rb1_bin, rb1))) { + LOG_WARN("failed to deserialize left roaringbitmap", K(ret)); + } else if (OB_FAIL(rb_deserialize(allocator, rb2_bin, rb2))) { + LOG_WARN("failed to deserialize right roaringbitmap", K(ret)); + } else if (rb2->is_empty_type()) { + cardinality = 0; + } else if (rb2->is_single_type()) { + if (rb1->is_contains(rb2->get_single_value())) { + cardinality = 1; } - } else if (bin_type == ObRbBinType::BITMAP_64) { - ObRoaring64Bin *roaring64_bin = NULL; - if (OB_ISNULL(roaring64_bin = OB_NEWx(ObRoaring64Bin, &allocator, &allocator, binary_str))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("failed to alloc memory for ObRoaring64Bin", K(ret)); - } else if (OB_FAIL(roaring64_bin->init())) { - LOG_WARN("failed to get roaring card", K(ret), K(binary_str)); - } else if (OB_FAIL(roaring64_bin->get_cardinality(cardinality))) { - LOG_WARN("failed to get roaring card", K(ret), K(binary_str)); + } else if (rb2->is_set_type()) { + hash::ObHashSet *set = rb2->get_set(); + hash::ObHashSet::const_iterator iter; + cardinality = 0; + for (iter = set->begin(); iter != set->end(); iter++) { + if (rb1->is_contains(iter->first)) { + cardinality += 1; + } } } + if (OB_FAIL(ret)) { + } else if (rb1_card == 0 && OB_FALSE_IT(rb1_card = rb1->get_cardinality()) ) { + } else if (rb2_card == 0 && OB_FALSE_IT(rb2_card = rb2->get_cardinality())) { + } + rb_destroy(rb1); + rb_destroy(rb2); + } else if (is_bitmap_bin(rb1_type) && !is_bitmap_bin(rb2_type)) { + // do deserivalize for only right roaringbitmap + ObString rb1_roaring64_bin; + ObRoaring64Bin *rb1 = nullptr; + ObRoaringBitmap *rb2 = nullptr; 
+ if (OB_FAIL(to_roaring64_bin(allocator, rb1_type, rb1_bin, rb1_roaring64_bin))) { + LOG_WARN("failed to get roaring64 binary string from left roaringbitmap", K(ret), K(rb1_type)); + } else if (OB_ISNULL(rb1 = OB_NEWx(ObRoaring64Bin, &allocator, &allocator, rb1_roaring64_bin))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc memory for left ObRoaring64Bin", K(ret)); + } else if (OB_FAIL(rb1->init())) { + LOG_WARN("failed to init left ObRoaring64Bin", K(ret)); + } else if (OB_FAIL(rb_deserialize(allocator, rb2_bin, rb2))) { + LOG_WARN("failed to deserialize right roaringbitmap", K(ret)); + } else if (rb2->is_empty_type()) { + cardinality = 0; + } else if (rb2->is_single_type()) { + bool is_contains = false; + if (OB_FAIL(rb1->contains(rb2->get_single_value(), is_contains))) { + LOG_WARN("failed to check value is_contains", K(ret), K(rb2->get_single_value())); + } else { + cardinality = is_contains? 1 : 0; + } + } else if (rb2->is_set_type()) { + cardinality = 0; + hash::ObHashSet *set = rb2->get_set(); + hash::ObHashSet::const_iterator iter; + for (iter = set->begin(); iter != set->end(); iter++) { + bool is_contains = false; + if (OB_FAIL(rb1->contains(iter->first, is_contains))) { + LOG_WARN("failed to check value is_contains", K(ret), K(iter->first)); + } else if (is_contains) { + cardinality += 1; + } + } + } + if (OB_FAIL(ret)) { + } else if (rb1_card == 0 && OB_FAIL(rb1->get_cardinality(rb1_card))) { + LOG_WARN("failed to get cardinality from left roaringbitmap", K(ret)); + } else if (rb2_card == 0 && OB_FALSE_IT(rb2_card = rb2->get_cardinality())) { + } + rb_destroy(rb2); + } else if (!is_bitmap_bin(rb1_type) && is_bitmap_bin(rb2_type)) { + // switch position of rb1 and rb2 + if (OB_FAIL(get_and_cardinality(allocator, rb2_bin, rb2_type, rb1_bin, rb1_type, cardinality, rb2_card, rb1_card))) { + LOG_WARN("failed to calculate and cardinality", K(ret)); + } + } else if (is_bitmap_bin(rb1_type) && is_bitmap_bin(rb2_type)) { + // no deserialize 
for roaringbitmap + ObRoaring64Bin *rb1 = nullptr; + ObRoaring64Bin *rb2 = nullptr; + ObString rb1_roaring64_bin; + ObString rb2_roaring64_bin; + if (OB_FAIL(to_roaring64_bin(allocator, rb1_type, rb1_bin, rb1_roaring64_bin))) { + LOG_WARN("failed to get roaring64 binary string from left roaringbitmap", K(ret), K(rb1_type)); + } else if (OB_ISNULL(rb1 = OB_NEWx(ObRoaring64Bin, &allocator, &allocator, rb1_roaring64_bin))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc memory for left ObRoaring64Bin", K(ret)); + } else if (OB_FAIL(rb1->init())) { + LOG_WARN("failed to init left ObRoaring64Bin", K(ret)); + } else if (OB_FAIL(to_roaring64_bin(allocator, rb2_type, rb2_bin, rb2_roaring64_bin))) { + LOG_WARN("failed to get roaring64 binary string from right roaringbitmap", K(ret), K(rb2_type)); + } else if (OB_ISNULL(rb2 = OB_NEWx(ObRoaring64Bin, &allocator, &allocator, rb2_roaring64_bin))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc memory for right ObRoaring64Bin", K(ret)); + } else if (OB_FAIL(rb2->init())) { + LOG_WARN("failed to init right ObRoaring64Bin", K(ret)); + } else if (OB_FAIL(rb1->calc_and_cardinality(rb2, cardinality))) { + LOG_WARN("failed to calculate and cardinality", K(ret)); + } + if (OB_FAIL(ret)) { + } else if (rb1_card == 0 && OB_FAIL(rb1->get_cardinality(rb1_card))) { + LOG_WARN("failed to get cardinality from left roaringbitmap", K(ret)); + } else if (rb2_card == 0 && OB_FAIL(rb2->get_cardinality(rb2_card))) { + LOG_WARN("failed to get cardinality from right roaringbitmap", K(ret)); + } + } + return ret; +} + + +int ObRbUtils::binary_calc(ObIAllocator &allocator, ObString &rb1_bin, ObString &rb2_bin, ObString &res_rb_bin, ObRbOperation op) +{ + int ret = OB_SUCCESS; + ObRbBinType rb1_bin_type = ObRbBinType::EMPTY; + ObRbBinType rb2_bin_type = ObRbBinType::EMPTY; + if (rb1_bin.empty() || rb2_bin.empty()) { + ret = OB_INVALID_DATA; + LOG_WARN("roaringbitmap binary is empty", K(ret), K(rb1_bin), K(rb2_bin)); + } 
else if (OB_FAIL(get_bin_type(rb1_bin, rb1_bin_type))) { + LOG_WARN("failed to get binary type", K(ret)); + } else if (OB_FAIL(get_bin_type(rb2_bin, rb2_bin_type))) { + LOG_WARN("failed to get binary type", K(ret)); + } else if (op != ObRbOperation::AND && op != ObRbOperation::ANDNOT) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("operaration for roaringbitmap binary calculation except AND and ANDNOT is not supported", K(ret), K(op)); + } else if (rb1_bin_type == ObRbBinType::EMPTY) { + if (op == ObRbOperation::AND) { + res_rb_bin.assign_ptr(rb1_bin.ptr(), rb1_bin.length()); + } else if (op == ObRbOperation::ANDNOT) { + res_rb_bin.assign_ptr(rb1_bin.ptr(), rb1_bin.length()); + } + } else if (rb2_bin_type == ObRbBinType::EMPTY) { + if (op == ObRbOperation::AND) { + res_rb_bin.assign_ptr(rb2_bin.ptr(), rb2_bin.length()); + } else if (op == ObRbOperation::ANDNOT) { + res_rb_bin.assign_ptr(rb1_bin.ptr(), rb1_bin.length()); + } + } else if (!is_bitmap_bin(rb1_bin_type) && !is_bitmap_bin(rb2_bin_type)) { + // if there is no bitmap binary, binary calculation is no effective + // deserivalize rb1 and rb2 -> calculate in place -> serialize rb1 as result + ObRoaringBitmap *rb1 = nullptr; + ObRoaringBitmap *rb2 = nullptr; + if (OB_FAIL(rb_deserialize(allocator, rb1_bin, rb1))) { + LOG_WARN("failed to deserialize roaringbitmap rb1", K(ret), K(rb1_bin)); + } else if (OB_FAIL(rb_deserialize(allocator, rb2_bin, rb2))) { + LOG_WARN("failed to deserialize roaringbitmap rb2", K(ret), K(rb2_bin)); + } else if (OB_FAIL(calc_inplace(rb1, rb2, op))) { + LOG_WARN("failed to calcutlate roaringbitmap inplace", K(ret), K(op), K(rb1), K(rb2)); + } else if (OB_FAIL(rb_serialize(allocator, res_rb_bin, rb1))) { + LOG_WARN("failed to serialize roaringbitmap", K(ret)); + } + rb_destroy(rb1); + rb_destroy(rb2); + } else { + // convert rb1 and rb2 to bitmap binary -> init ObRoaring64Bin -> calculate binary_and + ObString rb1_bitmap_bin; + ObString rb2_bitmap_bin; + ObString rb1_roaring64_bin; + ObString 
rb2_roaring64_bin; + ObRoaring64Bin *rb1 = nullptr; + ObRoaring64Bin *rb2 = nullptr; + ObStringBuffer res_buf(&allocator); + ObRbBinType res_bin_type = ObRbBinType::BITMAP_64; + uint64_t res_card = 0; + if (OB_FAIL(convert_to_bitmap_binary(allocator, rb1_bin, rb1_bitmap_bin, rb1_bin_type))) { + LOG_WARN("failed to convert rb1_bin to bitmap binary", K(ret)); + } else if (OB_FAIL(to_roaring64_bin(allocator, rb1_bin_type, rb1_bitmap_bin, rb1_roaring64_bin))) { + LOG_WARN("failed to get roaring64 binary string from rb1 roaringbitmap", K(ret), K(rb1_bin_type)); + } else if (OB_ISNULL(rb1 = OB_NEWx(ObRoaring64Bin, &allocator, &allocator, rb1_roaring64_bin))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc memory for rb1 ObRoaring64Bin", K(ret)); + } else if (OB_FAIL(rb1->init())) { + LOG_WARN("failed to init rb1 ObRoaring64Bin", K(ret)); + } else if (OB_FAIL(convert_to_bitmap_binary(allocator, rb2_bin, rb2_bitmap_bin, rb2_bin_type))) { + LOG_WARN("failed to convert rb2_bin to bitmap binary", K(ret)); + } else if (OB_FAIL(to_roaring64_bin(allocator, rb2_bin_type, rb2_bitmap_bin, rb2_roaring64_bin))) { + LOG_WARN("failed to get roaring64 binary string from rb2 roaringbitmap", K(ret), K(rb2_bin_type)); + } else if (OB_ISNULL(rb2 = OB_NEWx(ObRoaring64Bin, &allocator, &allocator, rb2_roaring64_bin))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc memory for rb2 ObRoaring64Bin", K(ret)); + } else if (OB_FAIL(rb2->init())) { + LOG_WARN("failed to init rb2 ObRoaring64Bin", K(ret)); + } else if (OB_FAIL(res_buf.append(rb1_bitmap_bin.ptr(), RB_VERSION_SIZE))) { + LOG_WARN("failed to append version", K(ret)); + } else if (OB_FAIL(res_buf.append(reinterpret_cast(&res_bin_type), RB_BIN_TYPE_SIZE))) { + LOG_WARN("failed to append res_bin_type", K(ret)); + } else if (op == ObRbOperation::AND && OB_FAIL(rb1->calc_and(rb2, res_buf, res_card))) { + LOG_WARN("failed to calculate and", K(ret)); + } else if (op == ObRbOperation::ANDNOT && 
OB_FAIL(rb1->calc_andnot(rb2, res_buf, res_card))) { + LOG_WARN("failed to calculate andnot", K(ret)); + } else if (OB_FALSE_IT(res_rb_bin.assign_ptr(res_buf.ptr(), res_buf.length()))) { + } else if (res_card <= MAX_BITMAP_SET_VALUES) { + // convert to smaller bintype + ObRoaringBitmap *rb = NULL; + if (OB_FAIL(rb_deserialize(allocator, res_rb_bin, rb))) { + LOG_WARN("failed to deserialize roaringbitmap", K(ret)); + } else if OB_FAIL(rb_serialize(allocator, res_rb_bin, rb)) { + LOG_WARN("failed to serialize roaringbitmap", K(ret)); + } + rb_destroy(rb); + } + } + return ret; +} + +int ObRbUtils::calc_inplace(ObRoaringBitmap *&rb1, ObRoaringBitmap *&rb2, ObRbOperation op) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(OB_ISNULL(rb1) || OB_ISNULL(rb2))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("roaringbitmap is null", K(ret), K(rb1), K(rb2)); + } else if (op == ObRbOperation::AND) { + if (OB_FAIL(rb1->value_and(rb2))) { + LOG_WARN("failed to calculate value and", K(ret), K(op)); + } + } else if (op == ObRbOperation::OR) { + if (OB_FAIL(rb1->value_or(rb2))) { + LOG_WARN("failed to calculate value or", K(ret), K(op)); + } + } else if (op == ObRbOperation::XOR) { + if (OB_FAIL(rb1->value_xor(rb2))) { + LOG_WARN("failed to calculate value xor", K(ret), K(op)); + } + } else if (op == ObRbOperation::ANDNOT) { + if (OB_FAIL(rb1->value_andnot(rb2))) { + LOG_WARN("failed to calculate value andnot", K(ret), K(op)); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected ObRbOperation", K(ret), K(op)); } return ret; } @@ -175,16 +529,14 @@ void ObRbUtils::rb_destroy(ObRoaringBitmap *&rb) } return; } -int ObRbUtils::rb_deserialize(ObIAllocator &allocator, const ObString &rb_bin, ObRoaringBitmap *&rb) +int ObRbUtils::rb_deserialize(ObIAllocator &allocator, const ObString &rb_bin, ObRoaringBitmap *&rb, bool need_validate) { int ret = OB_SUCCESS; ObRbBinType bin_type; - if (OB_FAIL(check_get_bin_type(rb_bin, bin_type))) { - LOG_WARN("invalid roaringbitmap binary string", 
K(ret)); - } else if (OB_ISNULL(rb = OB_NEWx(ObRoaringBitmap, &allocator, (&allocator)))) { + if (OB_ISNULL(rb = OB_NEWx(ObRoaringBitmap, &allocator, (&allocator)))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("failed to create alloc memory to roaringbitmap", K(ret)); - } else if (OB_FAIL(rb->deserialize(rb_bin))) { + } else if (OB_FAIL(rb->deserialize(rb_bin, need_validate))) { LOG_WARN("failed to deserialize roaringbitmap", K(ret)); } return ret; @@ -207,64 +559,33 @@ int ObRbUtils::rb_serialize(ObIAllocator &allocator, ObString &res_rb_bin, ObRoa int ObRbUtils::build_binary(ObIAllocator &allocator, ObString &rb_bin, ObString &res_rb_bin) { int ret = OB_SUCCESS; - ObRbBinType bin_type; - uint64_t cardinality = 0; - if (OB_FAIL(check_get_bin_type(rb_bin, bin_type))) { + ObRoaringBitmap *rb = NULL; + bool need_validate = true; + // binary_check -> deserialize -> serialize + if (OB_FAIL(check_binary(rb_bin))) { LOG_WARN("invalid roaringbitmap binary string", K(ret)); - } else if (OB_FAIL(get_cardinality(allocator, rb_bin, bin_type, cardinality))) { - LOG_WARN("failed to get cardinality from roaringbitmap binary", K(ret)); - } else if (((bin_type == ObRbBinType::BITMAP_32 || bin_type == ObRbBinType::BITMAP_64) && cardinality <= MAX_BITMAP_SET_VALUES) - || (bin_type == ObRbBinType::SET_32 && cardinality < 2) - || bin_type == ObRbBinType::SET_64) { - // deserialize -> optimize -> serialize - ObRoaringBitmap *rb = NULL; - if (OB_FAIL(rb_deserialize(allocator, rb_bin, rb))) { - LOG_WARN("failed to deserialize roaringbitmap", K(ret)); - } else if (OB_FAIL(rb_serialize(allocator, res_rb_bin, rb))) { - LOG_WARN("failed to serialize roaringbitmap", K(ret)); - } - rb_destroy(rb); - } else if (bin_type == ObRbBinType::BITMAP_64) { - uint32_t offset = RB_VERSION_SIZE + RB_BIN_TYPE_SIZE; - uint64_t buckets = *reinterpret_cast(rb_bin.ptr() + offset); - offset += sizeof(uint64_t); - uint32_t high32 = *reinterpret_cast(rb_bin.ptr() + offset); - offset += sizeof(uint32_t); - if 
(buckets == 1 && high32 == 0) { - // BITMAP_32 is enough - bin_type = ObRbBinType::BITMAP_32; - ObStringBuffer res_buf(&allocator); - if (OB_FAIL(res_buf.append(rb_bin.ptr(), RB_VERSION_SIZE))) { - LOG_WARN("failed to append version", K(ret), K(rb_bin)); - } else if (OB_FAIL(res_buf.append(reinterpret_cast(&bin_type), RB_BIN_TYPE_SIZE))) { - LOG_WARN("failed to append bin_type", K(ret)); - } else if (OB_FAIL(res_buf.append(rb_bin.ptr() + offset, rb_bin.length() - offset))) { - LOG_WARN("failed to append roaing binary", K(ret), K(rb_bin)); - } else { - res_rb_bin.assign_ptr(res_buf.ptr(), res_buf.length()); - } - } else { - res_rb_bin = rb_bin; - } - } else { - res_rb_bin = rb_bin; + } else if (OB_FAIL(rb_deserialize(allocator, rb_bin, rb, need_validate))) { + LOG_WARN("failed to deserialize roaringbitmap", K(ret)); + } else if OB_FAIL(rb_serialize(allocator, res_rb_bin, rb)) { + LOG_WARN("failed to serialize roaringbitmap", K(ret)); } + rb_destroy(rb); return ret; } -int ObRbUtils::binary_format_convert(ObIAllocator &allocator, const ObString &rb_bin, ObString &binary_str) +int ObRbUtils::convert_to_bitmap_binary(ObIAllocator &allocator, const ObString &rb_bin, ObString &bitmap_bin, ObRbBinType &bin_type) { int ret = OB_SUCCESS; - ObRbBinType bin_type; - if (rb_bin.empty()) { - binary_str = rb_bin; - } else if (OB_FAIL(check_get_bin_type(rb_bin, bin_type))) { - LOG_WARN("invalid roaringbitmap binary string", K(ret)); + if (OB_FAIL(get_bin_type(rb_bin, bin_type))) { + LOG_WARN("failed to get binary type", K(ret)); } else if (bin_type == ObRbBinType::BITMAP_32 || bin_type == ObRbBinType::BITMAP_64) { - binary_str.assign_ptr(rb_bin.ptr(),rb_bin.length()); + // no need to convert + bitmap_bin.assign_ptr(rb_bin.ptr(), rb_bin.length()); } else { + // For empty/single/set type, convert to bitmap type: deserialize -> convert_to_bitmap -> serialize ObRoaringBitmap *rb = NULL; ObStringBuffer res_buf(&allocator); + ObString tmp_res_bin; if (OB_ISNULL(rb = 
OB_NEWx(ObRoaringBitmap, &allocator, (&allocator)))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("failed to create alloc memory to roaringbitmap", K(ret)); @@ -274,14 +595,91 @@ int ObRbUtils::binary_format_convert(ObIAllocator &allocator, const ObString &rb LOG_WARN("failed to convert roaringbitmap to bitmap type", K(ret)); } else if (OB_FAIL(rb->serialize(res_buf))) { LOG_WARN("failed to serialize the roaringbitmap"); - } else { - binary_str.assign_ptr(res_buf.ptr(), res_buf.length()); + } else if (OB_FALSE_IT(bitmap_bin.assign_ptr(res_buf.ptr(), res_buf.length()))) { + } else if (OB_FAIL(get_bin_type(bitmap_bin, bin_type))) { // OB_FAIL stores the error code in ret so the failure reaches the caller instead of only being logged + LOG_WARN("failed to get binary type", K(ret)); } rb_destroy(rb); } return ret; } +int ObRbUtils::binary_format_convert(ObIAllocator &allocator, const ObString &rb_bin, ObString &binary_str) +{ + int ret = OB_SUCCESS; + ObRbBinType bin_type = ObRbBinType::EMPTY; + ObString bitmap_bin; + if (rb_bin.empty()) { + binary_str.assign_ptr(rb_bin.ptr(), rb_bin.length()); + } else if (OB_FAIL(convert_to_bitmap_binary(allocator, rb_bin, bitmap_bin, bin_type))) { + LOG_WARN("failed to convert rb_bin to bitmap binary", K(ret), K(rb_bin)); + } else if (bin_type == ObRbBinType::BITMAP_32) { + binary_str.assign_ptr(bitmap_bin.ptr(), bitmap_bin.length()); + } else if (bin_type == ObRbBinType::BITMAP_64) { + // check and convert to 32-bit roaring binary + uint32_t offset = RB_VERSION_SIZE + RB_BIN_TYPE_SIZE; + uint64_t buckets = 0; + uint32_t high32 = 0; + if (bitmap_bin.length() < offset + sizeof(uint64_t)) { + ret = OB_INVALID_DATA; + LOG_WARN("ran out of bytes while reading buckets", K(ret), K(bitmap_bin.length())); + } else { + buckets = *reinterpret_cast(bitmap_bin.ptr() + offset); + offset += sizeof(uint64_t); + if (buckets == 0) { + // Generate an empty 32-bit roaring binary + ObStringBuffer res_buf(&allocator); + uint8_t version = *(bitmap_bin.ptr()); + bin_type = ObRbBinType::BITMAP_32; + uint32_t roaring32_cookie = 
roaring::internal::SERIAL_COOKIE_NO_RUNCONTAINER; + uint32_t container_num = 0; + if (OB_FAIL(res_buf.append(reinterpret_cast(&version), RB_VERSION_SIZE))) { + LOG_WARN("failed to append version", K(ret)); + } else if (OB_FAIL(res_buf.append(reinterpret_cast(&bin_type), RB_BIN_TYPE_SIZE))) { + LOG_WARN("failed to append bin_type", K(ret)); + } else if (OB_FAIL(res_buf.append(reinterpret_cast(&roaring32_cookie), sizeof(uint32_t)))) { + LOG_WARN("failed to append roaring32_cookie", K(ret)); + } else if (OB_FAIL(res_buf.append(reinterpret_cast(&container_num), sizeof(uint32_t)))) { + LOG_WARN("failed to append container_num", K(ret)); + } else { + binary_str.assign_ptr(res_buf.ptr(), res_buf.length()); + } + } else if (buckets == 1) { + // read high32 and check if high32 == 0 + if (bitmap_bin.length() < offset + sizeof(uint32_t)) { + ret = OB_INVALID_DATA; + LOG_WARN("ran out of bytes while reading the first high32", K(ret), K(bitmap_bin.length())); + } else { + high32 = *reinterpret_cast(bitmap_bin.ptr() + offset); + offset += sizeof(uint32_t); + if (high32 == 0) { + // convert to 32-bit roaring binary directly + ObStringBuffer res_buf(&allocator); + uint8_t version = *(bitmap_bin.ptr()); + bin_type = ObRbBinType::BITMAP_32; + if (OB_FAIL(res_buf.append(reinterpret_cast(&version), RB_VERSION_SIZE))) { + LOG_WARN("failed to append version", K(ret)); + } else if (OB_FAIL(res_buf.append(reinterpret_cast(&bin_type), RB_BIN_TYPE_SIZE))) { + LOG_WARN("failed to append bin_type", K(ret)); + } else if (OB_FAIL(res_buf.append(bitmap_bin.ptr() + offset, bitmap_bin.length() - offset))) { + LOG_WARN("failed to append roaring binary string" ,K(ret)); + } else { + binary_str.assign_ptr(res_buf.ptr(), res_buf.length()); + } + } else { + // no need to convert + binary_str.assign_ptr(bitmap_bin.ptr(), bitmap_bin.length()); + } + } + } else { + // no need to convert + binary_str.assign_ptr(bitmap_bin.ptr(), bitmap_bin.length()); + } + } + } + return ret; +} + int 
ObRbUtils::rb_from_string(ObIAllocator &allocator, ObString &rb_str, ObRoaringBitmap *&rb) { int ret = OB_SUCCESS; @@ -334,7 +732,7 @@ int ObRbUtils::rb_to_string(ObIAllocator &allocator, ObString &rb_bin, ObString ObRbBinType bin_type; uint32_t offset = RB_VERSION_SIZE + RB_BIN_TYPE_SIZE; ObStringBuffer res_buf(&allocator); - if (OB_FAIL(check_get_bin_type(rb_bin, bin_type))) { + if (OB_FAIL(get_bin_type(rb_bin, bin_type))) { LOG_WARN("invalid roaringbitmap binary string", K(ret)); } else { switch(bin_type) { @@ -404,28 +802,31 @@ int ObRbUtils::rb_to_string(ObIAllocator &allocator, ObString &rb_bin, ObString bool is_first = true; roaring::api::roaring_bitmap_t *bitmap = nullptr; roaring::api::roaring_uint32_iterator_t *iter = nullptr; - if (OB_ISNULL(bitmap = roaring::api::roaring_bitmap_portable_deserialize_safe(rb_bin.ptr() + offset, rb_bin.length() - offset))) { - ret = OB_DESERIALIZE_ERROR; - LOG_WARN("failed to deserialize the bitmap", K(ret)); - } else if (!roaring::api::roaring_bitmap_internal_validate(bitmap, NULL)) { - ret = OB_INVALID_DATA; - LOG_WARN("bitmap internal consistency checks failed", K(ret)); + ROARING_TRY_CATCH(bitmap = roaring::api::roaring_bitmap_portable_deserialize_safe(rb_bin.ptr() + offset, rb_bin.length() - offset)); + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(bitmap)) { + ret = OB_DESERIALIZE_ERROR; + LOG_WARN("failed to deserialize the bitmap", K(ret)); } else if (roaring::api::roaring_bitmap_get_cardinality(bitmap) > max_rb_to_string_cardinality) { ret = OB_NOT_SUPPORTED; LOG_WARN("cardinality of roaringbitmap is over 1000000", K(ret), K(roaring::api::roaring_bitmap_get_cardinality(bitmap))); - } else if (OB_ISNULL(iter = roaring_iterator_create(bitmap))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("failed to get iterate from bitmap", K(ret)); - } else if (iter->has_value) { - do { - ObFastFormatInt ffi(iter->current_value); - if (!is_first && OB_FAIL(res_buf.append(","))) { - LOG_WARN("failed to append res_buf", K(ret)); - } 
else if (is_first && OB_FALSE_IT(is_first = false)) { - } else if (OB_FAIL(res_buf.append(ffi.ptr(), ffi.length(), 0))) { - LOG_WARN("failed to append res_buf", K(ret), K(iter->current_value), K(ffi.ptr()), K(ffi.length())); - } - } while (OB_SUCC(ret) && roaring::api::roaring_uint32_iterator_advance(iter)); + } else { + ROARING_TRY_CATCH(iter = roaring::api::roaring_iterator_create(bitmap)); + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(iter)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to get iterate from bitmap", K(ret)); + } else if (iter->has_value) { + do { + ObFastFormatInt ffi(iter->current_value); + if (!is_first && OB_FAIL(res_buf.append(","))) { + LOG_WARN("failed to append res_buf", K(ret)); + } else if (is_first && OB_FALSE_IT(is_first = false)) { + } else if (OB_FAIL(res_buf.append(ffi.ptr(), ffi.length(), 0))) { + LOG_WARN("failed to append res_buf", K(ret), K(iter->current_value), K(ffi.ptr()), K(ffi.length())); + } + } while (OB_SUCC(ret) && roaring::api::roaring_uint32_iterator_advance(iter)); + } } if (OB_NOT_NULL(iter)) { roaring::api::roaring_uint32_iterator_free(iter); @@ -439,30 +840,33 @@ int ObRbUtils::rb_to_string(ObIAllocator &allocator, ObString &rb_bin, ObString bool is_first = true; roaring::api::roaring64_bitmap_t *bitmap = nullptr; roaring::api::roaring64_iterator_t *iter = nullptr; - if (OB_ISNULL(bitmap = roaring::api::roaring64_bitmap_portable_deserialize_safe( - rb_bin.ptr() + offset, - rb_bin.length() - offset))) { + ROARING_TRY_CATCH(bitmap = roaring::api::roaring64_bitmap_portable_deserialize_safe( + rb_bin.ptr() + offset, + rb_bin.length() - offset)); + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(bitmap)) { ret = OB_DESERIALIZE_ERROR; LOG_WARN("failed to deserialize the bitmap", K(ret)); - } else if (!roaring::api::roaring64_bitmap_internal_validate(bitmap, NULL)) { - ret = OB_INVALID_DATA; - LOG_WARN("bitmap internal consistency checks failed", K(ret)); } else if 
(roaring::api::roaring64_bitmap_get_cardinality(bitmap) > max_rb_to_string_cardinality) { ret = OB_NOT_SUPPORTED; LOG_WARN("cardinality of roaringbitmap is over 1000000", K(ret), K(roaring::api::roaring64_bitmap_get_cardinality(bitmap))); - } else if (OB_ISNULL(iter = roaring::api::roaring64_iterator_create(bitmap))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("failed to get iterate from bitmap", K(ret)); - } else if (roaring::api::roaring64_iterator_has_value(iter)) { - do { - ObFastFormatInt ffi(roaring::api::roaring64_iterator_value(iter)); - if (!is_first && OB_FAIL(res_buf.append(","))) { - LOG_WARN("failed to append res_buf", K(ret)); - } else if (is_first && OB_FALSE_IT(is_first = false)) { - } else if (OB_FAIL(res_buf.append(ffi.ptr(), ffi.length(), 0))) { - LOG_WARN("failed to append res_buf", K(ret), K(roaring::api::roaring64_iterator_value(iter)), K(ffi.ptr()), K(ffi.length())); - } - } while (OB_SUCC(ret) && roaring::api::roaring64_iterator_advance(iter)); + } else { + ROARING_TRY_CATCH(iter = roaring::api::roaring64_iterator_create(bitmap)); + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(iter)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to get iterate from bitmap", K(ret)); + } else if (roaring::api::roaring64_iterator_has_value(iter)) { + do { + ObFastFormatInt ffi(roaring::api::roaring64_iterator_value(iter)); + if (!is_first && OB_FAIL(res_buf.append(","))) { + LOG_WARN("failed to append res_buf", K(ret)); + } else if (is_first && OB_FALSE_IT(is_first = false)) { + } else if (OB_FAIL(res_buf.append(ffi.ptr(), ffi.length(), 0))) { + LOG_WARN("failed to append res_buf", K(ret), K(roaring::api::roaring64_iterator_value(iter)), K(ffi.ptr()), K(ffi.length())); + } + } while (OB_SUCC(ret) && roaring::api::roaring64_iterator_advance(iter)); + } } if (OB_NOT_NULL(iter)) { roaring::api::roaring64_iterator_free(iter); @@ -486,99 +890,6 @@ int ObRbUtils::rb_to_string(ObIAllocator &allocator, ObString &rb_bin, ObString return ret; } -int 
ObRbUtils::and_cardinality(ObRoaringBitmap *&rb1, ObRoaringBitmap *&rb2, uint64_t &cardinality) -{ - int ret = OB_SUCCESS; - cardinality = 0; - if (rb2->is_bitmap_type() && !rb1->is_bitmap_type()) { - ret = and_cardinality(rb2, rb1, cardinality); - } else if (rb2->is_empty_type()) { - // do noting - } else if (rb2->is_single_type()) { - if (rb1->is_contains(rb2->get_single_value())) { - cardinality += 1; - } - } else if (rb2->is_set_type()) { - hash::ObHashSet *set = rb2->get_set(); - hash::ObHashSet::const_iterator iter; - for (iter = set->begin(); iter != set->end(); iter++) { - if (rb1->is_contains(iter->first)) { - cardinality += 1; - } - } - } else { // both rb1 and rb2 is bitmap type - cardinality = roaring::api::roaring64_bitmap_and_cardinality(rb1->get_bitmap(), rb2->get_bitmap()); - } - return ret; -} - -int ObRbUtils::or_cardinality(ObRoaringBitmap *&rb1, ObRoaringBitmap *&rb2, uint64_t &cardinality) -{ - int ret = OB_SUCCESS; - uint64_t c1 = rb1->get_cardinality(); - uint64_t c2 = rb2->get_cardinality(); - uint64_t inter = 0; - if (OB_FAIL(and_cardinality(rb1, rb2, inter))) { - LOG_WARN("failed to get and_cardinality", K(ret)); - } else { - cardinality = c1 + c2 - inter; - } - return ret; -} - -int ObRbUtils::xor_cardinality(ObRoaringBitmap *&rb1, ObRoaringBitmap *&rb2, uint64_t &cardinality) -{ - int ret = OB_SUCCESS; - uint64_t c1 = rb1->get_cardinality(); - uint64_t c2 = rb2->get_cardinality(); - uint64_t inter = 0; - if (OB_FAIL(and_cardinality(rb1, rb2, inter))) { - LOG_WARN("failed to get and_cardinality", K(ret)); - } else { - cardinality = c1 + c2 - 2 * inter; - } - return ret; -} - -int ObRbUtils::andnot_cardinality(ObRoaringBitmap *&rb1, ObRoaringBitmap *&rb2, uint64_t &cardinality) -{ - int ret = OB_SUCCESS; - uint64_t c1 = rb1->get_cardinality(); - uint64_t inter = 0; - if (OB_FAIL(and_cardinality(rb1, rb2, inter))) { - LOG_WARN("failed to get and_cardinality", K(ret)); - } else { - cardinality = c1 - inter; - } - return ret; -} - -int 
ObRbUtils::calc_cardinality(ObRoaringBitmap *&rb1, ObRoaringBitmap *&rb2, uint64_t &cardinality, ObRbOperation op) -{ - int ret = OB_SUCCESS; - if (op == ObRbOperation::AND) { - if (OB_FAIL(and_cardinality(rb1, rb2, cardinality))) { - LOG_WARN("failed to calculate cardinality", K(ret), K(op)); - } - } else if (op == ObRbOperation::OR) { - if (OB_FAIL(or_cardinality(rb1, rb2, cardinality))) { - LOG_WARN("failed to calculate cardinality", K(ret), K(op)); - } - } else if (op == ObRbOperation::XOR) { - if (OB_FAIL(xor_cardinality(rb1, rb2, cardinality))) { - LOG_WARN("failed to calculate cardinality", K(ret), K(op)); - } - } else if (op == ObRbOperation::ANDNOT) { - if (OB_FAIL(andnot_cardinality(rb1, rb2, cardinality))) { - LOG_WARN("failed to calculate cardinality", K(ret), K(op)); - } - } else { - ret = OB_NOT_SUPPORTED; - LOG_WARN("roaringbitmap operation not supported", K(ret), K(op)); - } - return ret; -} - int ObRbUtils::str_read_value_(const char *str, size_t len, char *&value_end, uint64_t &value) { int ret = OB_SUCCESS; diff --git a/deps/oblib/src/lib/roaringbitmap/ob_rb_utils.h b/deps/oblib/src/lib/roaringbitmap/ob_rb_utils.h index 2bc20dad71..a7afda4f41 100644 --- a/deps/oblib/src/lib/roaringbitmap/ob_rb_utils.h +++ b/deps/oblib/src/lib/roaringbitmap/ob_rb_utils.h @@ -14,6 +14,7 @@ #define OCEANBASE_LIB_ROARINGBITMAP_OB_RB_UTILS_ #include "ob_roaringbitmap.h" +#include "ob_rb_bin.h" #include "lib/string/ob_string.h" #include "lib/string/ob_string_buffer.h" @@ -27,26 +28,39 @@ public: virtual ~ObRbUtils() = default; // binary operation - static int check_get_bin_type(const ObString &rb_bin, ObRbBinType &bin_type); - static int get_cardinality(ObIAllocator &allocator, const ObString &rb_bin, ObRbBinType bin_type, uint64_t &cardinality); + static int get_bin_type(const ObString &rb_bin, ObRbBinType &bin_type); + static int check_binary(const ObString &rb_bin); + static int build_empty_binary(ObIAllocator &allocator, ObString &res_rb_bin); + static int 
to_roaring64_bin(ObIAllocator &allocator, ObRbBinType rb_type, ObString &rb_bin, ObString &roaring64_bin); + static int get_cardinality(ObIAllocator &allocator, const ObString &rb_bin, uint64_t &cardinality); + static int get_calc_cardinality(ObIAllocator &allocator, ObString &rb1_bin, ObString &rb2_bin, uint64_t &cardinality, ObRbOperation op); + static int get_and_cardinality(ObIAllocator &allocator, + ObString &rb1_bin, + ObRbBinType rb1_type, + ObString &rb2_bin, + ObRbBinType rb2_type, + uint64_t &cardinality, + uint64_t &rb1_card, + uint64_t &rb2_card); + static int binary_calc(ObIAllocator &allocator, ObString &rb1_bin, ObString &rb2_bin, ObString &res_rb_bin, ObRbOperation op); + static int calc_inplace(ObRoaringBitmap *&rb1, ObRoaringBitmap *&rb2, ObRbOperation op); // common static void rb_destroy(ObRoaringBitmap *&rb); - static int rb_deserialize(ObIAllocator &allocator, const ObString &rb_bin, ObRoaringBitmap *&rb); + static int rb_deserialize(ObIAllocator &allocator, const ObString &rb_bin, ObRoaringBitmap *&rb, bool need_validate = false); static int rb_serialize(ObIAllocator &allocator, ObString &res_rb_bin, ObRoaringBitmap *&rb); static int build_binary(ObIAllocator &allocator, ObString &rb_bin, ObString &res_rb_bin); + static int convert_to_bitmap_binary(ObIAllocator &allocator, const ObString &rb_bin, ObString &bitmap_bin, ObRbBinType &bin_type); static int binary_format_convert(ObIAllocator &allocator, const ObString &rb_bin, ObString &roaring_bin); static int rb_from_string(ObIAllocator &allocator, ObString &rb_str, ObRoaringBitmap *&rb); static int rb_to_string(ObIAllocator &allocator, ObString &rb_bin, ObString &res_rb_str); // calculate - static int and_cardinality(ObRoaringBitmap *&rb1, ObRoaringBitmap *&rb2, uint64_t &cardinality); - static int or_cardinality(ObRoaringBitmap *&rb1, ObRoaringBitmap *&rb2, uint64_t &cardinality); - static int xor_cardinality(ObRoaringBitmap *&rb1, ObRoaringBitmap *&rb2, uint64_t &cardinality); - static int 
andnot_cardinality(ObRoaringBitmap *&rb1, ObRoaringBitmap *&rb2, uint64_t &cardinality); - static int calc_cardinality(ObRoaringBitmap *&rb1, ObRoaringBitmap *&rb2, uint64_t &cardinality, ObRbOperation op); static int rb_calc_equals(ObRoaringBitmap *&rb1, ObRoaringBitmap *&rb2, bool &result); //not impl + // check + static inline bool is_bitmap_bin(ObRbBinType bintype) {return ObRbBinType::BITMAP_32 == bintype || ObRbBinType::BITMAP_64 == bintype;} + private: inline static void str_skip_space_(const char *&str, const char *end) { while (str < end && (*str == ' ' || *str == '\0')) { @@ -61,4 +75,4 @@ private: } // namespace common } // namespace oceanbase -#endif // OCEANBASE_LIB_ROARINGBITMAP_OB_ROARINGBITMAP_ \ No newline at end of file +#endif // OCEANBASE_LIB_ROARINGBITMAP_OB_RB_UTILS_ \ No newline at end of file diff --git a/deps/oblib/src/lib/roaringbitmap/ob_roaringbitmap.cpp b/deps/oblib/src/lib/roaringbitmap/ob_roaringbitmap.cpp index 84b0f8f5eb..9b645c82c1 100644 --- a/deps/oblib/src/lib/roaringbitmap/ob_roaringbitmap.cpp +++ b/deps/oblib/src/lib/roaringbitmap/ob_roaringbitmap.cpp @@ -67,7 +67,8 @@ bool ObRoaringBitmap::is_contains(uint64_t value) } return res; } -int ObRoaringBitmap::value_add(uint64_t value) { +int ObRoaringBitmap::value_add(uint64_t value) +{ int ret = OB_SUCCESS; switch (type_) { case ObRbType::EMPTY: { @@ -105,7 +106,7 @@ int ObRoaringBitmap::value_add(uint64_t value) { break; } case ObRbType::BITMAP: { - roaring::api::roaring64_bitmap_add(bitmap_, value); + ROARING_TRY_CATCH(roaring::api::roaring64_bitmap_add(bitmap_, value)); break; } default: { @@ -113,7 +114,7 @@ int ObRoaringBitmap::value_add(uint64_t value) { LOG_WARN("unknown RbType", K(ret), K(type_)); break; } - } // end switch + } // end switch return ret; } @@ -137,7 +138,7 @@ int ObRoaringBitmap::value_remove(uint64_t value) { break; } case ObRbType::BITMAP: { - roaring::api::roaring64_bitmap_remove(bitmap_, value); + 
ROARING_TRY_CATCH(roaring::api::roaring64_bitmap_remove(bitmap_, value)); break; } default: { @@ -152,54 +153,55 @@ int ObRoaringBitmap::value_remove(uint64_t value) { int ObRoaringBitmap::value_and(ObRoaringBitmap *rb) { int ret = OB_SUCCESS; - if (is_empty_type()) { - //do nothing - } else if (is_single_type()) { - if (rb->is_contains(single_value_)) { - // do nothing - } else { - set_empty(); - } - } else if (is_set_type()) { - hash::ObHashSet::const_iterator iter = set_.begin(); - int set_size = set_.size(); - for (int i = 0; OB_SUCC(ret) && i < set_size; i++) { - if (i != 0) { - iter++; - } - if (!rb->is_contains(iter->first) && OB_FAIL(value_remove(iter->first))) { - LOG_WARN("failed to remove value", K(ret), K(iter->first)); - } - } - } else if (is_bitmap_type()) { - if (rb->is_empty_type()) { - set_empty(); - } else if (rb->is_single_type()) { - if (is_contains(rb->single_value_)) { - set_single(rb->single_value_); + if (is_empty_type()) { + //do nothing + } else if (is_single_type()) { + if (rb->is_contains(single_value_)) { + // do nothing } else { set_empty(); } - } else if (rb->is_set_type()) { - if (OB_FAIL(set_.create(MAX_BITMAP_SET_VALUES))) { - LOG_WARN("failed to create set", K(ret)); - } else { - hash::ObHashSet::const_iterator iter; - for (iter = rb->set_.begin(); OB_SUCC(ret) && iter != rb->set_.end(); iter++) { - if (is_contains(iter->first) && OB_FAIL(set_.set_refactored(iter->first))) { - LOG_WARN("failed to set_refactored to ObHashSet", K(ret), K(iter->first)); - } + } else if (is_set_type()) { + hash::ObHashSet::const_iterator iter = set_.begin(); + int set_size = set_.size(); + for (int i = 0; OB_SUCC(ret) && i < set_size; i++) { + if (i != 0) { + iter++; + } + if (!rb->is_contains(iter->first) && OB_FAIL(value_remove(iter->first))) { + LOG_WARN("failed to remove value", K(ret), K(iter->first)); } } - if (OB_FAIL(ret)) { - } else { - roaring::api::roaring64_bitmap_free(bitmap_); - bitmap_ = NULL; - type_ = ObRbType::SET; + } else if 
(is_bitmap_type()) { + if (rb->is_empty_type()) { + set_empty(); + } else if (rb->is_single_type()) { + if (is_contains(rb->single_value_)) { + set_single(rb->single_value_); + } else { + set_empty(); + } + } else if (rb->is_set_type()) { + if (OB_FAIL(set_.create(MAX_BITMAP_SET_VALUES))) { + LOG_WARN("failed to create set", K(ret)); + } else { + hash::ObHashSet::const_iterator iter; + for (iter = rb->set_.begin(); OB_SUCC(ret) && iter != rb->set_.end(); iter++) { + if (is_contains(iter->first) && OB_FAIL(set_.set_refactored(iter->first))) { + LOG_WARN("failed to set_refactored to ObHashSet", K(ret), K(iter->first)); + } + } + } + if (OB_SUCC(ret)) { + type_ = ObRbType::SET; + if (OB_NOT_NULL(bitmap_)) { + roaring::api::roaring64_bitmap_free(bitmap_); + bitmap_ = NULL; + } + } + } else if (rb->is_bitmap_type()) { + ROARING_TRY_CATCH(roaring::api::roaring64_bitmap_and_inplace(bitmap_, rb->bitmap_)); } - } else if (rb->is_bitmap_type()) { - roaring::api::roaring64_bitmap_and_inplace(bitmap_, rb->bitmap_); - } } return ret; } @@ -207,39 +209,25 @@ int ObRoaringBitmap::value_and(ObRoaringBitmap *rb) int ObRoaringBitmap::value_or(ObRoaringBitmap *rb) { int ret = OB_SUCCESS; - if (rb->is_empty_type()) { - // do nothing - } else if (rb->is_single_type()) { - if (OB_FAIL(value_add(rb->single_value_))) { - LOG_WARN("failed to add value", K(ret), K(rb->single_value_)); - } - } else if (rb->is_set_type()) { - hash::ObHashSet::const_iterator iter; - for (iter = rb->set_.begin(); OB_SUCC(ret) && iter != rb->set_.end(); iter++) { - if (OB_FAIL(value_add(iter->first))) { - LOG_WARN("failed to add value", K(ret), K(iter->first)); + if (rb->is_empty_type()) { + // do nothing + } else if (rb->is_single_type()) { + if (OB_FAIL(value_add(rb->single_value_))) { + LOG_WARN("failed to add value", K(ret), K(rb->single_value_)); } - } - } else if (rb->is_bitmap_type()) { - if (is_bitmap_type()) { - roaring::api::roaring64_bitmap_or_inplace(bitmap_, rb->bitmap_); - } else 
if(OB_ISNULL(bitmap_ = roaring::api::roaring64_bitmap_copy(rb->bitmap_))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("failed to copy bitmap", K(ret)); - } else if (is_empty_type()) { - type_ = ObRbType::BITMAP; - } else if (is_single_type()) { - roaring::api::roaring64_bitmap_add(bitmap_, single_value_); - single_value_ = 0; - type_ = ObRbType::BITMAP; - } else if (is_set_type()) { + } else if (rb->is_set_type()) { hash::ObHashSet::const_iterator iter; - for (iter = set_.begin(); iter != set_.end(); iter++) { - roaring::api::roaring64_bitmap_add(bitmap_, iter->first); + for (iter = rb->set_.begin(); OB_SUCC(ret) && iter != rb->set_.end(); iter++) { + if (OB_FAIL(value_add(iter->first))) { + LOG_WARN("failed to add value", K(ret), K(iter->first)); + } + } + } else if (rb->is_bitmap_type()) { + if (OB_FAIL(convert_to_bitmap())) { + LOG_WARN("failed to convert roaringbitmap to bitmap type", K(ret)); + } else { + ROARING_TRY_CATCH(roaring::api::roaring64_bitmap_or_inplace(bitmap_, rb->bitmap_)); } - set_.destroy(); - type_ = ObRbType::BITMAP; - } } return ret; } @@ -247,59 +235,37 @@ int ObRoaringBitmap::value_or(ObRoaringBitmap *rb) int ObRoaringBitmap::value_xor(ObRoaringBitmap *rb) { int ret = OB_SUCCESS; - if (rb->is_empty_type()) { - // do nothing - } else if (rb->is_single_type()) { - if (is_contains(rb->single_value_)) { - if (OB_FAIL(value_remove(rb->single_value_))) { - LOG_WARN("failed to remove value", K(ret), K(rb->single_value_)); - } - } else { - if (OB_FAIL(value_add(rb->single_value_))) { - LOG_WARN("failed to add value", K(ret), K(rb->single_value_)); - } - } - } else if (rb->is_set_type()) { - hash::ObHashSet::const_iterator iter; - for (iter = rb->set_.begin(); OB_SUCC(ret) && iter != rb->set_.end(); iter++) { - if (is_contains(iter->first)) { - if (OB_FAIL(value_remove(iter->first))) { - LOG_WARN("failed to remove value", K(ret), K(iter->first)); + if (rb->is_empty_type()) { + // do nothing + } else if (rb->is_single_type()) { + if 
(is_contains(rb->single_value_)) { + if (OB_FAIL(value_remove(rb->single_value_))) { + LOG_WARN("failed to remove value", K(ret), K(rb->single_value_)); } } else { - if (OB_FAIL(value_add(iter->first))) { - LOG_WARN("failed to add value", K(ret), K(iter->first)); + if (OB_FAIL(value_add(rb->single_value_))) { + LOG_WARN("failed to add value", K(ret), K(rb->single_value_)); } } - } - } else if (rb->is_bitmap_type()) { - if (is_bitmap_type()) { - roaring::api::roaring64_bitmap_xor_inplace(bitmap_, rb->bitmap_); - } else if(OB_ISNULL(bitmap_ = roaring::api::roaring64_bitmap_copy(rb->bitmap_))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("failed to copy bitmap", K(ret)); - } else if (is_empty_type()) { - type_ = ObRbType::BITMAP; - } else if (is_single_type()) { - if (roaring::api::roaring64_bitmap_contains(bitmap_, single_value_)) { - roaring::api::roaring64_bitmap_remove(bitmap_, single_value_); - } else { - roaring::api::roaring64_bitmap_add(bitmap_, single_value_); - } - single_value_ = 0; - type_ = ObRbType::BITMAP; - } else if (is_set_type()) { + } else if (rb->is_set_type()) { hash::ObHashSet::const_iterator iter; - for (iter = set_.begin(); iter != set_.end(); iter++) { - if (roaring::api::roaring64_bitmap_contains(bitmap_, iter->first)) { - roaring::api::roaring64_bitmap_remove(bitmap_, iter->first); + for (iter = rb->set_.begin(); OB_SUCC(ret) && iter != rb->set_.end(); iter++) { + if (is_contains(iter->first)) { + if (OB_FAIL(value_remove(iter->first))) { + LOG_WARN("failed to remove value", K(ret), K(iter->first)); + } } else { - roaring::api::roaring64_bitmap_add(bitmap_, iter->first); + if (OB_FAIL(value_add(iter->first))) { + LOG_WARN("failed to add value", K(ret), K(iter->first)); + } } } - set_.destroy(); - type_ = ObRbType::BITMAP; - } + } else if (rb->is_bitmap_type()) { + if (OB_FAIL(convert_to_bitmap())) { + LOG_WARN("failed to convert roaringbitmap to bitmap type", K(ret)); + } else { + 
ROARING_TRY_CATCH(roaring::api::roaring64_bitmap_xor_inplace(bitmap_, rb->bitmap_)); + } } return ret; } @@ -339,45 +305,49 @@ int ObRoaringBitmap::value_andnot(ObRoaringBitmap *rb) } } } else if (is_bitmap_type()) { - roaring::api::roaring64_bitmap_andnot_inplace(bitmap_, rb->bitmap_); + ROARING_TRY_CATCH(roaring::api::roaring64_bitmap_andnot_inplace(bitmap_, rb->bitmap_)); } } return ret; } -int ObRoaringBitmap::value_calc(ObRoaringBitmap *rb, ObRbOperation op) -{ - int ret = OB_SUCCESS; - if (op == ObRbOperation::AND) { - if (OB_FAIL(value_and(rb))) { - LOG_WARN("failed to calculate value and", K(ret), K(op)); - } - } else if (op == ObRbOperation::OR) { - if (OB_FAIL(value_or(rb))) { - LOG_WARN("failed to calculate value or", K(ret), K(op)); - } - } else if (op == ObRbOperation::XOR) { - if (OB_FAIL(value_xor(rb))) { - LOG_WARN("failed to calculate value xor", K(ret), K(op)); - } - } else if (op == ObRbOperation::ANDNOT) { - if (OB_FAIL(value_andnot(rb))) { - LOG_WARN("failed to calculate value andnot", K(ret), K(op)); - } - } else { - ret = OB_NOT_SUPPORTED; - LOG_WARN("roaringbitmap operation not supported", K(ret), K(op)); - } - return ret; -} - int ObRoaringBitmap::optimize() { int ret = OB_SUCCESS; - if (is_bitmap_type() && OB_FAIL(convert_bitmap_to_smaller_type())) { - LOG_WARN("failed to convert bitmap to smaller type"); + uint64_t cardinality = get_cardinality(); + if (is_bitmap_type()) { + // bitmap -> set/single/empty + if (cardinality == 0) { + set_empty(); + } else if (cardinality == 1) { + set_single(roaring::api::roaring64_bitmap_minimum(bitmap_)); + } else if (cardinality <= MAX_BITMAP_SET_VALUES) { + roaring::api::roaring64_iterator_t* it = nullptr; + ROARING_TRY_CATCH(it = roaring::api::roaring64_iterator_create(bitmap_)); + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(it)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to create bitmap iterator", K(ret)); + } else if (OB_FAIL(set_.create(MAX_BITMAP_SET_VALUES))) { + LOG_WARN("failed to 
create set", K(ret)); + } else { + do { + if (OB_FAIL(set_.set_refactored(roaring::api::roaring64_iterator_value(it)))) { + LOG_WARN("failed to set value to the set", K(ret), K(roaring::api::roaring64_iterator_value(it))); + } + } while (roaring::api::roaring64_iterator_advance(it) && OB_SUCC(ret)); + } + if (OB_NOT_NULL(it)) { + roaring::api::roaring64_iterator_free(it); + } + if (OB_SUCC(ret) && OB_NOT_NULL(bitmap_)) { // commit the switch to SET only after every value was copied; on failure keep type_ == BITMAP with the bitmap intact + type_ = ObRbType::SET; + roaring::api::roaring64_bitmap_free(bitmap_); + bitmap_ = nullptr; + } + } } else if (is_set_type()){ - uint64_t cardinality = static_cast(set_.size()); + // set -> single/empty if (cardinality == 0) { set_empty(); } else if (cardinality == 1) { @@ -387,96 +357,124 @@ int ObRoaringBitmap::optimize() return ret; } -int ObRoaringBitmap::deserialize(const ObString &rb_bin) +int ObRoaringBitmap::deserialize(const ObString &rb_bin, bool need_validate) { int ret = OB_SUCCESS; - uint32_t offset = RB_VERSION_SIZE + RB_BIN_TYPE_SIZE; - version_ = *(rb_bin.ptr()); - ObRbBinType bin_type = static_cast(*(rb_bin.ptr() + RB_VERSION_SIZE)); - switch (bin_type) { - case ObRbBinType::EMPTY: { - set_empty(); - break; - } - case ObRbBinType::SINGLE_32: { - uint32_t value_32 = *reinterpret_cast(rb_bin.ptr() + offset); - set_single(static_cast(value_32)); - break; - } - case ObRbBinType::SINGLE_64: { - set_single(*reinterpret_cast(rb_bin.ptr() + offset)); - break; - } - case ObRbBinType::SET_32: { - uint32_t value_32 = 0; - uint8_t value_count = static_cast(*(rb_bin.ptr() + offset)); - offset += RB_VALUE_COUNT_SIZE; - for (int i = 0; OB_SUCC(ret) && i < value_count; i++) { - value_32 = *reinterpret_cast(rb_bin.ptr() + offset); - offset += sizeof(uint32_t); - if (OB_FAIL(value_add(static_cast(value_32)))) { - LOG_WARN("failed to add value to roaringbtimap", K(ret), K(value_32)); + if (rb_bin.empty()) { + ret = OB_INVALID_DATA; + LOG_WARN("rb_bin is empty", K(ret)); + } else { + uint32_t offset = RB_VERSION_SIZE + RB_BIN_TYPE_SIZE; + 
version_ = *(rb_bin.ptr()); + ObRbBinType bin_type = static_cast(*(rb_bin.ptr() + RB_VERSION_SIZE)); + set_empty(); + switch (bin_type) { + case ObRbBinType::EMPTY: { + // do nothing + break; + } + case ObRbBinType::SINGLE_32: { + uint32_t value_32 = *reinterpret_cast(rb_bin.ptr() + offset); + set_single(static_cast(value_32)); + break; + } + case ObRbBinType::SINGLE_64: { + set_single(*reinterpret_cast(rb_bin.ptr() + offset)); + break; + } + case ObRbBinType::SET_32: { + uint32_t value_32 = 0; + uint32_t pre_val = 0; + uint8_t value_count = static_cast(*(rb_bin.ptr() + offset)); + offset += RB_VALUE_COUNT_SIZE; + for (int i = 0; OB_SUCC(ret) && i < value_count; i++) { + value_32 = *reinterpret_cast(rb_bin.ptr() + offset); + offset += sizeof(uint32_t); + if (need_validate) { + if (i == 0) { + pre_val = value_32; + } else if (value_32 <= pre_val) { + ret = OB_INVALID_DATA; + LOG_WARN("invalid roaringbitmap set binary", K(ret), K(i), K(value_32), K(pre_val)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(value_add(static_cast(value_32)))) { + LOG_WARN("failed to add value to roaringbtimap", K(ret), K(value_32)); + } } + break; } - break; - } - case ObRbBinType::SET_64: { - uint64_t value_64 = 0; - uint8_t value_count = static_cast(*(rb_bin.ptr() + offset)); - offset += RB_VALUE_COUNT_SIZE; - for (int i = 0; OB_SUCC(ret) && i < value_count; i++) { - value_64 = *reinterpret_cast(rb_bin.ptr() + offset); - offset += sizeof(uint64_t); - if (OB_FAIL(value_add(value_64))) { - LOG_WARN("failed to add value to rb", K(ret), K(value_64)); + case ObRbBinType::SET_64: { + uint64_t value_64 = 0; + uint64_t pre_val = 0; + uint8_t value_count = static_cast(*(rb_bin.ptr() + offset)); + offset += RB_VALUE_COUNT_SIZE; + for (int i = 0; OB_SUCC(ret) && i < value_count; i++) { + value_64 = *reinterpret_cast(rb_bin.ptr() + offset); + offset += sizeof(uint64_t); + if (need_validate) { + if (i == 0) { + pre_val = value_64; + } else if (value_64 <= pre_val) { + ret = OB_INVALID_DATA; + 
LOG_WARN("invalid roaringbitmap set binary", K(ret), K(i), K(value_64), K(pre_val)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(value_add(value_64))) { + LOG_WARN("failed to add value to roaringbtimap", K(ret), K(value_64)); + } } + break; } - break; - } - case ObRbBinType::BITMAP_32: { - uint64_t serial_size = sizeof(uint64_t) + sizeof(uint32_t) + rb_bin.length() - offset; - uint64_t map_size = 1; - uint32_t map_prefix = 0; - ObStringBuffer tmp_buf(allocator_); - if (OB_FAIL(tmp_buf.append(reinterpret_cast(&map_size), sizeof(uint64_t)))) { - LOG_WARN("failed to append map size", K(ret)); - } else if (OB_FAIL(tmp_buf.append(reinterpret_cast(&map_prefix), sizeof(uint32_t)))) { - LOG_WARN("failed to append map prefix", K(ret)); - } else if (OB_FAIL(tmp_buf.append(rb_bin.ptr() + offset, rb_bin.length() - offset))) { - LOG_WARN("failed to append serialized string", K(ret), K(rb_bin)); - } else if (OB_ISNULL(bitmap_ = roaring::api::roaring64_bitmap_portable_deserialize_safe( - tmp_buf.ptr(), - tmp_buf.length()))) { - ret = OB_DESERIALIZE_ERROR; - LOG_WARN("failed to deserialize the bitmap", K(ret)); - } else if (!roaring::api::roaring64_bitmap_internal_validate(bitmap_, NULL)) { - ret = OB_INVALID_DATA; - LOG_WARN("bitmap internal consistency checks failed", K(ret)); - } else { - type_ = ObRbType::BITMAP; + case ObRbBinType::BITMAP_32: { + uint64_t serial_size = sizeof(uint64_t) + sizeof(uint32_t) + rb_bin.length() - offset; + uint64_t map_size = 1; + uint32_t map_prefix = 0; + ObStringBuffer tmp_buf(allocator_); + if (OB_FAIL(tmp_buf.append(reinterpret_cast(&map_size), sizeof(uint64_t)))) { + LOG_WARN("failed to append map size", K(ret)); + } else if (OB_FAIL(tmp_buf.append(reinterpret_cast(&map_prefix), sizeof(uint32_t)))) { + LOG_WARN("failed to append map prefix", K(ret)); + } else if (OB_FAIL(tmp_buf.append(rb_bin.ptr() + offset, rb_bin.length() - offset))) { + LOG_WARN("failed to append serialized string", K(ret), K(rb_bin)); + } else { + 
ROARING_TRY_CATCH(bitmap_ = roaring::api::roaring64_bitmap_portable_deserialize_safe(tmp_buf.ptr(),tmp_buf.length())); + } + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(bitmap_)) { + ret = OB_DESERIALIZE_ERROR; + LOG_WARN("failed to deserialize the bitmap", K(ret)); + } else if (need_validate && !roaring::api::roaring64_bitmap_internal_validate(bitmap_, NULL)) { + ret = OB_INVALID_DATA; + LOG_WARN("bitmap internal consistency checks failed", K(ret)); + } else { + type_ = ObRbType::BITMAP; + } + break; } - break; - } - case ObRbBinType::BITMAP_64: { - if (OB_ISNULL(bitmap_ = roaring::api::roaring64_bitmap_portable_deserialize_safe( - rb_bin.ptr() + offset, - rb_bin.length() - offset))) { - ret = OB_DESERIALIZE_ERROR; - LOG_WARN("failed to deserialize the bitmap", K(ret)); - } else if (!roaring::api::roaring64_bitmap_internal_validate(bitmap_, NULL)) { - ret = OB_INVALID_DATA; - LOG_WARN("bitmap internal consistency checks failed", K(ret)); - } else { - type_ = ObRbType::BITMAP; + case ObRbBinType::BITMAP_64: { + ROARING_TRY_CATCH(bitmap_ = roaring::api::roaring64_bitmap_portable_deserialize_safe(rb_bin.ptr() + offset, rb_bin.length() - offset)); + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(bitmap_)) { + ret = OB_DESERIALIZE_ERROR; + LOG_WARN("failed to deserialize the bitmap", K(ret)); + } else if (need_validate && !roaring::api::roaring64_bitmap_internal_validate(bitmap_, NULL)) { + ret = OB_INVALID_DATA; + LOG_WARN("bitmap internal consistency checks failed", K(ret)); + } else { + type_ = ObRbType::BITMAP; + } + break; } - break; - } - default: { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("unknown RbBinType", K(ret), K(bin_type)); - break; - } - } // end switch + default: { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("unknown RbBinType", K(ret), K(bin_type)); + break; + } + } // end switch + } return ret; } @@ -484,7 +482,6 @@ int ObRoaringBitmap::serialize(ObStringBuffer &res_buf) { int ret = OB_SUCCESS; ObRbBinType bin_type; - bool is_32bits_enough = (get_max() <= 
UINT32_MAX); if (OB_FAIL(res_buf.append(reinterpret_cast(&version_), RB_VERSION_SIZE))) { LOG_WARN("failed to append version", K(ret)); @@ -498,7 +495,7 @@ int ObRoaringBitmap::serialize(ObStringBuffer &res_buf) break; } case ObRbType::SINGLE: { - if (is_32bits_enough) { + if (get_max() <= UINT32_MAX) { bin_type = ObRbBinType::SINGLE_32; uint32_t single_value_32 = static_cast(single_value_); if (OB_FAIL(res_buf.append(reinterpret_cast(&bin_type), RB_BIN_TYPE_SIZE))) { @@ -518,7 +515,7 @@ int ObRoaringBitmap::serialize(ObStringBuffer &res_buf) } case ObRbType::SET: { int8_t set_size = static_cast(set_.size()); - if (is_32bits_enough) { + if (get_max() <= UINT32_MAX) { bin_type = ObRbBinType::SET_32; if (OB_FAIL(res_buf.append(reinterpret_cast(&bin_type), RB_BIN_TYPE_SIZE))) { LOG_WARN("failed to append bin_type", K(ret)); @@ -552,44 +549,19 @@ int ObRoaringBitmap::serialize(ObStringBuffer &res_buf) break; } case ObRbType::BITMAP: { - ObStringBuffer tmp_buf(allocator_); - if (roaring::api::roaring64_bitmap_is_empty(bitmap_)) { - bin_type = ObRbBinType::BITMAP_32; - uint32_t roaring32_cookie = 12346; - uint32_t container_num = 0; - if (OB_FAIL(res_buf.append(reinterpret_cast(&bin_type), RB_BIN_TYPE_SIZE))) { - LOG_WARN("failed to append bin_type", K(ret)); - } else if (OB_FAIL(res_buf.append(reinterpret_cast(&roaring32_cookie), sizeof(uint32_t)))) { - LOG_WARN("failed to append roaring32_cookie", K(ret)); - } else if (OB_FAIL(res_buf.append(reinterpret_cast(&container_num), sizeof(uint32_t)))) { - LOG_WARN("failed to append container_num", K(ret)); - } - } else { - uint64_t header_size = RB_VERSION_SIZE + RB_BIN_TYPE_SIZE; - uint64_t serial_size = static_cast(roaring::api::roaring64_bitmap_portable_size_in_bytes(bitmap_)); - if (OB_FAIL(tmp_buf.reserve(serial_size))) { - LOG_WARN("failed to reserve buffer", K(ret), K(serial_size)); - } else if (serial_size != roaring::api::roaring64_bitmap_portable_serialize(bitmap_, tmp_buf.ptr())) { - ret = OB_SERIALIZE_ERROR; - 
LOG_WARN("serialize size not match", K(ret), K(serial_size)); - } else if (OB_FAIL(tmp_buf.set_length(serial_size))) { - LOG_WARN("failed to set buffer length", K(ret)); - } else if (is_32bits_enough) { - bin_type = ObRbBinType::BITMAP_32; - uint64_t roaring64_header_size = sizeof(uint64_t) + sizeof(uint32_t); - if (OB_FAIL(res_buf.append(reinterpret_cast(&bin_type), RB_BIN_TYPE_SIZE))) { - LOG_WARN("failed to append bin_type", K(ret)); - } else if (OB_FAIL(res_buf.append(tmp_buf.ptr() + roaring64_header_size, tmp_buf.length() - roaring64_header_size))) { - LOG_WARN("failed to append serialized string", K(ret), K(tmp_buf)); - } - } else { - bin_type = ObRbBinType::BITMAP_64; - if (OB_FAIL(res_buf.append(reinterpret_cast(&bin_type), RB_BIN_TYPE_SIZE))) { - LOG_WARN("failed to append bin_type", K(ret)); - } else if (OB_FAIL(res_buf.append(tmp_buf.ptr(), tmp_buf.length()))) { - LOG_WARN("failed to append serialized string", K(ret), K(tmp_buf)); - } - } + bin_type = ObRbBinType::BITMAP_64; + uint64_t serial_size = 0; + ROARING_TRY_CATCH(serial_size = static_cast(roaring::api::roaring64_bitmap_portable_size_in_bytes(bitmap_))); + if (OB_FAIL(ret)) { + } else if (OB_FAIL(res_buf.reserve(RB_BIN_TYPE_SIZE + serial_size))) { + LOG_WARN("failed to reserve buffer", K(ret), K(serial_size)); + } else if (OB_FAIL(res_buf.append(reinterpret_cast(&bin_type), RB_BIN_TYPE_SIZE))) { + LOG_WARN("failed to append bin_type", K(ret)); + } else if (serial_size != roaring::api::roaring64_bitmap_portable_serialize(bitmap_, res_buf.ptr() + res_buf.length())) { + ret = OB_SERIALIZE_ERROR; + LOG_WARN("serialize size not match", K(ret), K(serial_size)); + } else if (OB_FAIL(res_buf.set_length(res_buf.length() + serial_size))) { + LOG_WARN("failed to set buffer length", K(ret)); } break; } @@ -607,55 +579,24 @@ int ObRoaringBitmap::convert_to_bitmap() { int ret = OB_SUCCESS; if (is_bitmap_type()) { // do nothing - } else if (OB_ISNULL(bitmap_ = roaring::api::roaring64_bitmap_create())) { - ret = 
OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("failed to create bitmap", K(ret)); } else { - if (is_single_type()) { - roaring::api::roaring64_bitmap_add(bitmap_, single_value_); - single_value_ = 0; - } else if (is_set_type()) { - hash::ObHashSet::const_iterator iter; - for (iter = set_.begin(); iter != set_.end(); iter++) { - roaring::api::roaring64_bitmap_add(bitmap_, iter->first); - } - set_.destroy(); - } - type_ = ObRbType::BITMAP; - } - return ret; -} - -int ObRoaringBitmap::convert_bitmap_to_smaller_type() { - int ret = OB_SUCCESS; - if (is_bitmap_type()) { - uint64_t cardinality = roaring::api::roaring64_bitmap_get_cardinality(bitmap_); - if (cardinality == 0) { - set_empty(); - } else if (cardinality == 1) { - set_single(roaring::api::roaring64_bitmap_minimum(bitmap_)); - } else if (cardinality <= MAX_BITMAP_SET_VALUES) { - roaring::api::roaring64_iterator_t* it = nullptr; - if (OB_ISNULL(it = roaring::api::roaring64_iterator_create(bitmap_))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("failed to create bitmap iterator", K(ret)); - } else if (OB_FAIL(set_.create(MAX_BITMAP_SET_VALUES))) { - LOG_WARN("failed to create set", K(ret)); - } else if (OB_FALSE_IT(type_ = ObRbType::SET)) { - } else { - do { - if (OB_FAIL(set_.set_refactored(roaring::api::roaring64_iterator_value(it)))) { - LOG_WARN("failed to set value to the set", K(ret), K(roaring::api::roaring64_iterator_value(it))); - } - } while (roaring::api::roaring64_iterator_advance(it) && OB_SUCC(ret)); - } - if (OB_NOT_NULL(it)) { - roaring::api::roaring64_iterator_free(it); - } - if (OB_NOT_NULL(bitmap_)) { - roaring::api::roaring64_bitmap_free(bitmap_); - bitmap_ = nullptr; + ROARING_TRY_CATCH(bitmap_ = roaring::api::roaring64_bitmap_create()); + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(bitmap_)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to create bitmap", K(ret)); + } else { + if (is_single_type()) { + roaring::api::roaring64_bitmap_add(bitmap_, single_value_); + single_value_ = 0; + } 
else if (is_set_type()) { + hash::ObHashSet::const_iterator iter; + for (iter = set_.begin(); iter != set_.end(); iter++) { + roaring::api::roaring64_bitmap_add(bitmap_, iter->first); + } + set_.destroy(); } + type_ = ObRbType::BITMAP; } } return ret; diff --git a/deps/oblib/src/lib/roaringbitmap/ob_roaringbitmap.h b/deps/oblib/src/lib/roaringbitmap/ob_roaringbitmap.h index 7ddf862eff..ba4bbf5e62 100644 --- a/deps/oblib/src/lib/roaringbitmap/ob_roaringbitmap.h +++ b/deps/oblib/src/lib/roaringbitmap/ob_roaringbitmap.h @@ -30,6 +30,14 @@ namespace common { #define MAX_BITMAP_SET_VALUES 32 #define IS_VALID_RB_VERSION(ver) (ver == BITMAP_VESION_1) +#define ROARING_TRY_CATCH(statement) \ + try { \ + statement; \ + } catch (const std::bad_alloc &e) { \ + ret = OB_ALLOCATE_MEMORY_FAILED; \ + LOG_WARN("fail to alloc memory in croaring", K(ret)); \ + } + static const uint32_t RB_VERSION_SIZE = sizeof(uint8_t); static const uint32_t RB_TYPE_SIZE = sizeof(uint8_t); static const uint32_t RB_BIN_TYPE_SIZE = sizeof(uint8_t); @@ -94,10 +102,9 @@ public: int value_or(ObRoaringBitmap *rb); int value_xor(ObRoaringBitmap *rb); int value_andnot(ObRoaringBitmap *rb); - int value_calc(ObRoaringBitmap *rb, ObRbOperation op); int optimize(); - int deserialize(const ObString &rb_bin); + int deserialize(const ObString &rb_bin, bool need_validate = false); int serialize(ObStringBuffer &res_rb_bin); inline void set_empty() { @@ -123,7 +130,6 @@ public: type_ = ObRbType::SINGLE; } - int convert_bitmap_to_smaller_type(); int convert_to_bitmap(); private: diff --git a/mittest/mtlenv/mock_tenant_module_env.h b/mittest/mtlenv/mock_tenant_module_env.h index 6ad735fbd0..118a31668f 100644 --- a/mittest/mtlenv/mock_tenant_module_env.h +++ b/mittest/mtlenv/mock_tenant_module_env.h @@ -97,6 +97,7 @@ #include "storage/tenant_snapshot/ob_tenant_snapshot_service.h" #include "storage/tmp_file/ob_tmp_file_manager.h" // ObTenantTmpFileManager #include "storage/memtable/ob_lock_wait_mgr.h" +#include 
"lib/roaringbitmap/ob_rb_memory_mgr.h" namespace oceanbase { @@ -702,6 +703,7 @@ int MockTenantModuleEnv::init() MTL_BIND2(mtl_new_default, ObStorageLogger::mtl_init, ObStorageLogger::mtl_start, ObStorageLogger::mtl_stop, ObStorageLogger::mtl_wait, mtl_destroy_default); MTL_BIND2(ObTenantMetaMemMgr::mtl_new, mtl_init_default, mtl_start_default, mtl_stop_default, mtl_wait_default, mtl_destroy_default); MTL_BIND2(mtl_new_default, share::ObSharedMemAllocMgr::mtl_init, mtl_start_default, mtl_stop_default, mtl_wait_default, mtl_destroy_default); + MTL_BIND2(mtl_new_default, common::ObRbMemMgr::mtl_init, mtl_start_default, mtl_stop_default, mtl_wait_default, mtl_destroy_default); MTL_BIND2(mtl_new_default, ObTransService::mtl_init, mtl_start_default, mtl_stop_default, mtl_wait_default, mtl_destroy_default); MTL_BIND2(mtl_new_default, logservice::ObGarbageCollector::mtl_init, mtl_start_default, mtl_stop_default, mtl_wait_default, mtl_destroy_default); MTL_BIND2(mtl_new_default, ObTimestampService::mtl_init, mtl_start_default, mtl_stop_default, mtl_wait_default, mtl_destroy_default); diff --git a/src/observer/ob_server.cpp b/src/observer/ob_server.cpp index 804d98e3f0..7a851ad495 100644 --- a/src/observer/ob_server.cpp +++ b/src/observer/ob_server.cpp @@ -128,6 +128,7 @@ #endif #include "lib/xml/ob_libxml2_sax_handler.h" #include "ob_check_params.h" +#include "lib/roaringbitmap/ob_rb_memory_mgr.h" #ifdef OB_BUILD_AUDIT_SECURITY #include "sql/audit/ob_audit_log_mgr.h" #endif @@ -2720,6 +2721,12 @@ int ObServer::init_sql() ObLibXml2SaxHandler::init(); } + if (OB_SUCC(ret)) { + if (OB_FAIL(ObRbMemMgr::init_memory_hook())) { + LOG_ERROR("fail initialize roaring memory hook", KR(ret)); + } + } + if (OB_SUCC(ret)) { LOG_INFO("init sql done"); } else { diff --git a/src/observer/omt/ob_multi_tenant.cpp b/src/observer/omt/ob_multi_tenant.cpp index 1109b35b67..794bc90f38 100644 --- a/src/observer/omt/ob_multi_tenant.cpp +++ b/src/observer/omt/ob_multi_tenant.cpp @@ -165,6 +165,7 @@ 
#include "rootserver/mview/ob_mview_maintenance_service.h" #include "share/resource_limit_calculator/ob_resource_limit_calculator.h" #include "storage/checkpoint/ob_checkpoint_diagnose.h" +#include "lib/roaringbitmap/ob_rb_memory_mgr.h" #include "storage/tmp_file/ob_tmp_file_manager.h" // ObTenantTmpFileManager #include "storage/restore/ob_tenant_restore_info_mgr.h" #ifdef OB_BUILD_AUDIT_SECURITY @@ -461,6 +462,7 @@ int ObMultiTenant::init(ObAddr myaddr, MTL_BIND2(mtl_new_default, ObStorageLogger::mtl_init, ObStorageLogger::mtl_start, ObStorageLogger::mtl_stop, ObStorageLogger::mtl_wait, mtl_destroy_default); MTL_BIND2(ObTenantMetaMemMgr::mtl_new, mtl_init_default, mtl_start_default, mtl_stop_default, mtl_wait_default, mtl_destroy_default); MTL_BIND2(mtl_new_default, share::ObSharedMemAllocMgr::mtl_init, mtl_start_default, mtl_stop_default, mtl_wait_default, mtl_destroy_default); + MTL_BIND2(mtl_new_default, common::ObRbMemMgr::mtl_init, mtl_start_default, mtl_stop_default, mtl_wait_default, mtl_destroy_default); MTL_BIND2(mtl_new_default, ObTransService::mtl_init, mtl_start_default, mtl_stop_default, mtl_wait_default, mtl_destroy_default); MTL_BIND2(mtl_new_default, ObLogService::mtl_init, mtl_start_default, mtl_stop_default, mtl_wait_default, ObLogService::mtl_destroy); MTL_BIND2(mtl_new_default, logservice::ObGarbageCollector::mtl_init, mtl_start_default, mtl_stop_default, mtl_wait_default, mtl_destroy_default); diff --git a/src/share/rc/ob_tenant_base.h b/src/share/rc/ob_tenant_base.h index 40c4699aa1..cb2017fc72 100755 --- a/src/share/rc/ob_tenant_base.h +++ b/src/share/rc/ob_tenant_base.h @@ -35,6 +35,7 @@ namespace common { template class ObServerObjectPool; class ObDetectManager; class ObOptStatMonitorManager; + class ObRbMemMgr; } namespace omt { class ObPxPools; @@ -374,6 +375,7 @@ using ObTableScanIteratorObjPool = common::ObServerObjectPool(tmp_alloc.alloc(sizeof(ObObj) * (storted_row->cnt_))))) { ret = OB_ALLOCATE_MEMORY_FAILED; @@ -8441,7 +8441,7 @@ 
int ObAggregateProcessor::get_rb_build_agg_result(const ObAggrInfo &aggr_info, } else if (OB_FAIL(convert_datum_to_obj(aggr_info, *storted_row, tmp_obj, storted_row->cnt_))) { LOG_WARN("failed to convert datum to obj", K(ret)); } else if (tmp_obj->is_null()) { - null_val = true; + is_null_val = true; } else if (tmp_obj->is_unsigned_integer()) { val = tmp_obj->get_uint64(); } else if (tmp_obj->is_signed_integer()) { @@ -8460,7 +8460,7 @@ int ObAggregateProcessor::get_rb_build_agg_result(const ObAggrInfo &aggr_info, ret = OB_ERR_INVALID_TYPE_FOR_ARGUMENT; LOG_WARN("invalid data type for roaringbitmap build agg"); } - if (OB_FAIL(ret) || null_val) { + if (OB_FAIL(ret) || is_null_val) { } else if (OB_ISNULL(rb) && OB_ISNULL(rb = OB_NEWx(ObRoaringBitmap, &tmp_alloc, (&tmp_alloc)))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("failed to create alloc memory to roaringbitmap", K(ret)); @@ -8522,13 +8522,16 @@ int ObAggregateProcessor::get_rb_calc_agg_result(const ObAggrInfo &aggr_info, bool inited_tmp_obj = false; ObObj *tmp_obj = NULL; ObRoaringBitmap *rb = NULL; + bool calc_finished = false; - while (OB_SUCC(ret) && OB_SUCC(extra->get_next_row(storted_row))) { + while (OB_SUCC(ret) && !calc_finished && OB_SUCC(extra->get_next_row(storted_row))) { if (OB_ISNULL(storted_row)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(ret), K(storted_row)); } else { ObString tmp_bin; + ObString tmp_rb_bin; + bool is_null_obj = false; // get obj if (!inited_tmp_obj && OB_ISNULL(tmp_obj = static_cast(tmp_alloc.alloc(sizeof(ObObj) * (storted_row->cnt_))))) { @@ -8538,27 +8541,38 @@ int ObAggregateProcessor::get_rb_calc_agg_result(const ObAggrInfo &aggr_info, } else if (OB_FAIL(convert_datum_to_obj(aggr_info, *storted_row, tmp_obj, storted_row->cnt_))) { LOG_WARN("failed to convert datum to obj", K(ret)); } else if (tmp_obj->is_null()) { - // do noting for null - } else if (!(tmp_obj->is_roaringbitmap() - || tmp_obj->is_roaringbitmap() - || tmp_obj->is_hex_string())) { + 
is_null_obj = true; + } else if (!(tmp_obj->is_roaringbitmap() || tmp_obj->is_hex_string())) { ret = OB_ERR_INVALID_TYPE_FOR_ARGUMENT; LOG_WARN("invalid data type for roaringbitmap agg"); } else if (OB_FALSE_IT(tmp_bin = tmp_obj->get_string())) { } else if (OB_FAIL(ObTextStringHelper::read_real_string_data(&tmp_alloc, *tmp_obj, tmp_bin))) { LOG_WARN("failed to get real data.", K(ret), K(tmp_bin)); + } else if (!tmp_obj->is_roaringbitmap()) { + if (OB_FAIL(ObRbUtils::build_binary(tmp_alloc, tmp_bin, tmp_rb_bin))) { + LOG_WARN("failed to build roaringbitmap from binary", K(ret), K(tmp_bin)); + } + } else { + tmp_rb_bin = tmp_bin; + } + + if (OB_FAIL(ret) || is_null_obj) { } else if (OB_ISNULL(rb)) { - if (OB_FAIL(ObRbUtils::rb_deserialize(tmp_alloc, tmp_bin, rb))) { + if (OB_FAIL(ObRbUtils::rb_deserialize(tmp_alloc, tmp_rb_bin, rb))) { LOG_WARN("failed to deserialize roaringbitmap", K(ret)); + } else if (calc_op == ObRbOperation::AND && rb->get_cardinality() == 0) { + calc_finished = true; } } else { ObRoaringBitmap *tmp_rb = NULL; - if (OB_FAIL(ObRbUtils::rb_deserialize(tmp_alloc, tmp_bin, tmp_rb))){ + if (OB_FAIL(ObRbUtils::rb_deserialize(tmp_alloc, tmp_rb_bin, tmp_rb))){ LOG_WARN("failed to deserialize roaringbitmap", K(ret)); - } else if (OB_FAIL(rb->value_calc(tmp_rb, calc_op))) { + } else if (OB_FAIL(ObRbUtils::calc_inplace(rb, tmp_rb, calc_op))) { LOG_WARN("failed to calculate roaringbitmap", K(ret)); - } else if (OB_FALSE_IT(ObRbUtils::rb_destroy(tmp_rb))) { + } else if (calc_op == ObRbOperation::AND && rb->get_cardinality() == 0) { + calc_finished = true; } + ObRbUtils::rb_destroy(tmp_rb); } } }//end of while diff --git a/src/sql/engine/expr/ob_expr_rb_build_empty.cpp b/src/sql/engine/expr/ob_expr_rb_build_empty.cpp index 2f0d18ecb2..f44f65b44e 100644 --- a/src/sql/engine/expr/ob_expr_rb_build_empty.cpp +++ b/src/sql/engine/expr/ob_expr_rb_build_empty.cpp @@ -52,18 +52,12 @@ int ObExprRbBuildEmpty::eval_rb_build_empty(const ObExpr &expr, 
ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx); common::ObArenaAllocator &tmp_allocator = tmp_alloc_g.get_allocator(); lib::ObMallocHookAttrGuard malloc_guard(lib::ObMemAttr(ObRbExprHelper::get_tenant_id(ctx.exec_ctx_.get_my_session()), "ROARINGBITMAP")); - ObString rb_bin; - ObRoaringBitmap *rb_empty; - if (OB_ISNULL(rb_empty = OB_NEWx(ObRoaringBitmap, &tmp_allocator, (&tmp_allocator)))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("failed to create alloc memory to roaringbitmap", K(ret)); - } else if (OB_FAIL(ObRbUtils::rb_serialize(tmp_allocator, rb_bin, rb_empty))) { - LOG_WARN("failed to serialize empty roaringbitmap", K(ret)); + if (OB_FAIL(ObRbUtils::build_empty_binary(tmp_allocator, rb_bin))) { + LOG_WARN("fail to build empty rb binary", K(ret)); } else if (OB_FAIL(ObRbExprHelper::pack_rb_res(expr, ctx, res, rb_bin))) { LOG_WARN("fail to pack roaringbitmap res", K(ret)); } - ObRbUtils::rb_destroy(rb_empty); return ret; } diff --git a/src/sql/engine/expr/ob_expr_rb_build_varbinary.cpp b/src/sql/engine/expr/ob_expr_rb_build_varbinary.cpp index ee27aee414..35adc58303 100644 --- a/src/sql/engine/expr/ob_expr_rb_build_varbinary.cpp +++ b/src/sql/engine/expr/ob_expr_rb_build_varbinary.cpp @@ -69,21 +69,14 @@ int ObExprRbBuildVarbinary::eval_rb_build_varbinary(const ObExpr &expr, ObExpr *rb_arg = expr.args_[0]; bool is_null_result = false; bool is_rb_null = false; - ObString rb_bin = nullptr; - ObString res_rb_bin = nullptr; + ObString rb_bin; + ObString res_rb_bin; if (OB_FAIL(ObRbExprHelper::get_input_roaringbitmap_bin(ctx, tmp_allocator, rb_arg, rb_bin, is_rb_null))) { LOG_WARN("fail to get input roaringbitmap", K(ret)); } else if (is_rb_null || rb_bin == nullptr) { - is_null_result = true; - } else if (OB_FAIL(ObRbUtils::build_binary(tmp_allocator, rb_bin, res_rb_bin))) { - LOG_WARN("failed to build rb binary", K(ret)); - } - - if (OB_FAIL(ret)) { - } else if (is_null_result) { res.set_null(); - } else if (OB_FAIL(ObRbExprHelper::pack_rb_res(expr, ctx, res, 
res_rb_bin))) { + } else if (OB_FAIL(ObRbExprHelper::pack_rb_res(expr, ctx, res, rb_bin))) { LOG_WARN("fail to pack roaringbitmap res", K(ret)); } diff --git a/src/sql/engine/expr/ob_expr_rb_calc.cpp b/src/sql/engine/expr/ob_expr_rb_calc.cpp index b9469be889..be37c21504 100644 --- a/src/sql/engine/expr/ob_expr_rb_calc.cpp +++ b/src/sql/engine/expr/ob_expr_rb_calc.cpp @@ -69,39 +69,62 @@ int ObExprRbCalc::eval_rb_calc(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res, bool is_rb1_null = false; bool is_rb2_null = false; bool is_res_null = false; - ObRoaringBitmap *rb1 = nullptr; - ObRoaringBitmap *rb2 = nullptr; - ObString rb_res; - if (OB_FAIL(ObRbExprHelper::get_input_roaringbitmap(ctx, tmp_allocator, rb1_arg, rb1, is_rb1_null))) { - LOG_WARN("failed to get left input roaringbitmap", K(ret)); - } else if (is_rb1_null && !is_null2empty) { - is_res_null = true; - } else if (is_rb1_null && is_null2empty - && OB_ISNULL(rb1 = OB_NEWx(ObRoaringBitmap, &tmp_allocator, (&tmp_allocator)))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("failed to create alloc memory to roaringbitmap", K(ret)); - } else if (OB_FAIL(ObRbExprHelper::get_input_roaringbitmap(ctx, tmp_allocator, rb2_arg, rb2, is_rb2_null))) { - LOG_WARN("failed to get right input roaringbitmap", K(ret)); - } else if (is_rb2_null && !is_null2empty) { - is_res_null = true; - } else if (is_rb2_null && is_null2empty - && OB_ISNULL(rb2 = OB_NEWx(ObRoaringBitmap, &tmp_allocator, (&tmp_allocator)))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("failed to create alloc memory to roaringbitmap", K(ret)); - } else if (OB_FAIL(rb1->value_calc(rb2, op))) { - LOG_WARN("failed to calcutlate roaringbitmap value_and", K(ret)); + ObString res_rb_bin; + if (op == ObRbOperation::AND || op == ObRbOperation::ANDNOT) { + ObString rb1_bin; + ObString rb2_bin; + if (OB_FAIL(ObRbExprHelper::get_input_roaringbitmap_bin(ctx, tmp_allocator, rb1_arg, rb1_bin, is_rb1_null))) { + LOG_WARN("fail to get left input roaringbitmap", K(ret)); + } 
else if (is_rb1_null && !is_null2empty) { + is_res_null = true; + } else if (is_rb1_null && is_null2empty && OB_FAIL(ObRbUtils::build_empty_binary(tmp_allocator, rb1_bin))) { + LOG_WARN("failed to build empty roaringbitmap binary", K(ret)); + } else if (OB_FAIL(ObRbExprHelper::get_input_roaringbitmap_bin(ctx, tmp_allocator, rb2_arg, rb2_bin, is_rb2_null))) { + LOG_WARN("fail to get right input roaringbitmap", K(ret)); + } else if (is_rb2_null && !is_null2empty) { + is_res_null = true; + } else if (is_rb2_null && is_null2empty && OB_FAIL(ObRbUtils::build_empty_binary(tmp_allocator, rb2_bin))) { + LOG_WARN("failed to build empty roaringbitmap binary", K(ret)); + } else if (OB_FAIL(ObRbUtils::binary_calc(tmp_allocator, rb1_bin, rb2_bin, res_rb_bin, op))) { + LOG_WARN("failed to calculate roaringbitmap", K(ret), K(rb1_bin), K(rb2_bin), K(op)); + } + } else if (op == ObRbOperation::OR || op == ObRbOperation::XOR) { + ObRoaringBitmap *rb1 = nullptr; + ObRoaringBitmap *rb2 = nullptr; + if (OB_FAIL(ObRbExprHelper::get_input_roaringbitmap(ctx, tmp_allocator, rb1_arg, rb1, is_rb1_null))) { + LOG_WARN("failed to get left input roaringbitmap", K(ret)); + } else if (is_rb1_null && !is_null2empty) { + is_res_null = true; + } else if (is_rb1_null && is_null2empty + && OB_ISNULL(rb1 = OB_NEWx(ObRoaringBitmap, &tmp_allocator, (&tmp_allocator)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to create alloc memory to roaringbitmap", K(ret)); + } else if (OB_FAIL(ObRbExprHelper::get_input_roaringbitmap(ctx, tmp_allocator, rb2_arg, rb2, is_rb2_null))) { + LOG_WARN("failed to get right input roaringbitmap", K(ret)); + } else if (is_rb2_null && !is_null2empty) { + is_res_null = true; + } else if (is_rb2_null && is_null2empty + && OB_ISNULL(rb2 = OB_NEWx(ObRoaringBitmap, &tmp_allocator, (&tmp_allocator)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to create alloc memory to roaringbitmap", K(ret)); + } else if (OB_FAIL(ObRbUtils::calc_inplace(rb1, rb2, op))) { + 
LOG_WARN("failed to calcutlate roaringbitmap inplace", K(ret)); + } else if (OB_FAIL(ObRbUtils::rb_serialize(tmp_allocator, res_rb_bin, rb1))) { + LOG_WARN("failed to serialize roaringbitmap", K(ret)); + } + ObRbUtils::rb_destroy(rb1); + ObRbUtils::rb_destroy(rb2); + } else { + ret = OB_NOT_SUPPORTED; + LOG_WARN("operation is not supported", K(ret), K(op)); } if (OB_FAIL(ret)) { } else if (is_res_null) { res.set_null(); - } else if (OB_FAIL(ObRbUtils::rb_serialize(tmp_allocator, rb_res, rb1))) { - LOG_WARN("failed to serialize roaringbitmap", K(ret)); - } else if (OB_FAIL(ObRbExprHelper::pack_rb_res(expr, ctx, res, rb_res))) { + } else if (OB_FAIL(ObRbExprHelper::pack_rb_res(expr, ctx, res, res_rb_bin))) { LOG_WARN("fail to pack roaringbitmap res", K(ret)); } - ObRbUtils::rb_destroy(rb1); - ObRbUtils::rb_destroy(rb2); return ret; } diff --git a/src/sql/engine/expr/ob_expr_rb_calc_cardinality.cpp b/src/sql/engine/expr/ob_expr_rb_calc_cardinality.cpp index ebdb0cb259..dd88be03cd 100644 --- a/src/sql/engine/expr/ob_expr_rb_calc_cardinality.cpp +++ b/src/sql/engine/expr/ob_expr_rb_calc_cardinality.cpp @@ -70,36 +70,30 @@ int ObExprRbCalcCardinality::eval_rb_calc_cardinality(const ObExpr &expr, ObEval bool is_rb1_null = false; bool is_rb2_null = false; bool is_res_null = false; - ObRoaringBitmap *rb1 = nullptr; - ObRoaringBitmap *rb2 = nullptr; + ObString rb1_bin; + ObString rb2_bin; uint64_t cardinality = 0; - if (OB_FAIL(ObRbExprHelper::get_input_roaringbitmap(ctx, tmp_allocator, rb1_arg, rb1, is_rb1_null))) { - LOG_WARN("failed to get left input roaringbitmap", K(ret)); + if (OB_FAIL(ObRbExprHelper::get_input_roaringbitmap_bin(ctx, tmp_allocator, rb1_arg, rb1_bin, is_rb1_null))) { + LOG_WARN("fail to get left input roaringbitmap", K(ret)); } else if (is_rb1_null && !is_null2empty) { is_res_null = true; - } else if (is_rb1_null && is_null2empty - && OB_ISNULL(rb1 = OB_NEWx(ObRoaringBitmap, &tmp_allocator, (&tmp_allocator)))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - 
LOG_WARN("failed to create alloc memory to roaringbitmap", K(ret)); - } else if (OB_FAIL(ObRbExprHelper::get_input_roaringbitmap(ctx, tmp_allocator, rb2_arg, rb2, is_rb2_null))) { - LOG_WARN("failed to get right input roaringbitmap", K(ret)); + } else if (is_rb1_null && is_null2empty && OB_FAIL(ObRbUtils::build_empty_binary(tmp_allocator, rb1_bin))) { + LOG_WARN("failed to build empty roaringbitmap binary", K(ret)); + } else if (OB_FAIL(ObRbExprHelper::get_input_roaringbitmap_bin(ctx, tmp_allocator, rb2_arg, rb2_bin, is_rb2_null))) { + LOG_WARN("fail to get right input roaringbitmap", K(ret)); } else if (is_rb2_null && !is_null2empty) { is_res_null = true; - } else if (is_rb2_null && is_null2empty - && OB_ISNULL(rb2 = OB_NEWx(ObRoaringBitmap, &tmp_allocator, (&tmp_allocator)))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("failed to create alloc memory to roaringbitmap", K(ret)); + } else if (is_rb2_null && is_null2empty && OB_FAIL(ObRbUtils::build_empty_binary(tmp_allocator, rb2_bin))) { + LOG_WARN("failed to build empty roaringbitmap binary", K(ret)); + } else if (OB_FAIL(ObRbUtils::get_calc_cardinality(tmp_allocator, rb1_bin, rb2_bin, cardinality, op))) { + LOG_WARN("failed to get roaringbitmap claculate cardinality", K(ret), K(op)); } - if (OB_FAIL(ret)) { } else if (is_res_null) { res.set_null(); } else { - ObRbUtils::calc_cardinality(rb1, rb2, cardinality, op); res.set_uint(cardinality); } - ObRbUtils::rb_destroy(rb1); - ObRbUtils::rb_destroy(rb2); return ret; } diff --git a/src/sql/engine/expr/ob_expr_rb_cardinality.cpp b/src/sql/engine/expr/ob_expr_rb_cardinality.cpp index 6f5730a504..098db08109 100644 --- a/src/sql/engine/expr/ob_expr_rb_cardinality.cpp +++ b/src/sql/engine/expr/ob_expr_rb_cardinality.cpp @@ -63,15 +63,12 @@ int ObExprRbCardinality::eval_rb_cardinality(const ObExpr &expr, ObEvalCtx &ctx, ObExpr *rb_arg = expr.args_[0]; bool is_rb_null = false; ObString rb_bin; - ObRbBinType bin_type; uint64_t cardinality = 0; if 
(OB_FAIL(ObRbExprHelper::get_input_roaringbitmap_bin(ctx, tmp_allocator, rb_arg, rb_bin, is_rb_null))) { LOG_WARN("fail to get input roaringbitmap", K(ret)); } else if (is_rb_null || rb_bin == nullptr) { res.set_null(); - } else if (OB_FAIL(ObRbUtils::check_get_bin_type(rb_bin, bin_type))) { - LOG_WARN("invalid roaringbitmap binary string", K(ret)); - } else if (OB_FAIL(ObRbUtils::get_cardinality(tmp_allocator, rb_bin, bin_type, cardinality))){ + } else if (OB_FAIL(ObRbUtils::get_cardinality(tmp_allocator, rb_bin, cardinality))){ LOG_WARN("failed to get cardinality from roaringbitmap binary", K(ret)); } else { res.set_uint(cardinality); diff --git a/src/sql/engine/expr/ob_expr_rb_func_helper.cpp b/src/sql/engine/expr/ob_expr_rb_func_helper.cpp index bdfc38c8fa..2937ef3609 100644 --- a/src/sql/engine/expr/ob_expr_rb_func_helper.cpp +++ b/src/sql/engine/expr/ob_expr_rb_func_helper.cpp @@ -33,22 +33,25 @@ namespace sql int ObRbExprHelper::get_input_roaringbitmap_bin(ObEvalCtx &ctx, ObIAllocator &allocator, ObExpr *rb_arg, ObString &rb_bin, bool &is_rb_null) { INIT_SUCC(ret); - ObDatum *rb_datum; + ObDatum *rb_datum = nullptr; + ObString get_str; if (OB_FAIL(rb_arg->eval(ctx, rb_datum))) { LOG_WARN("eval roaringbitmap args failed", K(ret)); } else if (rb_datum->is_null()) { is_rb_null = true; - } else if (OB_FALSE_IT(rb_bin = rb_datum->get_string())) { } else if (OB_FAIL(ObTextStringHelper::read_real_string_data( allocator, *rb_datum, rb_arg->datum_meta_, rb_arg->obj_meta_.has_lob_header(), - rb_bin))) { - LOG_WARN("fail to get real string data", K(ret), K(rb_bin)); - } else if (rb_bin.empty()) { - ret = OB_INVALID_DATA; - LOG_WARN("roaringbitmap binary is empty", K(ret), K(rb_bin)); + get_str))) { + LOG_WARN("fail to get real string data", K(ret), K(get_str)); + } else if (rb_arg->datum_meta_.type_ != ObRoaringBitmapType) { + if (OB_FAIL(ObRbUtils::build_binary(allocator, get_str, rb_bin))) { + LOG_WARN("failed to build roaringbitmap from binary", K(ret), K(get_str)); 
+ } + } else { + rb_bin.assign_ptr(get_str.ptr(), get_str.length()); } return ret; } @@ -56,11 +59,30 @@ int ObRbExprHelper::get_input_roaringbitmap_bin(ObEvalCtx &ctx, ObIAllocator &al int ObRbExprHelper::get_input_roaringbitmap(ObEvalCtx &ctx, ObIAllocator &allocator, ObExpr *rb_arg, ObRoaringBitmap *&rb, bool &is_rb_null) { INIT_SUCC(ret); - ObString rb_bin = nullptr; - if (OB_FAIL(get_input_roaringbitmap_bin(ctx, allocator, rb_arg, rb_bin, is_rb_null))) { - LOG_WARN("failed to get input roaringbitmap binary", K(ret)); - } else if (!is_rb_null && OB_FAIL(ObRbUtils::rb_deserialize(allocator, rb_bin, rb))) { - LOG_WARN("failed to deserialize roaringbitmap", K(ret)); + ObDatum *rb_datum = nullptr; + ObString get_str; + if (OB_FAIL(rb_arg->eval(ctx, rb_datum))) { + LOG_WARN("eval roaringbitmap args failed", K(ret)); + } else if (rb_datum->is_null()) { + is_rb_null = true; + } else if (OB_FAIL(ObTextStringHelper::read_real_string_data( + allocator, + *rb_datum, + rb_arg->datum_meta_, + rb_arg->obj_meta_.has_lob_header(), + get_str))) { + LOG_WARN("fail to get real string data", K(ret), K(get_str)); + } else if (rb_arg->datum_meta_.type_ != ObRoaringBitmapType) { + bool need_validate = true; + if (OB_FAIL(ObRbUtils::check_binary(get_str))) { + LOG_WARN("invalid roaringbitmap binary string", K(ret)); + } else if (OB_FAIL(ObRbUtils::rb_deserialize(allocator, get_str, rb, need_validate))) { + LOG_WARN("failed to deserialize roaringbitmap", K(ret)); + } + } else { + if (OB_FAIL(ObRbUtils::rb_deserialize(allocator, get_str, rb))) { + LOG_WARN("failed to deserialize roaringbitmap", K(ret)); + } } return ret; } diff --git a/src/sql/engine/expr/ob_expr_rb_is_empty.cpp b/src/sql/engine/expr/ob_expr_rb_is_empty.cpp index 4cf01f1492..fc53005294 100644 --- a/src/sql/engine/expr/ob_expr_rb_is_empty.cpp +++ b/src/sql/engine/expr/ob_expr_rb_is_empty.cpp @@ -61,21 +61,19 @@ int ObExprRbIsEmpty::eval_rb_is_empty(const ObExpr &expr, ObEvalCtx &ctx, ObDatu lib::ObMallocHookAttrGuard 
malloc_guard(lib::ObMemAttr(ObRbExprHelper::get_tenant_id(ctx.exec_ctx_.get_my_session()), "ROARINGBITMAP")); ObExpr *rb_arg = expr.args_[0]; bool is_rb_null = false; - ObRoaringBitmap *rb = nullptr; - ObDatum *rb_datum = nullptr; - if (OB_FAIL(ObRbExprHelper::get_input_roaringbitmap(ctx, tmp_allocator, rb_arg, rb, is_rb_null))) { + ObString rb_bin; + uint64_t cardinality = 0; + if (OB_FAIL(ObRbExprHelper::get_input_roaringbitmap_bin(ctx, tmp_allocator, rb_arg, rb_bin, is_rb_null))) { LOG_WARN("fail to get input roaringbitmap", K(ret)); - } else if (is_rb_null) { + } else if (is_rb_null || rb_bin == nullptr) { res.set_null(); + } else if (OB_FAIL(ObRbUtils::get_cardinality(tmp_allocator, rb_bin, cardinality))){ + LOG_WARN("failed to get cardinality from roaringbitmap binary", K(ret)); + } else if (cardinality == 0) { + res.set_bool(true); } else { - uint64_t cardinality = rb->get_cardinality(); - if (cardinality == 0) { - res.set_bool(true); - } else { - res.set_bool(false); - } + res.set_bool(false); } - ObRbUtils::rb_destroy(rb); return ret; } diff --git a/src/sql/engine/expr/ob_expr_rb_to_string.cpp b/src/sql/engine/expr/ob_expr_rb_to_string.cpp index a09bca071d..0c686a88ac 100644 --- a/src/sql/engine/expr/ob_expr_rb_to_string.cpp +++ b/src/sql/engine/expr/ob_expr_rb_to_string.cpp @@ -64,27 +64,14 @@ int ObExprRbToString::eval_rb_to_string(const ObExpr &expr, ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx); common::ObArenaAllocator &tmp_allocator = tmp_alloc_g.get_allocator(); lib::ObMallocHookAttrGuard malloc_guard(lib::ObMemAttr(ObRbExprHelper::get_tenant_id(ctx.exec_ctx_.get_my_session()), "ROARINGBITMAP")); - ObExpr *arg = expr.args_[0]; + ObExpr *rb_arg = expr.args_[0]; bool is_rb_null = false; - ObDatum *datum = nullptr; ObString rb_bin; ObString rb_str; - if (OB_FAIL(arg->eval(ctx, datum))) { - LOG_WARN("eval roaringbitmap args failed", K(ret)); - } else if (datum->is_null()) { - is_rb_null = true; - } else if (OB_FALSE_IT(rb_bin = datum->get_string())) { - } 
else if (OB_FAIL(ObTextStringHelper::read_real_string_data(tmp_allocator, - *datum, - arg->datum_meta_, - arg->obj_meta_.has_lob_header(), - rb_bin))) { - LOG_WARN("failed to get real string data", K(ret), K(rb_bin)); - } - - if (OB_FAIL(ret)) { - } else if (is_rb_null) { + if (OB_FAIL(ObRbExprHelper::get_input_roaringbitmap_bin(ctx, tmp_allocator, rb_arg, rb_bin, is_rb_null))) { + LOG_WARN("fail to get input roaringbitmap", K(ret)); + } else if (is_rb_null || rb_bin == nullptr) { res.set_null(); } else if (OB_FAIL(ObRbUtils::rb_to_string(tmp_allocator, rb_bin, rb_str))) { LOG_WARN("failed to print roaringbitmap to string", K(ret)); diff --git a/src/sql/engine/expr/ob_expr_rb_to_varbinary.cpp b/src/sql/engine/expr/ob_expr_rb_to_varbinary.cpp index 4b0ee5462d..c348dce399 100644 --- a/src/sql/engine/expr/ob_expr_rb_to_varbinary.cpp +++ b/src/sql/engine/expr/ob_expr_rb_to_varbinary.cpp @@ -72,27 +72,17 @@ int ObExprRbToVarbinary::eval_rb_to_varbinary(const ObExpr &expr, ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx); common::ObArenaAllocator &tmp_allocator = tmp_alloc_g.get_allocator(); lib::ObMallocHookAttrGuard malloc_guard(lib::ObMemAttr(ObRbExprHelper::get_tenant_id(ctx.exec_ctx_.get_my_session()), "ROARINGBITMAP")); - ObExpr *arg = expr.args_[0]; + ObExpr *rb_arg = expr.args_[0]; bool is_rb_null = false; ObDatum *datum = nullptr; ObString rb_bin; ObString expected_format; ObString res_bin; - if (OB_FAIL(arg->eval(ctx, datum))) { - LOG_WARN("eval roaringbitmap args failed", K(ret)); - } else if (datum->is_null()) { - is_rb_null = true; - } else if (OB_FALSE_IT(rb_bin = datum->get_string())) { - } else if (OB_FAIL(ObTextStringHelper::read_real_string_data( - tmp_allocator, - *datum, - arg->datum_meta_, - arg->obj_meta_.has_lob_header(), - rb_bin))) { - LOG_WARN("fail to get real string data", K(ret), K(rb_bin)); + if (OB_FAIL(ObRbExprHelper::get_input_roaringbitmap_bin(ctx, tmp_allocator, rb_arg, rb_bin, is_rb_null))) { + LOG_WARN("fail to get input roaringbitmap", 
K(ret)); } else if (expr.arg_cnt_ == 1) { - res_bin = rb_bin; + res_bin.assign(rb_bin.ptr(), rb_bin.length()); } else { ObExpr *format_arg = expr.args_[1]; ObDatum *format_datum = nullptr; @@ -112,7 +102,7 @@ int ObExprRbToVarbinary::eval_rb_to_varbinary(const ObExpr &expr, } else if (expected_format.case_compare("roaring") != 0) { ret = OB_NOT_SUPPORTED; LOG_WARN("not supported expected format", K(ret), K(expected_format)); - LOG_USER_ERROR(OB_NOT_SUPPORTED, "expected format expect 'roaring' is"); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "expected format except 'roaring' is"); } else if (OB_FAIL(ObRbUtils::binary_format_convert(tmp_allocator, rb_bin, res_bin))) { LOG_WARN("failed to convert binary to roaring format", K(ret), K(rb_bin)); } diff --git a/unittest/share/test_roaringbitmap.cpp b/unittest/share/test_roaringbitmap.cpp index 4545605f40..d474a28315 100644 --- a/unittest/share/test_roaringbitmap.cpp +++ b/unittest/share/test_roaringbitmap.cpp @@ -113,7 +113,7 @@ TEST_F(TestRoaringBitmap, serialize_deserialize) } ObString bin_bitmap; ASSERT_EQ(OB_SUCCESS, ObRbUtils::rb_serialize(allocator, bin_bitmap, rb)); - ASSERT_EQ(ObRbBinType::BITMAP_32, static_cast(*(bin_bitmap.ptr() + 1))); + ASSERT_EQ(ObRbBinType::BITMAP_64, static_cast(*(bin_bitmap.ptr() + 1))); ObRoaringBitmap *rb_bitmap; ASSERT_EQ(OB_SUCCESS, ObRbUtils::rb_deserialize(allocator, bin_bitmap, rb_bitmap)); ASSERT_EQ(rb->get_version(), rb_bitmap->get_version()); @@ -267,7 +267,7 @@ TEST_F(TestRoaringBitmap, to_roaring_bin) ASSERT_EQ(OB_SUCCESS, ObRbUtils::rb_serialize(allocator, bin_empty, rb)); ASSERT_EQ(OB_SUCCESS, ObRbUtils::binary_format_convert(allocator, bin_empty, roaring_bin_empty)); ObRbBinType bin_type_empty; - ASSERT_EQ(OB_SUCCESS, ObRbUtils::check_get_bin_type(roaring_bin_empty, bin_type_empty)); + ASSERT_EQ(OB_SUCCESS, ObRbUtils::get_bin_type(roaring_bin_empty, bin_type_empty)); ASSERT_EQ(ObRbBinType::BITMAP_32, bin_type_empty); ObRoaringBitmap *rb_roaring_empty; ASSERT_EQ(OB_SUCCESS, 
ObRbUtils::rb_deserialize(allocator, roaring_bin_empty, rb_roaring_empty)); @@ -282,7 +282,7 @@ TEST_F(TestRoaringBitmap, to_roaring_bin) ASSERT_EQ(OB_SUCCESS, ObRbUtils::rb_serialize(allocator, bin_single, rb)); ASSERT_EQ(OB_SUCCESS, ObRbUtils::binary_format_convert(allocator, bin_single, roaring_bin_single)); ObRbBinType bin_type_single; - ASSERT_EQ(OB_SUCCESS, ObRbUtils::check_get_bin_type(roaring_bin_single, bin_type_single)); + ASSERT_EQ(OB_SUCCESS, ObRbUtils::get_bin_type(roaring_bin_single, bin_type_single)); ASSERT_EQ(ObRbBinType::BITMAP_32, bin_type_single); ObRoaringBitmap *rb_roaring_single; ASSERT_EQ(OB_SUCCESS, ObRbUtils::rb_deserialize(allocator, roaring_bin_single, rb_roaring_single)); @@ -298,7 +298,7 @@ TEST_F(TestRoaringBitmap, to_roaring_bin) ASSERT_EQ(OB_SUCCESS, ObRbUtils::rb_serialize(allocator, bin_set, rb)); ASSERT_EQ(OB_SUCCESS, ObRbUtils::binary_format_convert(allocator, bin_set, roaring_bin_set)); ObRbBinType bin_type_set; - ASSERT_EQ(OB_SUCCESS, ObRbUtils::check_get_bin_type(roaring_bin_set, bin_type_set)); + ASSERT_EQ(OB_SUCCESS, ObRbUtils::get_bin_type(roaring_bin_set, bin_type_set)); ASSERT_EQ(ObRbBinType::BITMAP_32, bin_type_set); ObRoaringBitmap *rb_roaring_set; ASSERT_EQ(OB_SUCCESS, ObRbUtils::rb_deserialize(allocator, roaring_bin_set, rb_roaring_set)); @@ -317,7 +317,7 @@ TEST_F(TestRoaringBitmap, to_roaring_bin) ASSERT_EQ(OB_SUCCESS, ObRbUtils::rb_serialize(allocator, bin_bitmap, rb)); ASSERT_EQ(OB_SUCCESS, ObRbUtils::binary_format_convert(allocator, bin_bitmap, roaring_bin_bitmap)); ObRbBinType bin_type_bitmap; - ASSERT_EQ(OB_SUCCESS, ObRbUtils::check_get_bin_type(roaring_bin_bitmap, bin_type_bitmap)); + ASSERT_EQ(OB_SUCCESS, ObRbUtils::get_bin_type(roaring_bin_bitmap, bin_type_bitmap)); ASSERT_EQ(ObRbBinType::BITMAP_32, bin_type_bitmap); ObRoaringBitmap *rb_roaring_bitmap; ASSERT_EQ(OB_SUCCESS, ObRbUtils::rb_deserialize(allocator, roaring_bin_bitmap, rb_roaring_bitmap)); @@ -335,7 +335,7 @@ TEST_F(TestRoaringBitmap, 
to_roaring_bin) ASSERT_EQ(OB_SUCCESS, ObRbUtils::rb_serialize(allocator, bin_single64, rb64)); ASSERT_EQ(OB_SUCCESS, ObRbUtils::binary_format_convert(allocator, bin_single64, roaring_bin_single64)); ObRbBinType bin_type_single64; - ASSERT_EQ(OB_SUCCESS, ObRbUtils::check_get_bin_type(roaring_bin_single64, bin_type_single64)); + ASSERT_EQ(OB_SUCCESS, ObRbUtils::get_bin_type(roaring_bin_single64, bin_type_single64)); ASSERT_EQ(ObRbBinType::BITMAP_64, bin_type_single64); ObRoaringBitmap *rb_roaring_single64; ASSERT_EQ(OB_SUCCESS, ObRbUtils::rb_deserialize(allocator, roaring_bin_single64, rb_roaring_single64)); @@ -351,7 +351,7 @@ TEST_F(TestRoaringBitmap, to_roaring_bin) ASSERT_EQ(OB_SUCCESS, ObRbUtils::rb_serialize(allocator, bin_set64, rb64)); ASSERT_EQ(OB_SUCCESS, ObRbUtils::binary_format_convert(allocator, bin_set64, roaring_bin_set64)); ObRbBinType bin_type_set64; - ASSERT_EQ(OB_SUCCESS, ObRbUtils::check_get_bin_type(roaring_bin_set64, bin_type_set64)); + ASSERT_EQ(OB_SUCCESS, ObRbUtils::get_bin_type(roaring_bin_set64, bin_type_set64)); ASSERT_EQ(ObRbBinType::BITMAP_64, bin_type_set64); ObRoaringBitmap *rb_roaring_set64; ASSERT_EQ(OB_SUCCESS, ObRbUtils::rb_deserialize(allocator, roaring_bin_set64, rb_roaring_set64)); @@ -370,7 +370,7 @@ TEST_F(TestRoaringBitmap, to_roaring_bin) ASSERT_EQ(OB_SUCCESS, ObRbUtils::rb_serialize(allocator, bin_bitmap64, rb64)); ASSERT_EQ(OB_SUCCESS, ObRbUtils::binary_format_convert(allocator, bin_bitmap64, roaring_bin_bitmap64)); ObRbBinType bin_type_bitmap64; - ASSERT_EQ(OB_SUCCESS, ObRbUtils::check_get_bin_type(roaring_bin_bitmap64, bin_type_bitmap64)); + ASSERT_EQ(OB_SUCCESS, ObRbUtils::get_bin_type(roaring_bin_bitmap64, bin_type_bitmap64)); ASSERT_EQ(ObRbBinType::BITMAP_64, bin_type_bitmap64); ObRoaringBitmap *rb_roaring_bitmap64; ASSERT_EQ(OB_SUCCESS, ObRbUtils::rb_deserialize(allocator, roaring_bin_bitmap64, rb_roaring_bitmap64));