Remove the duplicate type definition in function context. Remove the unused method in function context. No stale state is needed in vexpr context, because vexpr is stateless; function context saves the state and is cloned. Remove the useless slot_size in all tuple and slot descriptors. Remove the doris_udf namespace, which is useless. Remove some unused macro definitions. Init v_conjuncts in vscanner, so the same code does not need to be written in every scanner. Use a unique ptr to manage function context, since it can only belong to a single expr context. Issue Number: close #xxx --------- Co-authored-by: yiguolei <yiguolei@gmail.com>
2040 lines
67 KiB
C++
2040 lines
67 KiB
C++
// Licensed to the Apache Software Foundation (ASF) under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing,
|
|
// software distributed under the License is distributed on an
|
|
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
// KIND, either express or implied. See the License for the
|
|
// specific language governing permissions and limitations
|
|
// under the License.
|
|
|
|
#pragma once
|
|
|
|
#include <parallel_hashmap/btree.h>
|
|
|
|
#include <algorithm>
|
|
#include <cstdarg>
|
|
#include <cstdint>
|
|
#include <cstdio>
|
|
#include <limits>
|
|
#include <map>
|
|
#include <new>
|
|
#include <numeric>
|
|
#include <roaring/roaring.hh>
|
|
#include <stdexcept>
|
|
#include <string>
|
|
#include <utility>
|
|
|
|
#include "common/logging.h"
|
|
#include "gutil/integral_types.h"
|
|
#include "udf/udf.h"
|
|
#include "util/coding.h"
|
|
#include "vec/common/pod_array.h"
|
|
#include "vec/common/pod_array_fwd.h"
|
|
namespace doris {
|
|
|
|
// serialized bitmap := TypeCode(1), Payload
|
|
// The format of payload depends on value of TypeCode which is defined below
|
|
struct BitmapTypeCode {
|
|
enum type {
|
|
// An empty bitmap. Payload is 0 byte.
|
|
// added in 0.11
|
|
EMPTY = 0,
|
|
// A bitmap containing only one element that is in [0, UINT32_MAX]
|
|
// Payload := UInt32LittleEndian(4 byte)
|
|
// added in 0.11
|
|
SINGLE32 = 1,
|
|
// A bitmap whose maximum element is in [0, UINT32_MAX]
|
|
// Payload := the standard RoaringBitmap format described by
|
|
// https://github.com/RoaringBitmap/RoaringFormatSpec/
|
|
// added in 0.11
|
|
BITMAP32 = 2,
|
|
// A bitmap containing only one element that is in (UINT32_MAX, UINT64_MAX]
|
|
// Payload := UInt64LittleEndian(8 byte)
|
|
// added in 0.12
|
|
SINGLE64 = 3,
|
|
// A bitmap whose maximum element is in (UINT32_MAX, UINT64_MAX].
|
|
//
|
|
// To support 64-bits elements, all elements with the same high 32 bits are stored in a
|
|
// RoaringBitmap containing only the lower 32 bits. Thus we could use
|
|
// map<uint32_t, RoaringBitmap> to represent bitmap of 64-bits ints.
|
|
//
|
|
// Since there is no standard format for 64-bits RoaringBitmap, we define our own as below
|
|
// Payload := NumRoaring(vint64), { MapKey, MapValue }^NumRoaring
|
|
// - MapKey := the shared high 32 bits in UInt32LittleEndian(4 byte)
|
|
// - MapValue := the standard RoaringBitmap format
|
|
//
|
|
// added in 0.12
|
|
BITMAP64 = 4
|
|
};
|
|
Status static inline validate(int bitmap_type) {
|
|
if (UNLIKELY(bitmap_type < type::EMPTY || bitmap_type > type::BITMAP64)) {
|
|
std::string err_msg =
|
|
fmt::format("BitmapTypeCode invalid, should between: {} and {} actrual is {}",
|
|
BitmapTypeCode::EMPTY, BitmapTypeCode::BITMAP64, bitmap_type);
|
|
LOG(ERROR) << err_msg;
|
|
return Status::IOError(err_msg);
|
|
}
|
|
return Status::OK();
|
|
}
|
|
};
|
|
|
|
namespace detail {
|
|
|
|
class Roaring64MapSetBitForwardIterator;
|
|
class Roaring64MapSetBitBiDirectionalIterator;
|
|
|
|
// Forked from https://github.com/RoaringBitmap/CRoaring/blob/v0.2.60/cpp/roaring64map.hh
|
|
// What we change includes
|
|
// - a custom serialization format is used inside read()/write()/getSizeInBytes()
|
|
// - added clear() and is32BitsEnough()
|
|
class Roaring64Map {
|
|
typedef roaring::api::roaring_bitmap_t roaring_bitmap_t;
|
|
|
|
public:
|
|
/**
|
|
* Create an empty bitmap
|
|
*/
|
|
Roaring64Map() = default;
|
|
|
|
/**
|
|
* Construct a bitmap from a list of 32-bit integer values.
|
|
*/
|
|
Roaring64Map(size_t n, const uint32_t* data) { addMany(n, data); }
|
|
|
|
/**
|
|
* Construct a bitmap from a list of 64-bit integer values.
|
|
*/
|
|
Roaring64Map(size_t n, const uint64_t* data) { addMany(n, data); }
|
|
|
|
/**
|
|
* Construct a 64-bit map from a 32-bit one
|
|
*/
|
|
explicit Roaring64Map(const roaring::Roaring& r) { emplaceOrInsert(0, r); }
|
|
|
|
/**
|
|
* Construct a roaring object from the C struct.
|
|
*
|
|
* Passing a nullptr point is unsafe.
|
|
*/
|
|
explicit Roaring64Map(roaring_bitmap_t* s) {
|
|
roaring::Roaring r(s);
|
|
emplaceOrInsert(0, r);
|
|
}
|
|
|
|
Roaring64Map(const Roaring64Map& r) : roarings(r.roarings), copyOnWrite(r.copyOnWrite) {}
|
|
|
|
Roaring64Map(Roaring64Map&& r) : roarings(r.roarings), copyOnWrite(r.copyOnWrite) {}
|
|
|
|
/**
|
|
* Assignment operator.
|
|
*/
|
|
Roaring64Map& operator=(const Roaring64Map& r) {
|
|
roarings = r.roarings;
|
|
return *this;
|
|
}
|
|
|
|
/**
|
|
* Construct a bitmap from a list of integer values.
|
|
*/
|
|
static Roaring64Map bitmapOf(size_t n...) {
|
|
Roaring64Map ans;
|
|
va_list vl;
|
|
va_start(vl, n);
|
|
for (size_t i = 0; i < n; i++) {
|
|
ans.add(va_arg(vl, uint64_t));
|
|
}
|
|
va_end(vl);
|
|
return ans;
|
|
}
|
|
|
|
/**
|
|
* Add value x
|
|
*
|
|
*/
|
|
void add(uint32_t x) {
|
|
roarings[0].add(x);
|
|
roarings[0].setCopyOnWrite(copyOnWrite);
|
|
}
|
|
void add(uint64_t x) {
|
|
roarings[highBytes(x)].add(lowBytes(x));
|
|
roarings[highBytes(x)].setCopyOnWrite(copyOnWrite);
|
|
}
|
|
|
|
/**
|
|
* Add value x
|
|
* Returns true if a new value was added, false if the value was already existing.
|
|
*/
|
|
bool addChecked(uint32_t x) {
|
|
bool result = roarings[0].addChecked(x);
|
|
roarings[0].setCopyOnWrite(copyOnWrite);
|
|
return result;
|
|
}
|
|
bool addChecked(uint64_t x) {
|
|
bool result = roarings[highBytes(x)].addChecked(lowBytes(x));
|
|
roarings[highBytes(x)].setCopyOnWrite(copyOnWrite);
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* Add value n_args from pointer vals
|
|
*
|
|
*/
|
|
void addMany(size_t n_args, const uint32_t* vals) {
|
|
for (size_t lcv = 0; lcv < n_args; lcv++) {
|
|
roarings[0].add(vals[lcv]);
|
|
roarings[0].setCopyOnWrite(copyOnWrite);
|
|
}
|
|
}
|
|
void addMany(size_t n_args, const uint64_t* vals) {
|
|
for (size_t lcv = 0; lcv < n_args; lcv++) {
|
|
roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv]));
|
|
roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Remove value x
|
|
*
|
|
*/
|
|
void remove(uint32_t x) { roarings[0].remove(x); }
|
|
void remove(uint64_t x) {
|
|
auto roaring_iter = roarings.find(highBytes(x));
|
|
if (roaring_iter != roarings.cend()) roaring_iter->second.remove(lowBytes(x));
|
|
}
|
|
|
|
/**
|
|
* Remove value x
|
|
* Returns true if a new value was removed, false if the value was not existing.
|
|
*/
|
|
bool removeChecked(uint32_t x) { return roarings[0].removeChecked(x); }
|
|
bool removeChecked(uint64_t x) {
|
|
auto roaring_iter = roarings.find(highBytes(x));
|
|
if (roaring_iter != roarings.cend()) return roaring_iter->second.removeChecked(lowBytes(x));
|
|
return false;
|
|
}
|
|
|
|
/**
|
|
* Return the largest value (if not empty)
|
|
*
|
|
*/
|
|
uint64_t maximum() const {
|
|
for (auto roaring_iter = roarings.crbegin(); roaring_iter != roarings.crend();
|
|
++roaring_iter) {
|
|
if (!roaring_iter->second.isEmpty()) {
|
|
return uniteBytes(roaring_iter->first, roaring_iter->second.maximum());
|
|
}
|
|
}
|
|
// we put std::numeric_limits<>::max/min in parenthesis
|
|
// to avoid a clash with the Windows.h header under Windows
|
|
return (std::numeric_limits<uint64_t>::min)();
|
|
}
|
|
|
|
/**
|
|
* Return the smallest value (if not empty)
|
|
*
|
|
*/
|
|
uint64_t minimum() const {
|
|
for (auto roaring_iter = roarings.cbegin(); roaring_iter != roarings.cend();
|
|
++roaring_iter) {
|
|
if (!roaring_iter->second.isEmpty()) {
|
|
return uniteBytes(roaring_iter->first, roaring_iter->second.minimum());
|
|
}
|
|
}
|
|
// we put std::numeric_limits<>::max/min in parenthesis
|
|
// to avoid a clash with the Windows.h header under Windows
|
|
return (std::numeric_limits<uint64_t>::max)();
|
|
}
|
|
|
|
/**
|
|
* Check if value x is present
|
|
*/
|
|
bool contains(uint32_t x) const {
|
|
return roarings.count(0) == 0 ? false : roarings.at(0).contains(x);
|
|
}
|
|
bool contains(uint64_t x) const {
|
|
return roarings.count(highBytes(x)) == 0 ? false
|
|
: roarings.at(highBytes(x)).contains(lowBytes(x));
|
|
}
|
|
|
|
/**
|
|
* Compute the intersection between the current bitmap and the provided
|
|
* bitmap,
|
|
* writing the result in the current bitmap. The provided bitmap is not
|
|
* modified.
|
|
*/
|
|
Roaring64Map& operator&=(const Roaring64Map& r) {
|
|
for (auto& map_entry : roarings) {
|
|
if (r.roarings.count(map_entry.first) == 1)
|
|
map_entry.second &= r.roarings.at(map_entry.first);
|
|
else
|
|
map_entry.second = roaring::Roaring();
|
|
}
|
|
return *this;
|
|
}
|
|
|
|
/**
|
|
* Compute the difference between the current bitmap and the provided
|
|
* bitmap,
|
|
* writing the result in the current bitmap. The provided bitmap is not
|
|
* modified.
|
|
*/
|
|
Roaring64Map& operator-=(const Roaring64Map& r) {
|
|
for (auto& map_entry : roarings) {
|
|
if (r.roarings.count(map_entry.first) == 1)
|
|
map_entry.second -= r.roarings.at(map_entry.first);
|
|
}
|
|
return *this;
|
|
}
|
|
|
|
/**
|
|
* Compute the union between the current bitmap and the provided bitmap,
|
|
* writing the result in the current bitmap. The provided bitmap is not
|
|
* modified.
|
|
*
|
|
* See also the fastunion function to aggregate many bitmaps more quickly.
|
|
*/
|
|
Roaring64Map& operator|=(const Roaring64Map& r) {
|
|
for (const auto& map_entry : r.roarings) {
|
|
if (roarings.count(map_entry.first) == 0) {
|
|
roarings[map_entry.first] = map_entry.second;
|
|
roarings[map_entry.first].setCopyOnWrite(copyOnWrite);
|
|
} else
|
|
roarings[map_entry.first] |= map_entry.second;
|
|
}
|
|
return *this;
|
|
}
|
|
|
|
/**
|
|
* Compute the symmetric union between the current bitmap and the provided
|
|
* bitmap,
|
|
* writing the result in the current bitmap. The provided bitmap is not
|
|
* modified.
|
|
*/
|
|
Roaring64Map& operator^=(const Roaring64Map& r) {
|
|
for (const auto& map_entry : r.roarings) {
|
|
if (roarings.count(map_entry.first) == 0) {
|
|
roarings[map_entry.first] = map_entry.second;
|
|
roarings[map_entry.first].setCopyOnWrite(copyOnWrite);
|
|
} else
|
|
roarings[map_entry.first] ^= map_entry.second;
|
|
}
|
|
return *this;
|
|
}
|
|
|
|
/**
 * Exchange the content of this bitmap with another.
 *
 * Only the inner bucket maps are exchanged; the `copyOnWrite` members of the
 * two objects are left as they were.
 * NOTE(review): presumably intentional (inherited from the forked code) — confirm
 * whether the flag should travel with the buckets.
 */
void swap(Roaring64Map& r) { roarings.swap(r.roarings); }
|
|
|
|
/**
|
|
* Get the cardinality of the bitmap (number of elements).
|
|
* Throws std::length_error in the special case where the bitmap is full
|
|
* (cardinality() == 2^64). Check isFull() before calling to avoid
|
|
* exception.
|
|
*/
|
|
uint64_t cardinality() const {
|
|
if (isFull()) {
|
|
throw std::length_error(
|
|
"bitmap is full, cardinality is 2^64, "
|
|
"unable to represent in a 64-bit integer");
|
|
}
|
|
return std::accumulate(roarings.cbegin(), roarings.cend(), (uint64_t)0,
|
|
[](uint64_t previous,
|
|
const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
|
|
return previous + map_entry.second.cardinality();
|
|
});
|
|
}
|
|
/**
|
|
* Computes the size of the intersection between two bitmaps.
|
|
*
|
|
*/
|
|
uint64_t andCardinality(const Roaring64Map& r) const {
|
|
uint64_t card = 0;
|
|
for (auto& map_entry : roarings) {
|
|
if (r.roarings.count(map_entry.first) == 1) {
|
|
card += map_entry.second.and_cardinality(r.roarings.at(map_entry.first));
|
|
}
|
|
}
|
|
return card;
|
|
}
|
|
|
|
/**
|
|
* Computes the size of the union between two bitmaps.
|
|
*
|
|
*/
|
|
uint64_t orCardinality(const Roaring64Map& r) const {
|
|
uint64_t card = 0;
|
|
for (const auto& map_entry : roarings) {
|
|
if (r.roarings.count(map_entry.first) == 0) {
|
|
card += map_entry.second.cardinality();
|
|
}
|
|
}
|
|
for (const auto& map_entry : r.roarings) {
|
|
if (roarings.count(map_entry.first) == 0) {
|
|
card += map_entry.second.cardinality();
|
|
} else {
|
|
card += roarings.at(map_entry.first).or_cardinality(map_entry.second);
|
|
}
|
|
}
|
|
return card;
|
|
}
|
|
|
|
/**
|
|
* Computes the size of the difference (andnot) between two bitmaps.
|
|
* r1.cardinality - (r1 & r2).cardinality
|
|
*/
|
|
uint64_t andnotCardinality(const Roaring64Map& r) const {
|
|
uint64_t card = 0;
|
|
for (const auto& map_entry : roarings) {
|
|
card += map_entry.second.cardinality();
|
|
if (r.roarings.count(map_entry.first) == 1) {
|
|
card -= r.roarings.at(map_entry.first).and_cardinality(map_entry.second);
|
|
}
|
|
}
|
|
return card;
|
|
}
|
|
|
|
/**
|
|
* Computes the size of the symmetric difference (andnot) between two
|
|
* bitmaps.
|
|
* r1.cardinality + r2.cardinality - 2 * (r1 & r2).cardinality
|
|
*/
|
|
uint64_t xorCardinality(const Roaring64Map& r) const {
|
|
uint64_t card = 0;
|
|
for (const auto& map_entry : roarings) {
|
|
card += map_entry.second.cardinality();
|
|
}
|
|
for (const auto& map_entry : r.roarings) {
|
|
card += map_entry.second.cardinality();
|
|
if (roarings.count(map_entry.first) == 1) {
|
|
card -= 2 * roarings.at(map_entry.first).and_cardinality(map_entry.second);
|
|
}
|
|
}
|
|
return card;
|
|
}
|
|
|
|
/**
|
|
* Returns true if the bitmap is empty (cardinality is zero).
|
|
*/
|
|
bool isEmpty() const {
|
|
return std::all_of(roarings.cbegin(), roarings.cend(),
|
|
[](const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
|
|
return map_entry.second.isEmpty();
|
|
});
|
|
}
|
|
|
|
/**
|
|
* Returns true if the bitmap is full (cardinality is max uint64_t + 1).
|
|
*/
|
|
bool isFull() const {
|
|
// only bother to check if map is fully saturated
|
|
//
|
|
// we put std::numeric_limits<>::max/min in parenthesis
|
|
// to avoid a clash with the Windows.h header under Windows
|
|
return roarings.size() == ((uint64_t)(std::numeric_limits<uint32_t>::max)()) + 1
|
|
? std::all_of(roarings.cbegin(), roarings.cend(),
|
|
[](const std::pair<const uint32_t, roaring::Roaring>&
|
|
roaring_map_entry) {
|
|
// roarings within map are saturated if cardinality
|
|
// is uint32_t max + 1
|
|
return roaring_map_entry.second.cardinality() ==
|
|
((uint64_t)(std::numeric_limits<uint32_t>::max)()) +
|
|
1;
|
|
})
|
|
: false;
|
|
}
|
|
|
|
/**
|
|
* Returns true if the bitmap is subset of the other.
|
|
*/
|
|
bool isSubset(const Roaring64Map& r) const {
|
|
for (const auto& map_entry : roarings) {
|
|
auto roaring_iter = r.roarings.find(map_entry.first);
|
|
if (roaring_iter == r.roarings.cend())
|
|
return false;
|
|
else if (!map_entry.second.isSubset(roaring_iter->second))
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* Returns true if the bitmap is strict subset of the other.
|
|
* Throws std::length_error in the special case where the bitmap is full
|
|
* (cardinality() == 2^64). Check isFull() before calling to avoid exception.
|
|
*/
|
|
bool isStrictSubset(const Roaring64Map& r) const {
|
|
return isSubset(r) && cardinality() != r.cardinality();
|
|
}
|
|
|
|
/**
|
|
* Convert the bitmap to an array. Write the output to "ans",
|
|
* caller is responsible to ensure that there is enough memory
|
|
* allocated
|
|
* (e.g., ans = new uint32[mybitmap.cardinality()];)
|
|
*/
|
|
void toUint64Array(uint64_t* ans) const {
|
|
// Annoyingly, VS 2017 marks std::accumulate() as [[nodiscard]]
|
|
(void)std::accumulate(roarings.cbegin(), roarings.cend(), ans,
|
|
[](uint64_t* previous,
|
|
const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
|
|
for (uint32_t low_bits : map_entry.second)
|
|
*previous++ = uniteBytes(map_entry.first, low_bits);
|
|
return previous;
|
|
});
|
|
}
|
|
|
|
/**
|
|
* Return true if the two bitmaps contain the same elements.
|
|
*/
|
|
bool operator==(const Roaring64Map& r) const {
|
|
// we cannot use operator == on the map because either side may contain
|
|
// empty Roaring Bitmaps
|
|
auto lhs_iter = roarings.cbegin();
|
|
auto rhs_iter = r.roarings.cbegin();
|
|
do {
|
|
// if the left map has reached its end, ensure that the right map
|
|
// contains only empty Bitmaps
|
|
if (lhs_iter == roarings.cend()) {
|
|
while (rhs_iter != r.roarings.cend()) {
|
|
if (rhs_iter->second.isEmpty()) {
|
|
++rhs_iter;
|
|
continue;
|
|
}
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
// if the left map has an empty bitmap, skip it
|
|
if (lhs_iter->second.isEmpty()) {
|
|
++lhs_iter;
|
|
continue;
|
|
}
|
|
|
|
do {
|
|
// if the right map has reached its end, ensure that the right
|
|
// map contains only empty Bitmaps
|
|
if (rhs_iter == r.roarings.cend()) {
|
|
while (lhs_iter != roarings.cend()) {
|
|
if (lhs_iter->second.isEmpty()) {
|
|
++lhs_iter;
|
|
continue;
|
|
}
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
// if the right map has an empty bitmap, skip it
|
|
if (rhs_iter->second.isEmpty()) {
|
|
++rhs_iter;
|
|
continue;
|
|
}
|
|
} while (false);
|
|
// if neither map has reached its end ensure elements are equal and
|
|
// move to the next element in both
|
|
} while (lhs_iter++->second == rhs_iter++->second);
|
|
return false;
|
|
}
|
|
|
|
/**
|
|
* compute the negation of the roaring bitmap within a specified interval.
|
|
* areas outside the range are passed through unchanged.
|
|
*/
|
|
void flip(uint64_t range_start, uint64_t range_end) {
|
|
uint32_t start_high = highBytes(range_start);
|
|
uint32_t start_low = lowBytes(range_start);
|
|
uint32_t end_high = highBytes(range_end);
|
|
uint32_t end_low = lowBytes(range_end);
|
|
|
|
if (start_high == end_high) {
|
|
roarings[start_high].flip(start_low, end_low);
|
|
return;
|
|
}
|
|
// we put std::numeric_limits<>::max/min in parenthesis
|
|
// to avoid a clash with the Windows.h header under Windows
|
|
roarings[start_high].flip(start_low, (std::numeric_limits<uint32_t>::max)());
|
|
roarings[start_high++].setCopyOnWrite(copyOnWrite);
|
|
|
|
for (; start_high <= highBytes(range_end) - 1; ++start_high) {
|
|
roarings[start_high].flip((std::numeric_limits<uint32_t>::min)(),
|
|
(std::numeric_limits<uint32_t>::max)());
|
|
roarings[start_high].setCopyOnWrite(copyOnWrite);
|
|
}
|
|
|
|
roarings[start_high].flip((std::numeric_limits<uint32_t>::min)(), end_low);
|
|
roarings[start_high].setCopyOnWrite(copyOnWrite);
|
|
}
|
|
|
|
/**
|
|
* Remove run-length encoding even when it is more space efficient
|
|
* return whether a change was applied
|
|
*/
|
|
bool removeRunCompression() {
|
|
return std::accumulate(
|
|
roarings.begin(), roarings.end(), true,
|
|
[](bool previous, std::pair<const uint32_t, roaring::Roaring>& map_entry) {
|
|
return map_entry.second.removeRunCompression() && previous;
|
|
});
|
|
}
|
|
|
|
/** convert array and bitmap containers to run containers when it is more
|
|
* efficient;
|
|
* also convert from run containers when more space efficient. Returns
|
|
* true if the result has at least one run container.
|
|
* Additional savings might be possible by calling shrinkToFit().
|
|
*/
|
|
bool runOptimize() {
|
|
return std::accumulate(
|
|
roarings.begin(), roarings.end(), true,
|
|
[](bool previous, std::pair<const uint32_t, roaring::Roaring>& map_entry) {
|
|
return map_entry.second.runOptimize() && previous;
|
|
});
|
|
}
|
|
|
|
/**
|
|
* If needed, reallocate memory to shrink the memory usage. Returns
|
|
* the number of bytes saved.
|
|
*/
|
|
size_t shrinkToFit() {
|
|
size_t savedBytes = 0;
|
|
auto iter = roarings.begin();
|
|
while (iter != roarings.cend()) {
|
|
if (iter->second.isEmpty()) {
|
|
// empty Roarings are 84 bytes
|
|
savedBytes += 88;
|
|
iter = roarings.erase(iter);
|
|
} else {
|
|
savedBytes += iter->second.shrinkToFit();
|
|
iter++;
|
|
}
|
|
}
|
|
return savedBytes;
|
|
}
|
|
|
|
/**
|
|
* Iterate over the bitmap elements. The function iterator is called once
|
|
* for all the values with ptr (can be nullptr) as the second parameter of each
|
|
* call.
|
|
*
|
|
* roaring_iterator is simply a pointer to a function that returns bool
|
|
* (true means that the iteration should continue while false means that it
|
|
* should stop), and takes (uint32_t,void*) as inputs.
|
|
*/
|
|
void iterate(roaring::api::roaring_iterator64 iterator, void* ptr) const {
|
|
for (const auto& map_entry : roarings) {
|
|
bool should_continue = roaring_iterate64(&map_entry.second.roaring, iterator,
|
|
uint64_t(map_entry.first) << 32, ptr);
|
|
if (!should_continue) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* If the size of the roaring bitmap is strictly greater than rank, then
|
|
this
|
|
function returns true and set element to the element of given rank.
|
|
Otherwise, it returns false.
|
|
*/
|
|
bool select(uint64_t rnk, uint64_t* element) const {
|
|
for (const auto& map_entry : roarings) {
|
|
uint64_t sub_cardinality = (uint64_t)map_entry.second.cardinality();
|
|
if (rnk < sub_cardinality) {
|
|
*element = ((uint64_t)map_entry.first) << 32;
|
|
// assuming little endian
|
|
return map_entry.second.select((uint32_t)rnk, ((uint32_t*)element));
|
|
}
|
|
rnk -= sub_cardinality;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/**
|
|
* Returns the number of integers that are smaller or equal to x.
|
|
*/
|
|
uint64_t rank(uint64_t x) const {
|
|
uint64_t result = 0;
|
|
auto roaring_destination = roarings.find(highBytes(x));
|
|
if (roaring_destination != roarings.cend()) {
|
|
for (auto roaring_iter = roarings.cbegin(); roaring_iter != roaring_destination;
|
|
++roaring_iter) {
|
|
result += roaring_iter->second.cardinality();
|
|
}
|
|
result += roaring_destination->second.rank(lowBytes(x));
|
|
return result;
|
|
}
|
|
roaring_destination = roarings.lower_bound(highBytes(x));
|
|
for (auto roaring_iter = roarings.cbegin(); roaring_iter != roaring_destination;
|
|
++roaring_iter) {
|
|
result += roaring_iter->second.cardinality();
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* write a bitmap to a char buffer.
|
|
* Returns how many bytes were written which should be getSizeInBytes().
|
|
*/
|
|
size_t write(char* buf) const {
|
|
if (is32BitsEnough()) {
|
|
*(buf++) = BitmapTypeCode::type::BITMAP32;
|
|
auto it = roarings.find(0);
|
|
if (it == roarings.end()) { // empty bitmap
|
|
roaring::Roaring r;
|
|
return r.write(buf) + 1;
|
|
}
|
|
return it->second.write(buf) + 1;
|
|
}
|
|
|
|
const char* orig = buf;
|
|
// put type code
|
|
*(buf++) = BitmapTypeCode::type::BITMAP64;
|
|
// push map size
|
|
buf = (char*)encode_varint64((uint8_t*)buf, roarings.size());
|
|
std::for_each(roarings.cbegin(), roarings.cend(),
|
|
[&buf](const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
|
|
// push map key
|
|
encode_fixed32_le((uint8_t*)buf, map_entry.first);
|
|
buf += sizeof(uint32_t);
|
|
// push map value Roaring
|
|
buf += map_entry.second.write(buf);
|
|
});
|
|
return buf - orig;
|
|
}
|
|
|
|
/**
 * Read a bitmap from a serialized version (type code BITMAP32 or BITMAP64).
 *
 * This function is unsafe in the sense that no buffer length is passed in: if
 * you provide bad data, many bytes could be read, possibly reading past the end
 * of the caller's buffer.
 */
static Roaring64Map read(const char* buf) {
    Roaring64Map result;

    // BITMAP32: the payload is a single Roaring, which becomes the bucket for key 0.
    if (*buf == BitmapTypeCode::BITMAP32) {
        roaring::Roaring read = roaring::Roaring::read(buf + 1);
        result.emplaceOrInsert(0, std::move(read));
        return result;
    }

    // Any other type code is expected to be BITMAP64; this is only asserted
    // through DCHECK_EQ, no error is returned.
    DCHECK_EQ(BitmapTypeCode::BITMAP64, *buf);
    buf++;

    // get map size (varint64 took 1~10 bytes)
    uint64_t map_size;
    // `buf + 10` is passed as the decode limit, matching the maximum varint64 length.
    buf = reinterpret_cast<const char*>(
            decode_varint64_ptr(reinterpret_cast<const uint8_t*>(buf),
                                reinterpret_cast<const uint8_t*>(buf + 10), &map_size));
    DCHECK(buf != nullptr);
    for (uint64_t lcv = 0; lcv < map_size; lcv++) {
        // get map key
        uint32_t key = decode_fixed32_le(reinterpret_cast<const uint8_t*>(buf));
        buf += sizeof(uint32_t);
        // read map value Roaring
        roaring::Roaring read_var = roaring::Roaring::read(buf);
        // forward buffer past the last Roaring Bitmap
        buf += read_var.getSizeInBytes();
        result.emplaceOrInsert(key, std::move(read_var));
    }
    return result;
}
|
|
|
|
/**
|
|
* How many bytes are required to serialize this bitmap
|
|
*/
|
|
size_t getSizeInBytes() const {
|
|
if (is32BitsEnough()) {
|
|
auto it = roarings.find(0);
|
|
if (it == roarings.end()) { // empty bitmap
|
|
roaring::Roaring r;
|
|
return r.getSizeInBytes() + 1;
|
|
}
|
|
return it->second.getSizeInBytes() + 1;
|
|
}
|
|
// start with type code, map size and size of keys for each map entry
|
|
size_t init = 1 + varint_length(roarings.size()) + roarings.size() * sizeof(uint32_t);
|
|
return std::accumulate(
|
|
roarings.cbegin(), roarings.cend(), init,
|
|
[=](size_t previous, const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
|
|
// add in bytes used by each Roaring
|
|
return previous + map_entry.second.getSizeInBytes();
|
|
});
|
|
}
|
|
|
|
/**
 * Remove all elements by dropping every bucket from the inner map.
 * The copyOnWrite member is not modified.
 */
void clear() { roarings.clear(); }
|
|
|
|
/**
|
|
* Return whether all elements can be represented in 32 bits
|
|
*/
|
|
bool is32BitsEnough() const { return maximum() <= std::numeric_limits<uint32_t>::max(); }
|
|
|
|
/**
|
|
* Computes the intersection between two bitmaps and returns new bitmap.
|
|
* The current bitmap and the provided bitmap are unchanged.
|
|
*/
|
|
Roaring64Map operator&(const Roaring64Map& o) const { return Roaring64Map(*this) &= o; }
|
|
|
|
/**
|
|
* Computes the difference between two bitmaps and returns new bitmap.
|
|
* The current bitmap and the provided bitmap are unchanged.
|
|
*/
|
|
Roaring64Map operator-(const Roaring64Map& o) const { return Roaring64Map(*this) -= o; }
|
|
|
|
/**
|
|
* Computes the union between two bitmaps and returns new bitmap.
|
|
* The current bitmap and the provided bitmap are unchanged.
|
|
*/
|
|
Roaring64Map operator|(const Roaring64Map& o) const { return Roaring64Map(*this) |= o; }
|
|
|
|
/**
|
|
* Computes the symmetric union between two bitmaps and returns new bitmap.
|
|
* The current bitmap and the provided bitmap are unchanged.
|
|
*/
|
|
Roaring64Map operator^(const Roaring64Map& o) const { return Roaring64Map(*this) ^= o; }
|
|
|
|
/**
|
|
* Whether or not we apply copy and write.
|
|
*/
|
|
void setCopyOnWrite(bool val) {
|
|
if (copyOnWrite == val) return;
|
|
copyOnWrite = val;
|
|
std::for_each(roarings.begin(), roarings.end(),
|
|
[=](std::pair<const uint32_t, roaring::Roaring>& map_entry) {
|
|
map_entry.second.setCopyOnWrite(val);
|
|
});
|
|
}
|
|
|
|
/**
|
|
* Print the content of the bitmap
|
|
*/
|
|
void printf() const {
|
|
if (!isEmpty()) {
|
|
auto map_iter = roarings.cbegin();
|
|
while (map_iter->second.isEmpty()) ++map_iter;
|
|
struct iter_data {
|
|
uint32_t high_bits;
|
|
char first_char = '{';
|
|
} outer_iter_data;
|
|
outer_iter_data.high_bits = roarings.begin()->first;
|
|
map_iter->second.iterate(
|
|
[](uint32_t low_bits, void* inner_iter_data) -> bool {
|
|
std::printf("%c%llu", ((iter_data*)inner_iter_data)->first_char,
|
|
(long long unsigned)uniteBytes(
|
|
((iter_data*)inner_iter_data)->high_bits, low_bits));
|
|
((iter_data*)inner_iter_data)->first_char = ',';
|
|
return true;
|
|
},
|
|
(void*)&outer_iter_data);
|
|
std::for_each(++map_iter, roarings.cend(),
|
|
[](const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
|
|
map_entry.second.iterate(
|
|
[](uint32_t low_bits, void* high_bits) -> bool {
|
|
std::printf(",%llu",
|
|
(long long unsigned)uniteBytes(
|
|
*(uint32_t*)high_bits, low_bits));
|
|
return true;
|
|
},
|
|
(void*)&map_entry.first);
|
|
});
|
|
} else
|
|
std::printf("{");
|
|
std::printf("}\n");
|
|
}
|
|
|
|
/**
|
|
* Print the content of the bitmap into a string
|
|
*/
|
|
std::string toString() const {
|
|
struct iter_data {
|
|
std::string str {};
|
|
uint32_t high_bits;
|
|
char first_char {'{'};
|
|
} outer_iter_data;
|
|
if (!isEmpty()) {
|
|
auto map_iter = roarings.cbegin();
|
|
while (map_iter->second.isEmpty()) ++map_iter;
|
|
outer_iter_data.high_bits = roarings.begin()->first;
|
|
map_iter->second.iterate(
|
|
[](uint32_t low_bits, void* inner_iter_data) -> bool {
|
|
((iter_data*)inner_iter_data)->str +=
|
|
((iter_data*)inner_iter_data)->first_char;
|
|
((iter_data*)inner_iter_data)->str += std::to_string(
|
|
uniteBytes(((iter_data*)inner_iter_data)->high_bits, low_bits));
|
|
((iter_data*)inner_iter_data)->first_char = ',';
|
|
return true;
|
|
},
|
|
(void*)&outer_iter_data);
|
|
std::for_each(
|
|
++map_iter, roarings.cend(),
|
|
[&outer_iter_data](
|
|
const std::pair<const uint32_t, roaring::Roaring>& map_entry) {
|
|
outer_iter_data.high_bits = map_entry.first;
|
|
map_entry.second.iterate(
|
|
[](uint32_t low_bits, void* inner_iter_data) -> bool {
|
|
((iter_data*)inner_iter_data)->str +=
|
|
((iter_data*)inner_iter_data)->first_char;
|
|
((iter_data*)inner_iter_data)->str += std::to_string(uniteBytes(
|
|
((iter_data*)inner_iter_data)->high_bits, low_bits));
|
|
return true;
|
|
},
|
|
(void*)&outer_iter_data);
|
|
});
|
|
} else
|
|
outer_iter_data.str = '{';
|
|
outer_iter_data.str += '}';
|
|
return outer_iter_data.str;
|
|
}
|
|
|
|
/**
 * Returns the current value of the copyOnWrite member, i.e. the setting that
 * is applied to buckets by the mutating methods of this class.
 */
bool getCopyOnWrite() const { return copyOnWrite; }
|
|
|
|
/**
|
|
* computes the logical or (union) between "n" bitmaps (referenced by a
|
|
* pointer).
|
|
*/
|
|
static Roaring64Map fastunion(size_t n, const Roaring64Map** inputs) {
|
|
Roaring64Map ans;
|
|
// not particularly fast
|
|
for (size_t lcv = 0; lcv < n; ++lcv) {
|
|
ans |= *(inputs[lcv]);
|
|
}
|
|
return ans;
|
|
}
|
|
|
|
friend class Roaring64MapSetBitForwardIterator;
|
|
friend class Roaring64MapSetBitBiDirectionalIterator;
|
|
typedef Roaring64MapSetBitForwardIterator const_iterator;
|
|
typedef Roaring64MapSetBitBiDirectionalIterator const_bidirectional_iterator;
|
|
|
|
/**
|
|
* Returns an iterator that can be used to access the position of the
|
|
* set bits. The running time complexity of a full scan is proportional to
|
|
* the
|
|
* number
|
|
* of set bits: be aware that if you have long strings of 1s, this can be
|
|
* very inefficient.
|
|
*
|
|
* It can be much faster to use the toArray method if you want to
|
|
* retrieve the set bits.
|
|
*/
|
|
const_iterator begin() const;
|
|
|
|
/**
|
|
* A bogus iterator that can be used together with begin()
|
|
* for constructions such as for(auto i = b.begin();
|
|
* i!=b.end(); ++i) {}
|
|
*/
|
|
const_iterator end() const;
|
|
|
|
private:
|
|
phmap::btree_map<uint32_t, roaring::Roaring> roarings {};
|
|
bool copyOnWrite {false};
|
|
// Upper 32 bits of `in`; used as the bucket key.
static uint32_t highBytes(const uint64_t in) {
    return static_cast<uint32_t>(in >> 32);
}
// Lower 32 bits of `in`; the value stored inside a bucket.
static uint32_t lowBytes(const uint64_t in) {
    return static_cast<uint32_t>(in & 0xFFFFFFFFULL);
}
// Reassembles a 64-bit value from a bucket key and a value stored in that bucket.
static uint64_t uniteBytes(const uint32_t highBytes, const uint32_t lowBytes) {
    const uint64_t upper = static_cast<uint64_t>(highBytes) << 32;
    const uint64_t lower = static_cast<uint64_t>(lowBytes);
    return upper | lower;
}
|
|
void emplaceOrInsert(const uint32_t key, const roaring::Roaring& value) {
|
|
roarings.emplace(std::make_pair(key, value));
|
|
}
|
|
|
|
void emplaceOrInsert(const uint32_t key, roaring::Roaring&& value) {
|
|
roarings.emplace(key, value);
|
|
}
|
|
};
|
|
|
|
// Forked from https://github.com/RoaringBitmap/CRoaring/blob/v0.4.0/cpp/roaring64map.hh
|
|
// Used to go through the set bits. Not optimally fast, but convenient.
|
|
class Roaring64MapSetBitForwardIterator {
|
|
public:
|
|
typedef std::forward_iterator_tag iterator_category;
|
|
typedef uint64_t* pointer;
|
|
typedef uint64_t& reference_type;
|
|
typedef uint64_t value_type;
|
|
typedef int64_t difference_type;
|
|
typedef Roaring64MapSetBitForwardIterator type_of_iterator;
|
|
|
|
/**
|
|
* Provides the location of the set bit.
|
|
*/
|
|
value_type operator*() const {
|
|
return Roaring64Map::uniteBytes(map_iter->first, i.current_value);
|
|
}
|
|
|
|
bool operator<(const type_of_iterator& o) const {
|
|
if (map_iter == map_end) return false;
|
|
if (o.map_iter == o.map_end) return true;
|
|
return **this < *o;
|
|
}
|
|
|
|
bool operator<=(const type_of_iterator& o) const {
|
|
if (o.map_iter == o.map_end) return true;
|
|
if (map_iter == map_end) return false;
|
|
return **this <= *o;
|
|
}
|
|
|
|
bool operator>(const type_of_iterator& o) const {
|
|
if (o.map_iter == o.map_end) return false;
|
|
if (map_iter == map_end) return true;
|
|
return **this > *o;
|
|
}
|
|
|
|
bool operator>=(const type_of_iterator& o) const {
|
|
if (map_iter == map_end) return true;
|
|
if (o.map_iter == o.map_end) return false;
|
|
return **this >= *o;
|
|
}
|
|
|
|
type_of_iterator& operator++() { // ++i, must returned inc. value
|
|
if (i.has_value == true) roaring_advance_uint32_iterator(&i);
|
|
while (!i.has_value) {
|
|
map_iter++;
|
|
if (map_iter == map_end) return *this;
|
|
roaring_init_iterator(&map_iter->second.roaring, &i);
|
|
}
|
|
return *this;
|
|
}
|
|
|
|
type_of_iterator operator++(int) { // i++, must return orig. value
|
|
Roaring64MapSetBitForwardIterator orig(*this);
|
|
roaring_advance_uint32_iterator(&i);
|
|
while (!i.has_value) {
|
|
map_iter++;
|
|
if (map_iter == map_end) return orig;
|
|
roaring_init_iterator(&map_iter->second.roaring, &i);
|
|
}
|
|
return orig;
|
|
}
|
|
|
|
bool move(const value_type& x) {
|
|
map_iter = p.lower_bound(Roaring64Map::highBytes(x));
|
|
if (map_iter != p.cend()) {
|
|
roaring_init_iterator(&map_iter->second.roaring, &i);
|
|
if (map_iter->first == Roaring64Map::highBytes(x)) {
|
|
if (roaring_move_uint32_iterator_equalorlarger(&i, Roaring64Map::lowBytes(x)))
|
|
return true;
|
|
map_iter++;
|
|
if (map_iter == map_end) return false;
|
|
roaring_init_iterator(&map_iter->second.roaring, &i);
|
|
}
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool operator==(const Roaring64MapSetBitForwardIterator& o) const {
|
|
if (map_iter == map_end && o.map_iter == o.map_end) return true;
|
|
if (o.map_iter == o.map_end) return false;
|
|
return **this == *o;
|
|
}
|
|
|
|
bool operator!=(const Roaring64MapSetBitForwardIterator& o) const {
|
|
if (map_iter == map_end && o.map_iter == o.map_end) return false;
|
|
if (o.map_iter == o.map_end) return true;
|
|
return **this != *o;
|
|
}
|
|
Roaring64MapSetBitForwardIterator& operator=(const Roaring64MapSetBitForwardIterator& r) {
|
|
map_iter = r.map_iter;
|
|
map_end = r.map_end;
|
|
i = r.i;
|
|
return *this;
|
|
}
|
|
|
|
Roaring64MapSetBitForwardIterator(const Roaring64MapSetBitForwardIterator& r)
|
|
: p(r.p), map_iter(r.map_iter), map_end(r.map_end), i(r.i) {}
|
|
|
|
Roaring64MapSetBitForwardIterator(const Roaring64Map& parent, bool exhausted = false)
|
|
: p(parent.roarings), map_end(parent.roarings.cend()) {
|
|
if (exhausted || parent.roarings.empty()) {
|
|
map_iter = parent.roarings.cend();
|
|
} else {
|
|
map_iter = parent.roarings.cbegin();
|
|
roaring_init_iterator(&map_iter->second.roaring, &i);
|
|
while (!i.has_value) {
|
|
map_iter++;
|
|
if (map_iter == map_end) return;
|
|
roaring_init_iterator(&map_iter->second.roaring, &i);
|
|
}
|
|
}
|
|
}
|
|
|
|
protected:
|
|
const phmap::btree_map<uint32_t, roaring::Roaring>& p;
|
|
phmap::btree_map<uint32_t, roaring::Roaring>::const_iterator map_iter {};
|
|
phmap::btree_map<uint32_t, roaring::Roaring>::const_iterator map_end {};
|
|
roaring::api::roaring_uint32_iterator_t i {};
|
|
};
|
|
|
|
class Roaring64MapSetBitBiDirectionalIterator final : public Roaring64MapSetBitForwardIterator {
|
|
public:
|
|
explicit Roaring64MapSetBitBiDirectionalIterator(const Roaring64Map& parent,
|
|
bool exhausted = false)
|
|
: Roaring64MapSetBitForwardIterator(parent, exhausted),
|
|
map_begin(parent.roarings.cbegin()) {}
|
|
|
|
Roaring64MapSetBitBiDirectionalIterator& operator=(const Roaring64MapSetBitForwardIterator& r) {
|
|
*(Roaring64MapSetBitForwardIterator*)this = r;
|
|
return *this;
|
|
}
|
|
|
|
Roaring64MapSetBitBiDirectionalIterator& operator--() { // --i, must return dec.value
|
|
if (map_iter == map_end) {
|
|
--map_iter;
|
|
roaring_init_iterator_last(&map_iter->second.roaring, &i);
|
|
if (i.has_value) return *this;
|
|
}
|
|
|
|
roaring_previous_uint32_iterator(&i);
|
|
while (!i.has_value) {
|
|
if (map_iter == map_begin) return *this;
|
|
map_iter--;
|
|
roaring_init_iterator_last(&map_iter->second.roaring, &i);
|
|
}
|
|
return *this;
|
|
}
|
|
|
|
Roaring64MapSetBitBiDirectionalIterator operator--(int) { // i--, must return orig. value
|
|
Roaring64MapSetBitBiDirectionalIterator orig(*this);
|
|
if (map_iter == map_end) {
|
|
--map_iter;
|
|
roaring_init_iterator_last(&map_iter->second.roaring, &i);
|
|
return orig;
|
|
}
|
|
|
|
roaring_previous_uint32_iterator(&i);
|
|
while (!i.has_value) {
|
|
if (map_iter == map_begin) return orig;
|
|
map_iter--;
|
|
roaring_init_iterator_last(&map_iter->second.roaring, &i);
|
|
}
|
|
return orig;
|
|
}
|
|
|
|
protected:
|
|
phmap::btree_map<uint32_t, roaring::Roaring>::const_iterator map_begin;
|
|
};
|
|
|
|
// Iterator positioned at the first set bit (or at the end when there is none).
inline Roaring64MapSetBitForwardIterator Roaring64Map::begin() const {
    Roaring64MapSetBitForwardIterator first(*this);
    return first;
}
|
|
|
|
// Iterator constructed in the exhausted state, for comparison against begin().
inline Roaring64MapSetBitForwardIterator Roaring64Map::end() const {
    const bool exhausted = true;
    Roaring64MapSetBitForwardIterator past_last(*this, exhausted);
    return past_last;
}
|
|
|
|
} // namespace detail
|
|
|
|
// Represent the in-memory and on-disk structure of Doris's BITMAP data type.
|
|
// Optimize for the case where the bitmap contains 0 or 1 element which is common
|
|
// for streaming load scenario.
|
|
class BitmapValueIterator;
|
|
class BitmapValue {
|
|
public:
|
|
// Construct an empty bitmap.
|
|
BitmapValue() : _type(EMPTY) {}
|
|
|
|
// Construct a bitmap with one element.
|
|
explicit BitmapValue(uint64_t value) : _sv(value), _type(SINGLE) {}
|
|
|
|
// Construct a bitmap from serialized data.
|
|
explicit BitmapValue(const char* src) {
|
|
bool res = deserialize(src);
|
|
DCHECK(res);
|
|
}
|
|
|
|
// Construct a bitmap from given elements.
|
|
explicit BitmapValue(const std::vector<uint64_t>& bits) {
|
|
switch (bits.size()) {
|
|
case 0:
|
|
_type = EMPTY;
|
|
break;
|
|
case 1:
|
|
_type = SINGLE;
|
|
_sv = bits[0];
|
|
break;
|
|
default:
|
|
_type = BITMAP;
|
|
_bitmap.addMany(bits.size(), &bits[0]);
|
|
}
|
|
}
|
|
|
|
void add(uint64_t value) {
|
|
switch (_type) {
|
|
case EMPTY:
|
|
_sv = value;
|
|
_type = SINGLE;
|
|
break;
|
|
case SINGLE:
|
|
//there is no need to convert the type if two variables are equal
|
|
if (_sv == value) {
|
|
break;
|
|
}
|
|
_bitmap.add(_sv);
|
|
_bitmap.add(value);
|
|
_type = BITMAP;
|
|
break;
|
|
case BITMAP:
|
|
_bitmap.add(value);
|
|
}
|
|
}
|
|
|
|
void remove(uint64_t value) {
|
|
switch (_type) {
|
|
case EMPTY:
|
|
break;
|
|
case SINGLE:
|
|
//there is need to convert the type if two variables are equal
|
|
if (_sv == value) {
|
|
_type = EMPTY;
|
|
}
|
|
break;
|
|
case BITMAP:
|
|
_bitmap.remove(value);
|
|
_convert_to_smaller_type();
|
|
}
|
|
}
|
|
|
|
// Compute the union between the current bitmap and the provided bitmap.
|
|
BitmapValue& operator-=(const BitmapValue& rhs) {
|
|
switch (rhs._type) {
|
|
case EMPTY:
|
|
break;
|
|
case SINGLE:
|
|
remove(rhs._sv);
|
|
break;
|
|
case BITMAP:
|
|
switch (_type) {
|
|
case EMPTY:
|
|
break;
|
|
case SINGLE:
|
|
if (rhs._bitmap.contains(_sv)) {
|
|
_type = EMPTY;
|
|
}
|
|
break;
|
|
case BITMAP:
|
|
_bitmap -= rhs._bitmap;
|
|
_convert_to_smaller_type();
|
|
break;
|
|
}
|
|
break;
|
|
}
|
|
return *this;
|
|
}
|
|
|
|
// Compute the union between the current bitmap and the provided bitmap.
|
|
// Possible type transitions are:
|
|
// EMPTY -> SINGLE
|
|
// EMPTY -> BITMAP
|
|
// SINGLE -> BITMAP
|
|
BitmapValue& operator|=(const BitmapValue& rhs) {
    if (rhs._type == SINGLE) {
        add(rhs._sv);
    } else if (rhs._type == BITMAP) {
        if (_type == BITMAP) {
            _bitmap |= rhs._bitmap;
        } else if (_type == EMPTY || _type == SINGLE) {
            // Start from a copy of rhs, then put back our own single element.
            const bool had_single = (_type == SINGLE);
            _bitmap = rhs._bitmap;
            if (had_single) {
                _bitmap.add(_sv);
            }
            _type = BITMAP;
        }
    }
    // rhs EMPTY: the union is this bitmap unchanged.
    return *this;
}
|
|
|
|
// Union this bitmap with every bitmap in `values` and return *this.
// Inputs are split by type: BITMAP inputs are merged with
// Roaring64Map::fastunion, SINGLE inputs are collected and added in bulk.
BitmapValue& fastunion(const std::vector<const BitmapValue*>& values) {
    std::vector<const detail::Roaring64Map*> bitmaps;
    std::vector<uint64_t> single_values;
    for (const BitmapValue* value : values) {
        switch (value->_type) {
        case EMPTY:
            break;
        case SINGLE:
            single_values.push_back(value->_sv);
            break;
        case BITMAP:
            bitmaps.push_back(&value->_bitmap);
            break;
        }
    }

    if (!bitmaps.empty()) {
        // Compute the union of the inputs once, then fold it into this value.
        detail::Roaring64Map merged =
                detail::Roaring64Map::fastunion(bitmaps.size(), bitmaps.data());
        switch (_type) {
        case EMPTY:
            _bitmap = std::move(merged);
            _type = BITMAP;
            break;
        case SINGLE:
            _bitmap = std::move(merged);
            _bitmap.add(_sv);
            _type = BITMAP;
            break;
        case BITMAP:
            _bitmap |= merged;
            break;
        }
    }

    if (_type == EMPTY && single_values.size() == 1) {
        _sv = single_values[0];
        _type = SINGLE;
    } else if (!single_values.empty()) {
        _bitmap.addMany(single_values.size(), single_values.data());
        if (_type == SINGLE) {
            _bitmap.add(_sv);
        }
        _type = BITMAP;
        // Duplicate single values (or one equal to our own element) can leave
        // just one element; fall back to SINGLE in that case, as remove() does.
        _convert_to_smaller_type();
    }

    return *this;
}
|
|
|
|
// Compute the intersection between the current bitmap and the provided bitmap.
|
|
// Possible type transitions are:
|
|
// SINGLE -> EMPTY
|
|
// BITMAP -> EMPTY
|
|
// BITMAP -> SINGLE
|
|
BitmapValue& operator&=(const BitmapValue& rhs) {
    if (rhs._type == EMPTY) {
        // Intersecting with nothing yields nothing.
        _type = EMPTY;
        _bitmap.clear();
    } else if (rhs._type == SINGLE) {
        if (_type == SINGLE) {
            if (_sv != rhs._sv) {
                _type = EMPTY;
            }
        } else if (_type == BITMAP) {
            // At most rhs's element can survive; decide before dropping the bitmap.
            if (_bitmap.contains(rhs._sv)) {
                _type = SINGLE;
                _sv = rhs._sv;
            } else {
                _type = EMPTY;
            }
            _bitmap.clear();
        }
    } else if (rhs._type == BITMAP) {
        if (_type == SINGLE) {
            if (!rhs._bitmap.contains(_sv)) {
                _type = EMPTY;
            }
        } else if (_type == BITMAP) {
            _bitmap &= rhs._bitmap;
            _convert_to_smaller_type();
        }
    }
    return *this;
}
|
|
|
|
// Compute the symmetric union between the current bitmap and the provided bitmap.
|
|
// Possible type transitions are:
|
|
// SINGLE -> EMPTY
|
|
// BITMAP -> EMPTY
|
|
// BITMAP -> SINGLE
|
|
BitmapValue& operator^=(const BitmapValue& rhs) {
|
|
switch (rhs._type) {
|
|
case EMPTY:
|
|
break;
|
|
case SINGLE:
|
|
switch (_type) {
|
|
case EMPTY:
|
|
add(rhs._sv);
|
|
break;
|
|
case SINGLE:
|
|
if (_sv == rhs._sv) {
|
|
_type = EMPTY;
|
|
_bitmap.clear();
|
|
} else {
|
|
add(rhs._sv);
|
|
}
|
|
break;
|
|
case BITMAP:
|
|
if (!_bitmap.contains(rhs._sv)) {
|
|
add(rhs._sv);
|
|
} else {
|
|
_bitmap.remove(rhs._sv);
|
|
}
|
|
break;
|
|
}
|
|
break;
|
|
case BITMAP:
|
|
switch (_type) {
|
|
case EMPTY:
|
|
_bitmap = rhs._bitmap;
|
|
_type = BITMAP;
|
|
break;
|
|
case SINGLE:
|
|
_bitmap = rhs._bitmap;
|
|
_type = BITMAP;
|
|
if (!rhs._bitmap.contains(_sv)) {
|
|
_bitmap.add(_sv);
|
|
} else {
|
|
_bitmap.remove(_sv);
|
|
}
|
|
break;
|
|
case BITMAP:
|
|
_bitmap ^= rhs._bitmap;
|
|
_convert_to_smaller_type();
|
|
break;
|
|
}
|
|
break;
|
|
}
|
|
return *this;
|
|
}
|
|
|
|
// check if value x is present
|
|
bool contains(uint64_t x) const {
|
|
switch (_type) {
|
|
case EMPTY:
|
|
return false;
|
|
case SINGLE:
|
|
return _sv == x;
|
|
case BITMAP:
|
|
return _bitmap.contains(x);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// true if contains a value that belongs to the range [left, right].
|
|
bool contains_any(uint64_t left, uint64_t right) const;
|
|
|
|
uint64_t cardinality() const {
|
|
switch (_type) {
|
|
case EMPTY:
|
|
return 0;
|
|
case SINGLE:
|
|
return 1;
|
|
case BITMAP:
|
|
return _bitmap.cardinality();
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
uint64_t and_cardinality(const BitmapValue& rhs) const {
|
|
switch (rhs._type) {
|
|
case EMPTY:
|
|
return 0;
|
|
case SINGLE:
|
|
switch (_type) {
|
|
case EMPTY:
|
|
return 0;
|
|
case SINGLE:
|
|
return _sv == rhs._sv;
|
|
case BITMAP:
|
|
return _bitmap.contains(rhs._sv);
|
|
}
|
|
case BITMAP:
|
|
switch (_type) {
|
|
case EMPTY:
|
|
return 0;
|
|
case SINGLE:
|
|
return rhs._bitmap.contains(_sv);
|
|
case BITMAP:
|
|
return _bitmap.andCardinality(rhs._bitmap);
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
uint64_t or_cardinality(const BitmapValue& rhs) const {
|
|
switch (rhs._type) {
|
|
case EMPTY:
|
|
return cardinality();
|
|
case SINGLE:
|
|
switch (_type) {
|
|
case EMPTY:
|
|
return 1;
|
|
case SINGLE:
|
|
return 1 + (_sv != rhs._sv);
|
|
case BITMAP:
|
|
return cardinality() + !_bitmap.contains(rhs._sv);
|
|
}
|
|
case BITMAP:
|
|
switch (_type) {
|
|
case EMPTY:
|
|
return rhs.cardinality();
|
|
case SINGLE:
|
|
return rhs.cardinality() + !rhs._bitmap.contains(_sv);
|
|
case BITMAP:
|
|
return _bitmap.orCardinality(rhs._bitmap);
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
uint64_t xor_cardinality(const BitmapValue& rhs) const {
|
|
switch (rhs._type) {
|
|
case EMPTY:
|
|
return cardinality();
|
|
case SINGLE:
|
|
switch (_type) {
|
|
case EMPTY:
|
|
return 1;
|
|
case SINGLE:
|
|
return 2 - 2 * (_sv == rhs._sv);
|
|
case BITMAP:
|
|
return cardinality() + 1 - 2 * (_bitmap.contains(rhs._sv));
|
|
}
|
|
break;
|
|
case BITMAP:
|
|
switch (_type) {
|
|
case EMPTY:
|
|
return rhs.cardinality();
|
|
case SINGLE:
|
|
return rhs.cardinality() + 1 - 2 * (rhs._bitmap.contains(_sv));
|
|
case BITMAP:
|
|
return _bitmap.xorCardinality(rhs._bitmap);
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
uint64_t andnot_cardinality(const BitmapValue& rhs) const {
|
|
switch (rhs._type) {
|
|
case EMPTY:
|
|
return cardinality();
|
|
case SINGLE:
|
|
switch (_type) {
|
|
case EMPTY:
|
|
return 0;
|
|
case SINGLE:
|
|
return 1 - _sv == rhs._sv;
|
|
case BITMAP:
|
|
return cardinality() - _bitmap.contains(rhs._sv);
|
|
}
|
|
break;
|
|
case BITMAP:
|
|
switch (_type) {
|
|
case EMPTY:
|
|
return 0;
|
|
case SINGLE:
|
|
return !rhs._bitmap.contains(_sv);
|
|
case BITMAP:
|
|
return _bitmap.andnotCardinality(rhs._bitmap);
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
// Return how many bytes are required to serialize this bitmap.
|
|
// See BitmapTypeCode for the serialized format.
|
|
size_t getSizeInBytes() {
|
|
size_t res = 0;
|
|
switch (_type) {
|
|
case EMPTY:
|
|
res = 1;
|
|
break;
|
|
case SINGLE:
|
|
if (_sv <= std::numeric_limits<uint32_t>::max()) {
|
|
res = 1 + sizeof(uint32_t);
|
|
} else {
|
|
res = 1 + sizeof(uint64_t);
|
|
}
|
|
break;
|
|
case BITMAP:
|
|
_bitmap.runOptimize();
|
|
_bitmap.shrinkToFit();
|
|
res = _bitmap.getSizeInBytes();
|
|
break;
|
|
}
|
|
return res;
|
|
}
|
|
|
|
// Serialize the bitmap value to dst, which should be large enough.
|
|
// Client should call `getSizeInBytes` first to get the serialized size.
|
|
void write_to(char* dst) const {
|
|
switch (_type) {
|
|
case EMPTY:
|
|
*dst = BitmapTypeCode::EMPTY;
|
|
break;
|
|
case SINGLE:
|
|
if (_sv <= std::numeric_limits<uint32_t>::max()) {
|
|
*(dst++) = BitmapTypeCode::SINGLE32;
|
|
encode_fixed32_le(reinterpret_cast<uint8_t*>(dst), static_cast<uint32_t>(_sv));
|
|
} else {
|
|
*(dst++) = BitmapTypeCode::SINGLE64;
|
|
encode_fixed64_le(reinterpret_cast<uint8_t*>(dst), _sv);
|
|
}
|
|
break;
|
|
case BITMAP:
|
|
_bitmap.write(dst);
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Deserialize a bitmap value from `src`.
|
|
// Return false if `src` begins with unknown type code, true otherwise.
|
|
bool deserialize(const char* src) {
|
|
switch (*src) {
|
|
case BitmapTypeCode::EMPTY:
|
|
_type = EMPTY;
|
|
break;
|
|
case BitmapTypeCode::SINGLE32:
|
|
_type = SINGLE;
|
|
_sv = decode_fixed32_le(reinterpret_cast<const uint8_t*>(src + 1));
|
|
break;
|
|
case BitmapTypeCode::SINGLE64:
|
|
_type = SINGLE;
|
|
_sv = decode_fixed64_le(reinterpret_cast<const uint8_t*>(src + 1));
|
|
break;
|
|
case BitmapTypeCode::BITMAP32:
|
|
case BitmapTypeCode::BITMAP64:
|
|
_type = BITMAP;
|
|
_bitmap = detail::Roaring64Map::read(src);
|
|
break;
|
|
default:
|
|
LOG(ERROR) << "BitmapTypeCode invalid, should between: " << BitmapTypeCode::EMPTY
|
|
<< " and " << BitmapTypeCode::BITMAP64 << " actrual is "
|
|
<< static_cast<int>(*src);
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// Smallest element as a BigIntVal; null when the bitmap is not SINGLE or BITMAP.
doris::BigIntVal minimum() const {
    if (_type == SINGLE) {
        return doris::BigIntVal(_sv);
    }
    if (_type == BITMAP) {
        return doris::BigIntVal(_bitmap.minimum());
    }
    return doris::BigIntVal::null();
}
|
|
|
|
// TODO limit string size to avoid OOM
|
|
std::string to_string() const {
|
|
std::stringstream ss;
|
|
switch (_type) {
|
|
case EMPTY:
|
|
break;
|
|
case SINGLE:
|
|
ss << _sv;
|
|
break;
|
|
case BITMAP: {
|
|
struct IterCtx {
|
|
std::stringstream* ss = nullptr;
|
|
bool first = true;
|
|
} iter_ctx;
|
|
iter_ctx.ss = &ss;
|
|
|
|
_bitmap.iterate(
|
|
[](uint64_t value, void* c) -> bool {
|
|
auto ctx = reinterpret_cast<IterCtx*>(c);
|
|
if (ctx->first) {
|
|
ctx->first = false;
|
|
} else {
|
|
(*ctx->ss) << ",";
|
|
}
|
|
(*ctx->ss) << value;
|
|
return true;
|
|
},
|
|
&iter_ctx);
|
|
break;
|
|
}
|
|
}
|
|
return ss.str();
|
|
}
|
|
|
|
// Largest element as a BigIntVal; null when the bitmap is not SINGLE or BITMAP.
doris::BigIntVal maximum() const {
    if (_type == SINGLE) {
        return doris::BigIntVal(_sv);
    }
    if (_type == BITMAP) {
        return doris::BigIntVal(_bitmap.maximum());
    }
    return doris::BigIntVal::null();
}
|
|
|
|
uint64_t max(bool* empty) const {
|
|
return min_or_max(empty, [&]() { return _bitmap.maximum(); });
|
|
}
|
|
|
|
uint64_t min(bool* empty) const {
|
|
return min_or_max(empty, [&]() { return _bitmap.minimum(); });
|
|
}
|
|
|
|
// True when the bitmap's type is EMPTY (only `_type` is checked).
bool empty() const { return _type == EMPTY; }
|
|
|
|
/**
|
|
* Return new set with specified range (not include the range_end)
|
|
*/
|
|
int64_t sub_range(const int64_t& range_start, const int64_t& range_end,
|
|
BitmapValue* ret_bitmap) {
|
|
switch (_type) {
|
|
case EMPTY:
|
|
return 0;
|
|
case SINGLE: {
|
|
//only single value, so _sv must in [range_start,range_end)
|
|
if (range_start <= _sv && _sv < range_end) {
|
|
ret_bitmap->add(_sv);
|
|
return 1;
|
|
} else {
|
|
return 0;
|
|
}
|
|
}
|
|
case BITMAP: {
|
|
int64_t count = 0;
|
|
for (auto it = _bitmap.begin(); it != _bitmap.end(); ++it) {
|
|
if (*it < range_start) {
|
|
continue;
|
|
}
|
|
if (*it < range_end) {
|
|
ret_bitmap->add(*it);
|
|
++count;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
return count;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* Return new set with specified start and limit
|
|
* @param range_start the start value for the range
|
|
* @param cardinality_limit the length of the subset
|
|
* @return the real count for subset, maybe less than cardinality_limit
|
|
*/
|
|
int64_t sub_limit(const int64_t& range_start, const int64_t& cardinality_limit,
|
|
BitmapValue* ret_bitmap) {
|
|
switch (_type) {
|
|
case EMPTY:
|
|
return 0;
|
|
case SINGLE: {
|
|
//only single value, so range_start must less than _sv
|
|
if (range_start > _sv) {
|
|
return 0;
|
|
} else {
|
|
ret_bitmap->add(_sv);
|
|
return 1;
|
|
}
|
|
}
|
|
case BITMAP: {
|
|
int64_t count = 0;
|
|
for (auto it = _bitmap.begin(); it != _bitmap.end(); ++it) {
|
|
if (*it < range_start) {
|
|
continue;
|
|
}
|
|
if (count < cardinality_limit) {
|
|
ret_bitmap->add(*it);
|
|
++count;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
return count;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* Returns the bitmap elements, starting from the offset position.
|
|
* The number of returned elements is limited by the cardinality_limit parameter.
|
|
* Analog of the substring string function, but for bitmap.
|
|
*/
|
|
int64_t offset_limit(const int64_t& offset, const int64_t& limit, BitmapValue* ret_bitmap) {
|
|
switch (_type) {
|
|
case EMPTY:
|
|
return 0;
|
|
case SINGLE: {
|
|
//only single value, so offset must start 0
|
|
if (offset == 0) {
|
|
ret_bitmap->add(_sv);
|
|
return 1;
|
|
} else {
|
|
return 0;
|
|
}
|
|
}
|
|
case BITMAP: {
|
|
if (std::abs(offset) >= _bitmap.cardinality()) {
|
|
return 0;
|
|
}
|
|
}
|
|
}
|
|
int64_t abs_offset = offset;
|
|
if (offset < 0) {
|
|
abs_offset = _bitmap.cardinality() + offset;
|
|
}
|
|
|
|
int64_t count = 0;
|
|
int64_t offset_count = 0;
|
|
auto it = _bitmap.begin();
|
|
for (; it != _bitmap.end() && offset_count < abs_offset; ++it) {
|
|
++offset_count;
|
|
}
|
|
for (; it != _bitmap.end() && count < limit; ++it, ++count) {
|
|
ret_bitmap->add(*it);
|
|
}
|
|
return count;
|
|
}
|
|
|
|
//for function bitmap_to_array
|
|
void to_array(vectorized::PaddedPODArray<int64_t>& data) const {
    // Appends every element to `data`; an EMPTY bitmap appends nothing.
    if (_type == SINGLE) {
        data.emplace_back(_sv);
    } else if (_type == BITMAP) {
        for (const uint64_t element : _bitmap) {
            data.emplace_back(element);
        }
    }
}
|
|
|
|
// Reset to the empty state, discarding both the single value and the bitmap.
void clear() {
    _sv = 0;
    _bitmap.clear();
    _type = EMPTY;
}
|
|
|
|
// Implement an iterator for convenience
|
|
friend class BitmapValueIterator;
|
|
typedef BitmapValueIterator b_iterator;
|
|
|
|
b_iterator begin() const;
|
|
b_iterator end() const;
|
|
b_iterator lower_bound(uint64_t val) const;
|
|
|
|
private:
|
|
void _convert_to_smaller_type() {
|
|
if (_type == BITMAP) {
|
|
uint64_t c = _bitmap.cardinality();
|
|
if (c > 1) return;
|
|
if (c == 0) {
|
|
_type = EMPTY;
|
|
} else {
|
|
_type = SINGLE;
|
|
_sv = _bitmap.minimum();
|
|
}
|
|
_bitmap.clear();
|
|
}
|
|
}
|
|
|
|
uint64_t min_or_max(bool* empty, std::function<uint64_t()> func) const {
|
|
bool is_empty = false;
|
|
uint64_t result = 0;
|
|
switch (_type) {
|
|
case SINGLE:
|
|
result = _sv;
|
|
break;
|
|
case BITMAP:
|
|
result = func();
|
|
break;
|
|
default:
|
|
is_empty = true;
|
|
}
|
|
if (empty) {
|
|
*empty = is_empty;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
enum BitmapDataType {
|
|
EMPTY = 0,
|
|
SINGLE = 1, // single element
|
|
BITMAP = 2 // more than one elements
|
|
};
|
|
uint64_t _sv = 0; // store the single value when _type == SINGLE
|
|
detail::Roaring64Map _bitmap; // used when _type == BITMAP
|
|
BitmapDataType _type;
|
|
};
|
|
|
|
// A simple implement of bitmap value iterator(Read only)
|
|
// Usage:
|
|
// BitmapValueIterator iter = bitmap_value.begin();
|
|
// BitmapValueIterator end = bitmap_value.end();
|
|
// for (; iter != end; ++iter) {
|
|
// uint64_t v = *iter;
|
|
// ... do something with "v" ...
|
|
// }
|
|
class BitmapValueIterator {
|
|
public:
|
|
BitmapValueIterator(const BitmapValue& bitmap, bool end = false) : _bitmap(bitmap), _end(end) {
|
|
switch (_bitmap._type) {
|
|
case BitmapValue::BitmapDataType::EMPTY:
|
|
_end = true;
|
|
break;
|
|
case BitmapValue::BitmapDataType::SINGLE:
|
|
_sv = _bitmap._sv;
|
|
break;
|
|
case BitmapValue::BitmapDataType::BITMAP:
|
|
_iter = new detail::Roaring64MapSetBitForwardIterator(_bitmap._bitmap, _end);
|
|
break;
|
|
default:
|
|
CHECK(false) << _bitmap._type;
|
|
}
|
|
}
|
|
|
|
BitmapValueIterator(const BitmapValueIterator& other)
|
|
: _bitmap(other._bitmap), _sv(other._sv), _end(other._end) {
|
|
_iter = other._iter ? new detail::Roaring64MapSetBitForwardIterator(*other._iter) : nullptr;
|
|
}
|
|
|
|
~BitmapValueIterator() {
|
|
if (_iter != nullptr) {
|
|
delete _iter;
|
|
_iter = nullptr;
|
|
}
|
|
}
|
|
|
|
uint64_t operator*() const {
|
|
CHECK(!_end) << "should not get value of end iterator";
|
|
switch (_bitmap._type) {
|
|
case BitmapValue::BitmapDataType::SINGLE:
|
|
return _sv;
|
|
case BitmapValue::BitmapDataType::BITMAP:
|
|
return *(*_iter);
|
|
default:
|
|
CHECK(false) << _bitmap._type;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
BitmapValueIterator& operator++() { // ++i, must returned inc. value
|
|
CHECK(!_end) << "should not forward when iterator ends";
|
|
switch (_bitmap._type) {
|
|
case BitmapValue::BitmapDataType::SINGLE:
|
|
_end = true;
|
|
break;
|
|
case BitmapValue::BitmapDataType::BITMAP:
|
|
++(*_iter);
|
|
break;
|
|
default:
|
|
CHECK(false) << _bitmap._type;
|
|
}
|
|
return *this;
|
|
}
|
|
|
|
BitmapValueIterator operator++(int) { // i++, must return orig. value
|
|
CHECK(!_end) << "should not forward when iterator ends";
|
|
BitmapValueIterator orig(*this);
|
|
switch (_bitmap._type) {
|
|
case BitmapValue::BitmapDataType::SINGLE:
|
|
_end = true;
|
|
break;
|
|
case BitmapValue::BitmapDataType::BITMAP:
|
|
++(*_iter);
|
|
break;
|
|
default:
|
|
CHECK(false) << _bitmap._type;
|
|
}
|
|
return orig;
|
|
}
|
|
|
|
bool operator==(const BitmapValueIterator& other) const {
|
|
if (_end && other._end) {
|
|
return true;
|
|
}
|
|
|
|
switch (_bitmap._type) {
|
|
case BitmapValue::BitmapDataType::EMPTY:
|
|
return other._bitmap._type == BitmapValue::BitmapDataType::EMPTY;
|
|
case BitmapValue::BitmapDataType::SINGLE:
|
|
return _end == other._end && _sv == other._sv;
|
|
case BitmapValue::BitmapDataType::BITMAP:
|
|
return *_iter == *(other._iter);
|
|
default:
|
|
CHECK(false) << _bitmap._type;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool operator!=(const BitmapValueIterator& other) const { return !(*this == other); }
|
|
|
|
/**
|
|
* Move the iterator to the first value >= `val`.
|
|
*/
|
|
BitmapValueIterator& move(uint64_t val) {
|
|
switch (_bitmap._type) {
|
|
case BitmapValue::BitmapDataType::SINGLE:
|
|
if (_sv < val) {
|
|
_end = true;
|
|
}
|
|
break;
|
|
case BitmapValue::BitmapDataType::BITMAP:
|
|
if (!_iter->move(val)) {
|
|
_end = true;
|
|
}
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
return *this;
|
|
}
|
|
|
|
private:
|
|
const BitmapValue& _bitmap;
|
|
detail::Roaring64MapSetBitForwardIterator* _iter = nullptr;
|
|
uint64_t _sv = 0;
|
|
bool _end = false;
|
|
};
|
|
|
|
// Iterator at the first element (an end iterator when the bitmap is EMPTY).
inline BitmapValueIterator BitmapValue::begin() const {
    BitmapValueIterator first(*this);
    return first;
}
|
|
|
|
// End iterator for this bitmap.
inline BitmapValueIterator BitmapValue::end() const {
    const bool at_end = true;
    BitmapValueIterator past_last(*this, at_end);
    return past_last;
}
|
|
|
|
// Iterator at the first element >= `val`, or an end iterator if there is none.
inline BitmapValueIterator BitmapValue::lower_bound(uint64_t val) const {
    BitmapValueIterator it(*this);
    it.move(val);
    return it;
}
|
|
|
|
inline bool BitmapValue::contains_any(uint64_t left, uint64_t right) const {
|
|
if (left > right) {
|
|
return false;
|
|
}
|
|
auto it = lower_bound(left);
|
|
return it != end() && *it <= right;
|
|
}
|
|
|
|
} // namespace doris
|