[feature-wip](unique-key-merge-on-write) Add delete bitmap for DSIP-018 (#10548)
Add delete bitmap for DSIP-018: Support Merge-On-Write implementation for UNIQUE KEY data model
This commit is contained in:
committed by
GitHub
parent
4e9d5a7f7a
commit
2084d8bdf3
@ -48,7 +48,8 @@ Status TabletMeta::create(const TCreateTabletReq& request, const TabletUid& tabl
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
TabletMeta::TabletMeta() : _tablet_uid(0, 0), _schema(new TabletSchema) {}
|
||||
TabletMeta::TabletMeta()
|
||||
: _tablet_uid(0, 0), _schema(new TabletSchema), _delete_bitmap(new DeleteBitmap()) {}
|
||||
|
||||
TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id,
|
||||
int64_t replica_id, int32_t schema_hash, uint64_t shard_id,
|
||||
@ -57,7 +58,7 @@ TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id
|
||||
TabletUid tablet_uid, TTabletType::type tabletType,
|
||||
TStorageMedium::type t_storage_medium, const std::string& storage_name,
|
||||
TCompressionType::type compression_type, const std::string& storage_policy)
|
||||
: _tablet_uid(0, 0), _schema(new TabletSchema) {
|
||||
: _tablet_uid(0, 0), _schema(new TabletSchema), _delete_bitmap(new DeleteBitmap()) {
|
||||
TabletMetaPB tablet_meta_pb;
|
||||
tablet_meta_pb.set_table_id(table_id);
|
||||
tablet_meta_pb.set_partition_id(partition_id);
|
||||
@ -456,6 +457,23 @@ void TabletMeta::init_from_pb(const TabletMetaPB& tablet_meta_pb) {
|
||||
_remote_storage_name = tablet_meta_pb.remote_storage_name();
|
||||
_storage_medium = tablet_meta_pb.storage_medium();
|
||||
_cooldown_resource = tablet_meta_pb.storage_policy();
|
||||
|
||||
if (tablet_meta_pb.has_delete_bitmap()) {
|
||||
int rst_ids_size = tablet_meta_pb.delete_bitmap().rowset_ids_size();
|
||||
int seg_ids_size = tablet_meta_pb.delete_bitmap().segment_ids_size();
|
||||
int versions_size = tablet_meta_pb.delete_bitmap().versions_size();
|
||||
int seg_maps_size = tablet_meta_pb.delete_bitmap().segment_delete_bitmaps_size();
|
||||
CHECK(rst_ids_size == seg_ids_size && seg_ids_size == seg_maps_size &&
|
||||
seg_maps_size == versions_size);
|
||||
for (size_t i = 0; i < rst_ids_size; ++i) {
|
||||
RowsetId rst_id;
|
||||
rst_id.init(tablet_meta_pb.delete_bitmap().rowset_ids(i));
|
||||
auto seg_id = tablet_meta_pb.delete_bitmap().segment_ids(i);
|
||||
uint32_t ver = tablet_meta_pb.delete_bitmap().versions(i);
|
||||
auto bitmap = tablet_meta_pb.delete_bitmap().segment_delete_bitmaps(i).data();
|
||||
delete_bitmap().delete_bitmap[{rst_id, seg_id, ver}] = roaring::Roaring::read(bitmap);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void TabletMeta::to_meta_pb(TabletMetaPB* tablet_meta_pb) {
|
||||
@ -505,6 +523,20 @@ void TabletMeta::to_meta_pb(TabletMetaPB* tablet_meta_pb) {
|
||||
tablet_meta_pb->set_remote_storage_name(_remote_storage_name);
|
||||
tablet_meta_pb->set_storage_medium(_storage_medium);
|
||||
tablet_meta_pb->set_storage_policy(_cooldown_resource);
|
||||
|
||||
{
|
||||
std::shared_lock l(delete_bitmap().lock);
|
||||
DeleteBitmapPB* delete_bitmap_pb = tablet_meta_pb->mutable_delete_bitmap();
|
||||
for (auto& [id, bitmap] : delete_bitmap().delete_bitmap) {
|
||||
auto& [rowset_id, segment_id, ver] = id;
|
||||
delete_bitmap_pb->add_rowset_ids(rowset_id.to_string());
|
||||
delete_bitmap_pb->add_segment_ids(segment_id);
|
||||
delete_bitmap_pb->add_versions(ver);
|
||||
std::string bitmap_data(bitmap.getSizeInBytes(), '\0');
|
||||
bitmap.write(bitmap_data.data());
|
||||
*(delete_bitmap_pb->add_segment_delete_bitmaps()) = std::move(bitmap_data);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t TabletMeta::mem_size() const {
|
||||
@ -729,4 +761,102 @@ bool operator!=(const TabletMeta& a, const TabletMeta& b) {
|
||||
return !(a == b);
|
||||
}
|
||||
|
||||
DeleteBitmap::DeleteBitmap() {}
|
||||
|
||||
DeleteBitmap::DeleteBitmap(const DeleteBitmap& o) {
|
||||
delete_bitmap = o.delete_bitmap; // just copy data
|
||||
}
|
||||
|
||||
DeleteBitmap& DeleteBitmap::operator=(const DeleteBitmap& o) {
|
||||
delete_bitmap = o.delete_bitmap; // just copy data
|
||||
return *this;
|
||||
}
|
||||
|
||||
DeleteBitmap::DeleteBitmap(DeleteBitmap&& o) {
|
||||
delete_bitmap = std::move(o.delete_bitmap);
|
||||
}
|
||||
|
||||
DeleteBitmap& DeleteBitmap::operator=(DeleteBitmap&& o) {
|
||||
delete_bitmap = std::move(o.delete_bitmap);
|
||||
return *this;
|
||||
}
|
||||
|
||||
DeleteBitmap DeleteBitmap::snapshot() const {
|
||||
std::shared_lock l(lock);
|
||||
return DeleteBitmap(*this);
|
||||
}
|
||||
|
||||
void DeleteBitmap::add(const BitmapKey& bmk, uint32_t row_id) {
|
||||
std::lock_guard l(lock);
|
||||
delete_bitmap[bmk].add(row_id);
|
||||
}
|
||||
|
||||
int DeleteBitmap::remove(const BitmapKey& bmk, uint32_t row_id) {
|
||||
std::lock_guard l(lock);
|
||||
auto it = delete_bitmap.find(bmk);
|
||||
if (it == delete_bitmap.end()) return -1;
|
||||
it->second.remove(row_id);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void DeleteBitmap::remove(const BitmapKey& start, const BitmapKey& end) {
|
||||
std::lock_guard l(lock);
|
||||
for (auto it = delete_bitmap.lower_bound(start); it != delete_bitmap.end();) {
|
||||
auto& [k, _] = *it;
|
||||
if (k >= end) {
|
||||
break;
|
||||
}
|
||||
it = delete_bitmap.erase(it);
|
||||
}
|
||||
}
|
||||
|
||||
bool DeleteBitmap::contains(const BitmapKey& bmk, uint32_t row_id) const {
|
||||
std::shared_lock l(lock);
|
||||
auto it = delete_bitmap.find(bmk);
|
||||
return it != delete_bitmap.end() && it->second.contains(row_id);
|
||||
}
|
||||
|
||||
int DeleteBitmap::set(const BitmapKey& bmk, const roaring::Roaring& segment_delete_bitmap) {
|
||||
std::lock_guard l(lock);
|
||||
auto [_, inserted] = delete_bitmap.insert_or_assign(bmk, segment_delete_bitmap);
|
||||
return inserted;
|
||||
}
|
||||
|
||||
int DeleteBitmap::get(const BitmapKey& bmk, roaring::Roaring* segment_delete_bitmap) const {
|
||||
std::shared_lock l(lock);
|
||||
auto it = delete_bitmap.find(bmk);
|
||||
if (it == delete_bitmap.end()) return -1;
|
||||
*segment_delete_bitmap = it->second; // copy
|
||||
return 0;
|
||||
}
|
||||
|
||||
const roaring::Roaring* DeleteBitmap::get(const BitmapKey& bmk) const {
|
||||
std::shared_lock l(lock);
|
||||
auto it = delete_bitmap.find(bmk);
|
||||
if (it == delete_bitmap.end()) return nullptr;
|
||||
return &(it->second); // get address
|
||||
}
|
||||
|
||||
void DeleteBitmap::subset(const BitmapKey& start, const BitmapKey& end,
|
||||
DeleteBitmap* subset_rowset_map) const {
|
||||
roaring::Roaring roaring;
|
||||
DCHECK(start < end);
|
||||
std::shared_lock l(lock);
|
||||
for (auto it = delete_bitmap.upper_bound(start); it != delete_bitmap.end(); ++it) {
|
||||
auto& [k, bm] = *it;
|
||||
if (k >= end) {
|
||||
break;
|
||||
}
|
||||
subset_rowset_map->set(k, bm);
|
||||
}
|
||||
}
|
||||
|
||||
void DeleteBitmap::merge(const DeleteBitmap& other) {
|
||||
std::lock_guard l(lock);
|
||||
for (auto& i : other.delete_bitmap) {
|
||||
auto [j, succ] = this->delete_bitmap.insert(i);
|
||||
if (!succ) j->second |= i.second;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace doris
|
||||
|
||||
@ -67,6 +67,7 @@ class RowsetMeta;
|
||||
class Rowset;
|
||||
class DataDir;
|
||||
class TabletMeta;
|
||||
class DeleteBitmap;
|
||||
using TabletMetaSharedPtr = std::shared_ptr<TabletMeta>;
|
||||
|
||||
// Class encapsulates meta of tablet.
|
||||
@ -199,6 +200,8 @@ public:
|
||||
_cooldown_resource = std::move(resource);
|
||||
}
|
||||
|
||||
DeleteBitmap& delete_bitmap() { return *_delete_bitmap; }
|
||||
|
||||
private:
|
||||
Status _save_meta(DataDir* data_dir);
|
||||
void _init_column_from_tcolumn(uint32_t unique_id, const TColumn& tcolumn, ColumnPB* column);
|
||||
@ -239,9 +242,119 @@ private:
|
||||
// FIXME(cyx): Currently `cooldown_resource` is equivalent to `storage_policy`.
|
||||
io::ResourceId _cooldown_resource;
|
||||
|
||||
std::unique_ptr<DeleteBitmap> _delete_bitmap;
|
||||
|
||||
mutable std::shared_mutex _meta_lock;
|
||||
};
|
||||
|
||||
/**
|
||||
* Wraps multiple bitmaps for recording rows (row id) that are deleted or
|
||||
* overwritten.
|
||||
*
|
||||
* RowsetId and SegmentId are for locating segment, Version here is a single
|
||||
* uint32_t means that at which "version" of the load causes the delete or
|
||||
* overwrite.
|
||||
*
|
||||
* The start and end version of a load is the same, it's ok and straightforward
|
||||
* to use a single uint32_t.
|
||||
*
|
||||
* e.g.
|
||||
* There is a key "key1" in rowset id 1, version [1,1], segment id 1, row id 1.
|
||||
* A new load also contains "key1", the rowset id 2, version [2,2], segment id 1
|
||||
* the delete bitmap will be `{1,1,2} -> 1`, which means the "row id 1" in
|
||||
* "rowset id 1, segment id 1" is deleted/overitten by some loads at "version 2"
|
||||
*/
|
||||
class DeleteBitmap {
|
||||
public:
|
||||
mutable std::shared_mutex lock;
|
||||
using SegmentId = uint32_t;
|
||||
using Version = uint32_t;
|
||||
using BitmapKey = std::tuple<RowsetId, SegmentId, Version>;
|
||||
std::map<BitmapKey, roaring::Roaring> delete_bitmap; // Ordered map
|
||||
|
||||
DeleteBitmap();
|
||||
|
||||
/**
|
||||
* Copy c-tor for making delete bitmap snapshot on read path
|
||||
*/
|
||||
DeleteBitmap(const DeleteBitmap& r);
|
||||
DeleteBitmap& operator=(const DeleteBitmap& r);
|
||||
/**
|
||||
* Move c-tor for making delete bitmap snapshot on read path
|
||||
*/
|
||||
DeleteBitmap(DeleteBitmap&& r);
|
||||
DeleteBitmap& operator=(DeleteBitmap&& r);
|
||||
|
||||
/**
|
||||
* Makes a snapshot of delete bimap, read lock will be acquired in this
|
||||
* process
|
||||
*/
|
||||
DeleteBitmap snapshot() const;
|
||||
|
||||
/**
|
||||
* Marks the specific row deleted
|
||||
*/
|
||||
void add(const BitmapKey& bmk, uint32_t row_id);
|
||||
|
||||
/**
|
||||
* Clears the deletetion mark specific row
|
||||
*
|
||||
* @return non-zero if the associated delete bimap does not exist
|
||||
*/
|
||||
int remove(const BitmapKey& bmk, uint32_t row_id);
|
||||
|
||||
/**
|
||||
* Clears bitmaps in range [lower_key, upper_key)
|
||||
*/
|
||||
void remove(const BitmapKey& lower_key, const BitmapKey& upper_key);
|
||||
|
||||
/**
|
||||
* Checks if the given row is marked deleted
|
||||
*
|
||||
* @return true if marked deleted
|
||||
*/
|
||||
bool contains(const BitmapKey& bmk, uint32_t row_id) const;
|
||||
|
||||
/**
|
||||
* Sets the bitmap of specific segment, it's may be insertion or replacement
|
||||
*
|
||||
* @return 0 if the insertion took place, 1 if the assignment took place
|
||||
*/
|
||||
int set(const BitmapKey& bmk, const roaring::Roaring& segment_delete_bitmap);
|
||||
|
||||
/**
|
||||
* Gets a copy of specific delete bmk
|
||||
*
|
||||
* @param segment_delete_bitmap output param
|
||||
* @return non-zero if the associated delete bimap does not exist
|
||||
*/
|
||||
int get(const BitmapKey& bmk, roaring::Roaring* segment_delete_bitmap) const;
|
||||
|
||||
/**
|
||||
* Gets reference to a specific delete map, DO NOT use this function on a
|
||||
* mutable DeleteBitmap object
|
||||
* @return nullptr if the given bitmap does not exist
|
||||
*/
|
||||
const roaring::Roaring* get(const BitmapKey& bmk) const;
|
||||
|
||||
/**
|
||||
* Gets subset of delete_bitmap with given range [start, end)
|
||||
*
|
||||
* @parma start start
|
||||
* @parma end end
|
||||
* @parma subset_delete_map output param
|
||||
*/
|
||||
void subset(const BitmapKey& start, const BitmapKey& end,
|
||||
DeleteBitmap* subset_delete_map) const;
|
||||
|
||||
/**
|
||||
* Merges the given delete bitmap into *this
|
||||
*
|
||||
* @param other
|
||||
*/
|
||||
void merge(const DeleteBitmap& other);
|
||||
};
|
||||
|
||||
static const std::string SEQUENCE_COL = "__DORIS_SEQUENCE_COL__";
|
||||
|
||||
inline TabletUid TabletMeta::tablet_uid() const {
|
||||
|
||||
@ -42,4 +42,83 @@ TEST(TabletMetaTest, SaveAndParse) {
|
||||
EXPECT_EQ(old_tablet_meta, new_tablet_meta);
|
||||
}
|
||||
|
||||
TEST(TabletMetaTest, TestDeleteBitmap) {
|
||||
std::unique_ptr<DeleteBitmap> dbmp(new DeleteBitmap());
|
||||
auto gen1 = [&dbmp](int64_t max_rst_id, uint32_t max_seg_id, uint32_t max_row) {
|
||||
for (int64_t i = 0; i < max_rst_id; ++i) {
|
||||
for (uint32_t j = 0; j < max_seg_id; ++j) {
|
||||
for (uint32_t k = 0; k < max_row; ++k) {
|
||||
dbmp->add({RowsetId {2, 0, 1, i}, j, 0}, k);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
gen1(10, 20, 1000);
|
||||
dbmp->add({RowsetId {2, 0, 1, 2}, 2, 0}, 2); // redundant
|
||||
{
|
||||
roaring::Roaring d;
|
||||
dbmp->get({RowsetId {2, 0, 1, 2}, 0, 0}, &d);
|
||||
EXPECT_EQ(d.cardinality(), 1000);
|
||||
d -= *dbmp->get({RowsetId {2, 0, 1, 2}, 0, 0});
|
||||
EXPECT_EQ(d.cardinality(), 0);
|
||||
}
|
||||
|
||||
// Add version 1 and 2
|
||||
dbmp->add({RowsetId {2, 0, 1, 1}, 1, 1}, 1100);
|
||||
dbmp->add({RowsetId {2, 0, 1, 1}, 1, 1}, 1101);
|
||||
dbmp->add({RowsetId {2, 0, 1, 1}, 1, 1}, 1102);
|
||||
dbmp->add({RowsetId {2, 0, 1, 1}, 1, 1}, 1103);
|
||||
dbmp->add({RowsetId {2, 0, 1, 1}, 1, 2}, 1104);
|
||||
|
||||
ASSERT_EQ(dbmp->delete_bitmap.size(), 10 * 20 + 2);
|
||||
|
||||
{ // Bitmap of certain verisons only get their own row ids
|
||||
auto bm = dbmp->get({RowsetId {2, 0, 1, 1}, 1, 2});
|
||||
ASSERT_EQ(bm->cardinality(), 1);
|
||||
ASSERT_FALSE(bm->contains(999));
|
||||
ASSERT_FALSE(bm->contains(1100));
|
||||
ASSERT_TRUE(bm->contains(1104));
|
||||
}
|
||||
|
||||
{
|
||||
// test remove
|
||||
// Nothing removed
|
||||
dbmp->remove({RowsetId {2, 0, 1, 1}, 0, 0}, {RowsetId {2, 0, 1, 1}, 0, 0});
|
||||
ASSERT_EQ(dbmp->delete_bitmap.size(), 10 * 20 + 2);
|
||||
dbmp->remove({RowsetId {2, 0, 1, 100}, 0, 0}, {RowsetId {2, 0, 1, 100}, 50000, 0});
|
||||
ASSERT_EQ(dbmp->delete_bitmap.size(), 10 * 20 + 2);
|
||||
|
||||
// Remove all seg of rowset {2,0,1,0}
|
||||
dbmp->remove({RowsetId {2, 0, 1, 0}, 0, 0}, {RowsetId {2, 0, 1, 0}, 5000, 0});
|
||||
ASSERT_EQ(dbmp->delete_bitmap.size(), 9 * 20 + 2);
|
||||
// Remove all rowset {2,0,1,7} to {2,0,1,9}
|
||||
dbmp->remove({RowsetId {2, 0, 1, 8}, 0, 0}, {RowsetId {2, 0, 1, 9}, 5000, 0});
|
||||
ASSERT_EQ(dbmp->delete_bitmap.size(), 7 * 20 + 2);
|
||||
}
|
||||
|
||||
{
|
||||
DeleteBitmap db_upper;
|
||||
dbmp->subset({RowsetId {2, 0, 1, 1}, 1, 0}, {RowsetId {2, 0, 1, 1}, 1000000, 0}, &db_upper);
|
||||
roaring::Roaring d;
|
||||
ASSERT_EQ(db_upper.get({RowsetId {2, 0, 1, 1}, 1, 1}, &d), 0);
|
||||
ASSERT_EQ(d.cardinality(), 4);
|
||||
ASSERT_EQ(db_upper.get({RowsetId {2, 0, 1, 1}, 1, 2}, &d), 0);
|
||||
ASSERT_EQ(d.cardinality(), 1);
|
||||
ASSERT_EQ(db_upper.delete_bitmap.size(), 20);
|
||||
}
|
||||
|
||||
{
|
||||
auto old_size = dbmp->delete_bitmap.size();
|
||||
// test merge
|
||||
DeleteBitmap other;
|
||||
other.add({RowsetId {2, 0, 1, 1}, 1, 1}, 1100);
|
||||
dbmp->merge(other);
|
||||
ASSERT_EQ(dbmp->delete_bitmap.size(), old_size);
|
||||
other.add({RowsetId {2, 0, 1, 1}, 1001, 1}, 1100);
|
||||
other.add({RowsetId {2, 0, 1, 1}, 1002, 1}, 1100);
|
||||
dbmp->merge(other);
|
||||
ASSERT_EQ(dbmp->delete_bitmap.size(), old_size + 2);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace doris
|
||||
|
||||
@ -146,5 +146,6 @@
|
||||
"storage_medium": "HDD",
|
||||
"remote_storage_name": "",
|
||||
"replica_id": 0,
|
||||
"storage_policy": ""
|
||||
"storage_policy": "",
|
||||
"delete_bitmap": {}
|
||||
}
|
||||
|
||||
@ -278,6 +278,7 @@ message TabletMetaPB {
|
||||
optional string remote_storage_name = 20;
|
||||
optional int64 replica_id = 21 [default = 0];
|
||||
optional string storage_policy = 22;
|
||||
optional DeleteBitmapPB delete_bitmap = 23;
|
||||
}
|
||||
|
||||
message OLAPIndexHeaderMessage {
|
||||
@ -298,3 +299,11 @@ message OLAPDataHeaderMessage {
|
||||
message OLAPRawDeltaHeaderMessage {
|
||||
required int32 schema_hash = 2;
|
||||
}
|
||||
|
||||
message DeleteBitmapPB {
|
||||
repeated string rowset_ids = 1;
|
||||
repeated uint32 segment_ids = 2;
|
||||
repeated int64 versions = 3;
|
||||
// Serialized roaring bitmaps indexed with {rowset_id, segment_id, version}
|
||||
repeated bytes segment_delete_bitmaps = 4;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user