diff --git a/be/src/olap/snapshot_manager.cpp b/be/src/olap/snapshot_manager.cpp index 6e29a37dd7..326649b7dd 100644 --- a/be/src/olap/snapshot_manager.cpp +++ b/be/src/olap/snapshot_manager.cpp @@ -422,13 +422,6 @@ Status SnapshotManager::_create_snapshot_files(const TabletSharedPtr& ref_tablet break; } } - - // Take a full snapshot, will revise according to missed rowset later. - if (ref_tablet->keys_type() == UNIQUE_KEYS && - ref_tablet->enable_unique_key_merge_on_write()) { - delete_bitmap_snapshot = ref_tablet->tablet_meta()->delete_bitmap().snapshot( - ref_tablet->max_version().second); - } } int64_t version = -1; diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index 4c10ca90d6..ac24121a77 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -195,17 +195,59 @@ void Tablet::save_meta() { } Status Tablet::revise_tablet_meta(const std::vector& to_add, - const std::vector& to_delete) { + const std::vector& to_delete, + bool is_incremental_clone) { LOG(INFO) << "begin to revise tablet. tablet_id=" << tablet_id(); delete_rowsets(to_delete, false); add_rowsets(to_add); - // FIXME: How to reclaim delete bitmap of deleted rowsets and stale rowsets?
if (keys_type() == UNIQUE_KEYS && enable_unique_key_merge_on_write()) { auto new_rowset_tree = std::make_unique(); ModifyRowSetTree(*_rowset_tree, to_delete, to_add, new_rowset_tree.get()); _rowset_tree = std::move(new_rowset_tree); + std::vector calc_delete_bitmap_rowsets; + int64_t to_add_min_version = INT64_MAX; + int64_t to_add_max_version = INT64_MIN; for (auto& rs : to_add) { - RETURN_IF_ERROR(update_delete_bitmap_without_lock(rs)); + if (to_add_min_version > rs->start_version()) { + to_add_min_version = rs->start_version(); + } + if (to_add_max_version < rs->end_version()) { + to_add_max_version = rs->end_version(); + } + } + Version calc_delete_bitmap_ver; + if (is_incremental_clone) { + // Starting from the smallest version in to_add, every rowset up to the + // max version needs its delete bitmap recalculated. + // For example: + // local tablet: [0-1] [2-5] [6-6] [9-10] + // clone tablet: [7-7] [8-8] + // new tablet: [0-1] [2-5] [6-6] [7-7] [8-8] [9-10] + // [7-7] [8-8] [9-10] need to recalculate delete bitmap + calc_delete_bitmap_ver = Version(to_add_min_version, max_version().second); + } else { + // For a full clone, the delete bitmap of to_add's rowsets was cloned from the remote; + // only the remaining local rowsets need their delete bitmap recalculated. + // For example: + // local tablet: [0-1]x [2-5]x [6-6]x [7-7]x [9-10] + // clone tablet: [0-1] [2-4] [5-6] [7-8] + // new tablet: [0-1] [2-4] [5-6] [7-8] [9-10] + // only [9-10] need to recalculate delete bitmap + CHECK_EQ(to_add_min_version, 0) << "to_add_min_version is: " << to_add_min_version; + calc_delete_bitmap_ver = Version(to_add_max_version + 1, max_version().second); + } + // Skip an inverted range (first > second): no rowset is left to recalculate, and + // an invalid range must not reach the CHECKed capture_consistent_rowsets call. + if (calc_delete_bitmap_ver.first <= calc_delete_bitmap_ver.second) { + Status res = + capture_consistent_rowsets(calc_delete_bitmap_ver, &calc_delete_bitmap_rowsets); + // The in-memory state has already been modified, so we can't return an error here.
+ CHECK(res.ok()) << "fail to capture_consistent_rowsets, res: " << res; + + for (const auto& rs : calc_delete_bitmap_rowsets) { + res = update_delete_bitmap_without_lock(rs); + CHECK(res.ok()) << "fail to update_delete_bitmap_without_lock, res: " << res; + } } } // reconstruct from tablet meta @@ -2473,24 +2515,19 @@ void Tablet::_rowset_ids_difference(const RowsetIdUnorderedSet& cur, // The caller should hold _rowset_update_lock and _meta_lock lock. Status Tablet::update_delete_bitmap_without_lock(const RowsetSharedPtr& rowset) { - int64_t cur_version = rowset->start_version(); + int64_t cur_version = rowset->end_version(); std::vector segments; _load_rowset_segments(rowset, &segments); + RowsetIdUnorderedSet cur_rowset_ids = all_rs_id(cur_version - 1); DeleteBitmapPtr delete_bitmap = std::make_shared(tablet_id()); - RETURN_IF_ERROR(calc_delete_bitmap(rowset->rowset_id(), segments, nullptr, delete_bitmap, - cur_version - 1, true)); + RETURN_IF_ERROR(calc_delete_bitmap(rowset->rowset_id(), segments, &cur_rowset_ids, + delete_bitmap, cur_version - 1, true)); for (auto iter = delete_bitmap->delete_bitmap.begin(); iter != delete_bitmap->delete_bitmap.end(); ++iter) { - int ret = _tablet_meta->delete_bitmap().set( + _tablet_meta->delete_bitmap().merge( {std::get<0>(iter->first), std::get<1>(iter->first), cur_version}, iter->second); - DCHECK(ret == 1); - if (ret != 1) { - LOG(INFO) << "failed to set delete bimap, key is: |" << std::get<0>(iter->first) << "|" - << std::get<1>(iter->first) << "|" << cur_version; - return Status::InternalError("failed to set delete bimap"); - } } return Status::OK(); @@ -2549,14 +2586,8 @@ Status Tablet::update_delete_bitmap(const RowsetSharedPtr& rowset, const TabletT // and publish_txn runs sequential so no need to lock here for (auto iter = delete_bitmap->delete_bitmap.begin(); iter != delete_bitmap->delete_bitmap.end(); ++iter) { - int ret = _tablet_meta->delete_bitmap().set( + _tablet_meta->delete_bitmap().merge( {std::get<0>(iter->first),
std::get<1>(iter->first), cur_version}, iter->second); - DCHECK(ret == 1); - if (ret != 1) { - LOG(INFO) << "failed to set delete bimap, key is: |" << std::get<0>(iter->first) << "|" - << std::get<1>(iter->first) << "|" << cur_version; - return Status::InternalError("failed to set delete bimap"); - } } return Status::OK(); diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h index c14f27e748..5931faab82 100644 --- a/be/src/olap/tablet.h +++ b/be/src/olap/tablet.h @@ -83,7 +83,8 @@ public: void save_meta(); // Used in clone task, to update local meta when finishing a clone job Status revise_tablet_meta(const std::vector& to_add, - const std::vector& to_delete); + const std::vector& to_delete, + bool is_incremental_clone); int64_t cumulative_layer_point() const; void set_cumulative_layer_point(int64_t new_point); diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp index becf2c5c03..153acb3256 100644 --- a/be/src/olap/tablet_meta.cpp +++ b/be/src/olap/tablet_meta.cpp @@ -726,6 +726,11 @@ void TabletMeta::modify_rs_metas(const std::vector& to_add, ++it; } } + // delete delete_bitmap of to_delete's rowsets if not added to _stale_rs_metas. + if (same_version && _enable_unique_key_merge_on_write) { + delete_bitmap().remove({rs_to_del->rowset_id(), 0, 0}, + {rs_to_del->rowset_id(), UINT32_MAX, 0}); + } } if (!same_version) { // put to_delete rowsets in _stale_rs_metas. 
@@ -980,6 +985,14 @@ void DeleteBitmap::subset(const BitmapKey& start, const BitmapKey& end, } } +void DeleteBitmap::merge(const BitmapKey& bmk, const roaring::Roaring& segment_delete_bitmap) { + std::lock_guard l(lock); + auto [iter, inserted] = delete_bitmap.try_emplace(bmk, segment_delete_bitmap); + if (!inserted) { + iter->second |= segment_delete_bitmap; + } +} + void DeleteBitmap::merge(const DeleteBitmap& other) { std::lock_guard l(lock); for (auto& i : other.delete_bitmap) { diff --git a/be/src/olap/tablet_meta.h b/be/src/olap/tablet_meta.h index 0c9ed40136..6410563bd3 100644 --- a/be/src/olap/tablet_meta.h +++ b/be/src/olap/tablet_meta.h @@ -361,6 +361,14 @@ public: void subset(const BitmapKey& start, const BitmapKey& end, DeleteBitmap* subset_delete_map) const; + /** + * Merges the given segment delete bitmap into the entry stored under bmk + * + * @param bmk key of the entry to create, or to union into if it already exists + * @param segment_delete_bitmap bitmap to insert, or to OR into the existing entry + */ + void merge(const BitmapKey& bmk, const roaring::Roaring& segment_delete_bitmap); + /** * Merges the given delete bitmap into *this * diff --git a/be/src/olap/task/engine_clone_task.cpp b/be/src/olap/task/engine_clone_task.cpp index 70f1bd2d2e..7ef38f450b 100644 --- a/be/src/olap/task/engine_clone_task.cpp +++ b/be/src/olap/task/engine_clone_task.cpp @@ -579,7 +579,7 @@ Status EngineCloneTask::_finish_incremental_clone(Tablet* tablet, /// clone_data to tablet /// For incremental clone, nothing will be deleted. /// So versions_to_delete is empty.
- return tablet->revise_tablet_meta(rowsets_to_clone, {}); + return tablet->revise_tablet_meta(rowsets_to_clone, {}, true); } /// This method will do: @@ -632,7 +632,10 @@ Status EngineCloneTask::_finish_full_clone(Tablet* tablet, to_add.push_back(std::move(rs)); } tablet->tablet_meta()->set_cooldown_meta_id(cloned_tablet_meta->cooldown_meta_id()); - return tablet->revise_tablet_meta(to_add, to_delete); + if (tablet->enable_unique_key_merge_on_write()) { + tablet->tablet_meta()->delete_bitmap() = cloned_tablet_meta->delete_bitmap(); + } + return tablet->revise_tablet_meta(to_add, to_delete, false); // TODO(plat1ko): write cooldown meta to remote if this replica is cooldown replica }