[enhancement](merge-on-write) Add delete bitmap correctness check in commit phase (#23316)

This commit is contained in:
bobhan1
2023-09-02 20:03:00 +08:00
committed by GitHub
parent 4854651901
commit 9898c08620
4 changed files with 35 additions and 13 deletions

View File

@ -163,7 +163,7 @@ Status DeltaWriter::commit_txn(const PSlaveTabletNodes& slave_tablet_nodes,
const bool write_single_replica) {
std::lock_guard<std::mutex> l(_lock);
SCOPED_TIMER(_commit_txn_timer);
_rowset_builder.commit_txn();
RETURN_IF_ERROR(_rowset_builder.commit_txn());
if (write_single_replica) {
for (auto node_info : slave_tablet_nodes.slave_nodes()) {

View File

@ -249,6 +249,20 @@ Status RowsetBuilder::wait_calc_delete_bitmap() {
}
Status RowsetBuilder::commit_txn() {
if (_tablet->enable_unique_key_merge_on_write() &&
config::enable_merge_on_write_correctness_check && _rowset->num_rows() != 0) {
auto st = _tablet->check_delete_bitmap_correctness(
_delete_bitmap, _rowset->end_version() - 1, _req.txn_id, _rowset_ids);
if (!st.ok()) {
LOG(WARNING) << fmt::format(
"[tablet_id:{}][txn_id:{}][load_id:{}][partition_id:{}] "
"delete bitmap correctness check failed in commit phase!",
_req.tablet_id, _req.txn_id, UniqueId(_req.load_id).to_string(),
_req.partition_id);
return st;
}
}
std::lock_guard<std::mutex> l(_lock);
SCOPED_TIMER(_commit_txn_timer);
Status res = _storage_engine->txn_manager()->commit_txn(_req.partition_id, _tablet, _req.txn_id,

View File

@ -3257,8 +3257,11 @@ Status Tablet::update_delete_bitmap_without_lock(const RowsetSharedPtr& rowset)
<< "(us), total rows: " << total_rows;
if (config::enable_merge_on_write_correctness_check) {
// check that every rowset has ROWSET_SENTINEL_MARK
RETURN_IF_ERROR(_check_delete_bitmap_correctness(delete_bitmap, cur_version - 1, -1,
cur_rowset_ids, &specified_rowsets));
auto st = check_delete_bitmap_correctness(delete_bitmap, cur_version - 1, -1,
cur_rowset_ids, &specified_rowsets);
if (!st.ok()) {
LOG(WARNING) << fmt::format("delete bitmap correctness check failed in publish phase!");
}
_remove_sentinel_mark_from_delete_bitmap(delete_bitmap);
}
for (auto iter = delete_bitmap->delete_bitmap.begin();
@ -3361,8 +3364,11 @@ Status Tablet::update_delete_bitmap(const RowsetSharedPtr& rowset,
if (config::enable_merge_on_write_correctness_check && rowset->num_rows() != 0) {
// only do correctness check if the rowset has at least one row written
// check that every rowset has ROWSET_SENTINEL_MARK
RETURN_IF_ERROR(_check_delete_bitmap_correctness(delete_bitmap, cur_version - 1, txn_id,
cur_rowset_ids));
auto st = check_delete_bitmap_correctness(delete_bitmap, cur_version - 1, -1,
cur_rowset_ids, &specified_rowsets);
if (!st.ok()) {
LOG(WARNING) << fmt::format("delete bitmap correctness check failed in publish phase!");
}
_remove_sentinel_mark_from_delete_bitmap(delete_bitmap);
}
@ -3718,10 +3724,10 @@ void Tablet::_remove_sentinel_mark_from_delete_bitmap(DeleteBitmapPtr delete_bit
}
}
Status Tablet::_check_delete_bitmap_correctness(DeleteBitmapPtr delete_bitmap, int64_t max_version,
int64_t txn_id,
const RowsetIdUnorderedSet& rowset_ids,
std::vector<RowsetSharedPtr>* rowsets) {
Status Tablet::check_delete_bitmap_correctness(DeleteBitmapPtr delete_bitmap, int64_t max_version,
int64_t txn_id,
const RowsetIdUnorderedSet& rowset_ids,
std::vector<RowsetSharedPtr>* rowsets) {
RowsetIdUnorderedSet missing_ids;
for (const auto& rowsetid : rowset_ids) {
if (!delete_bitmap->delete_bitmap.contains(
@ -3780,7 +3786,9 @@ Status Tablet::_check_delete_bitmap_correctness(DeleteBitmapPtr delete_bitmap, i
root.Accept(writer);
std::string rowset_status_string = std::string(strbuf.GetString());
LOG_EVERY_SECOND(WARNING) << rowset_status_string;
DCHECK(false) << "check delete bitmap correctness failed!";
// let it crash if correctness check failed in Debug mode
DCHECK(false) << "delete bitmap correctness check failed in publish phase!";
return Status::InternalError("check delete bitmap failed!");
}
return Status::OK();
}

View File

@ -552,6 +552,9 @@ public:
void set_binlog_config(BinlogConfig binlog_config);
void add_sentinel_mark_to_delete_bitmap(DeleteBitmap* delete_bitmap,
const RowsetIdUnorderedSet& rowsetids);
// Verifies that `delete_bitmap` contains an entry for every rowset id in
// `rowset_ids`; ids without one are collected as missing and the check fails.
// Public so that it can be invoked from outside Tablet (RowsetBuilder calls it
// before committing a txn) as well as from Tablet's own delete bitmap update paths.
//
// @param delete_bitmap  the delete bitmap to validate
// @param max_version    version bound for the lookup; visible callers pass
//                       `<version> - 1` (NOTE(review): exact semantics are not
//                       visible in this view - confirm against the implementation)
// @param txn_id         transaction id of the load; some callers pass -1
//                       (presumably "no txn" - TODO confirm)
// @param rowset_ids     ids of the rowsets that must be present in the bitmap
// @param rowsets        optional, defaults to nullptr; NOTE(review): appears to be
//                       a pre-fetched rowset list used on the failure path - confirm
// @return Status::OK() when nothing is missing, otherwise
//         Status::InternalError (the failure path also hits DCHECK(false)).
Status check_delete_bitmap_correctness(DeleteBitmapPtr delete_bitmap, int64_t max_version,
int64_t txn_id, const RowsetIdUnorderedSet& rowset_ids,
std::vector<RowsetSharedPtr>* rowsets = nullptr);
private:
Status _init_once_action();
@ -597,9 +600,6 @@ private:
////////////////////////////////////////////////////////////////////////////
void _remove_sentinel_mark_from_delete_bitmap(DeleteBitmapPtr delete_bitmap);
Status _check_delete_bitmap_correctness(DeleteBitmapPtr delete_bitmap, int64_t max_version,
int64_t txn_id, const RowsetIdUnorderedSet& rowset_ids,
std::vector<RowsetSharedPtr>* rowsets = nullptr);
std::string _get_rowset_info_str(RowsetSharedPtr rowset, bool delete_flag);
public: