[enhancement](merge-on-write) Add delete bitmap correctness check in commit phase (#23316)
This commit is contained in:
@ -163,7 +163,7 @@ Status DeltaWriter::commit_txn(const PSlaveTabletNodes& slave_tablet_nodes,
|
||||
const bool write_single_replica) {
|
||||
std::lock_guard<std::mutex> l(_lock);
|
||||
SCOPED_TIMER(_commit_txn_timer);
|
||||
_rowset_builder.commit_txn();
|
||||
RETURN_IF_ERROR(_rowset_builder.commit_txn());
|
||||
|
||||
if (write_single_replica) {
|
||||
for (auto node_info : slave_tablet_nodes.slave_nodes()) {
|
||||
|
||||
@ -249,6 +249,20 @@ Status RowsetBuilder::wait_calc_delete_bitmap() {
|
||||
}
|
||||
|
||||
Status RowsetBuilder::commit_txn() {
|
||||
if (_tablet->enable_unique_key_merge_on_write() &&
|
||||
config::enable_merge_on_write_correctness_check && _rowset->num_rows() != 0) {
|
||||
auto st = _tablet->check_delete_bitmap_correctness(
|
||||
_delete_bitmap, _rowset->end_version() - 1, _req.txn_id, _rowset_ids);
|
||||
if (!st.ok()) {
|
||||
LOG(WARNING) << fmt::format(
|
||||
"[tablet_id:{}][txn_id:{}][load_id:{}][partition_id:{}] "
|
||||
"delete bitmap correctness check failed in commit phase!",
|
||||
_req.tablet_id, _req.txn_id, UniqueId(_req.load_id).to_string(),
|
||||
_req.partition_id);
|
||||
return st;
|
||||
}
|
||||
}
|
||||
|
||||
std::lock_guard<std::mutex> l(_lock);
|
||||
SCOPED_TIMER(_commit_txn_timer);
|
||||
Status res = _storage_engine->txn_manager()->commit_txn(_req.partition_id, _tablet, _req.txn_id,
|
||||
|
||||
@ -3257,8 +3257,11 @@ Status Tablet::update_delete_bitmap_without_lock(const RowsetSharedPtr& rowset)
|
||||
<< "(us), total rows: " << total_rows;
|
||||
if (config::enable_merge_on_write_correctness_check) {
|
||||
// check if all the rowsets have ROWSET_SENTINEL_MARK
|
||||
RETURN_IF_ERROR(_check_delete_bitmap_correctness(delete_bitmap, cur_version - 1, -1,
|
||||
cur_rowset_ids, &specified_rowsets));
|
||||
auto st = check_delete_bitmap_correctness(delete_bitmap, cur_version - 1, -1,
|
||||
cur_rowset_ids, &specified_rowsets);
|
||||
if (!st.ok()) {
|
||||
LOG(WARNING) << fmt::format("delete bitmap correctness check failed in publish phase!");
|
||||
}
|
||||
_remove_sentinel_mark_from_delete_bitmap(delete_bitmap);
|
||||
}
|
||||
for (auto iter = delete_bitmap->delete_bitmap.begin();
|
||||
@ -3361,8 +3364,11 @@ Status Tablet::update_delete_bitmap(const RowsetSharedPtr& rowset,
|
||||
if (config::enable_merge_on_write_correctness_check && rowset->num_rows() != 0) {
|
||||
// only do correctness check if the rowset has at least one row written
|
||||
// check if all the rowsets have ROWSET_SENTINEL_MARK
|
||||
RETURN_IF_ERROR(_check_delete_bitmap_correctness(delete_bitmap, cur_version - 1, txn_id,
|
||||
cur_rowset_ids));
|
||||
auto st = check_delete_bitmap_correctness(delete_bitmap, cur_version - 1, -1,
|
||||
cur_rowset_ids, &specified_rowsets);
|
||||
if (!st.ok()) {
|
||||
LOG(WARNING) << fmt::format("delete bitmap correctness check failed in publish phase!");
|
||||
}
|
||||
_remove_sentinel_mark_from_delete_bitmap(delete_bitmap);
|
||||
}
|
||||
|
||||
@ -3718,10 +3724,10 @@ void Tablet::_remove_sentinel_mark_from_delete_bitmap(DeleteBitmapPtr delete_bit
|
||||
}
|
||||
}
|
||||
|
||||
Status Tablet::_check_delete_bitmap_correctness(DeleteBitmapPtr delete_bitmap, int64_t max_version,
|
||||
int64_t txn_id,
|
||||
const RowsetIdUnorderedSet& rowset_ids,
|
||||
std::vector<RowsetSharedPtr>* rowsets) {
|
||||
Status Tablet::check_delete_bitmap_correctness(DeleteBitmapPtr delete_bitmap, int64_t max_version,
|
||||
int64_t txn_id,
|
||||
const RowsetIdUnorderedSet& rowset_ids,
|
||||
std::vector<RowsetSharedPtr>* rowsets) {
|
||||
RowsetIdUnorderedSet missing_ids;
|
||||
for (const auto& rowsetid : rowset_ids) {
|
||||
if (!delete_bitmap->delete_bitmap.contains(
|
||||
@ -3780,7 +3786,9 @@ Status Tablet::_check_delete_bitmap_correctness(DeleteBitmapPtr delete_bitmap, i
|
||||
root.Accept(writer);
|
||||
std::string rowset_status_string = std::string(strbuf.GetString());
|
||||
LOG_EVERY_SECOND(WARNING) << rowset_status_string;
|
||||
DCHECK(false) << "check delete bitmap correctness failed!";
|
||||
// let it crash if correctness check failed in Debug mode
|
||||
DCHECK(false) << "delete bitmap correctness check failed in publish phase!";
|
||||
return Status::InternalError("check delete bitmap failed!");
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@ -552,6 +552,9 @@ public:
|
||||
void set_binlog_config(BinlogConfig binlog_config);
|
||||
void add_sentinel_mark_to_delete_bitmap(DeleteBitmap* delete_bitmap,
|
||||
const RowsetIdUnorderedSet& rowsetids);
|
||||
Status check_delete_bitmap_correctness(DeleteBitmapPtr delete_bitmap, int64_t max_version,
|
||||
int64_t txn_id, const RowsetIdUnorderedSet& rowset_ids,
|
||||
std::vector<RowsetSharedPtr>* rowsets = nullptr);
|
||||
|
||||
private:
|
||||
Status _init_once_action();
|
||||
@ -597,9 +600,6 @@ private:
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void _remove_sentinel_mark_from_delete_bitmap(DeleteBitmapPtr delete_bitmap);
|
||||
Status _check_delete_bitmap_correctness(DeleteBitmapPtr delete_bitmap, int64_t max_version,
|
||||
int64_t txn_id, const RowsetIdUnorderedSet& rowset_ids,
|
||||
std::vector<RowsetSharedPtr>* rowsets = nullptr);
|
||||
std::string _get_rowset_info_str(RowsetSharedPtr rowset, bool delete_flag);
|
||||
|
||||
public:
|
||||
|
||||
Reference in New Issue
Block a user