[Enhancement](merge-on-write) add correctness check for the calculation of delete bitmap (#22282)

Currently, for merge-on-write unique table, the delete bitmap of a rowset will be calculated during flush phase, commit phase and publish phase. In this PR, we add a special mark in every rowset considered when we calculate delete bitmap in these three phases. Before we finally merge the delete bitmap to the table meta's delete bitmap, we will check if all the rowsets contain the special mark to check if we have considered all the rowsets during the above three phases.
Because the executor can not fail in publish phase if the coordinator have received successful commits info from all the executors, we just print logs if this correctness check failed rather than report a failure.
This commit is contained in:
bobhan1
2023-08-11 21:12:35 +08:00
committed by GitHub
parent 44475b64ef
commit cd6453434b
6 changed files with 77 additions and 0 deletions

View File

@ -1054,6 +1054,8 @@ DEFINE_Bool(enable_hdfs_hedged_read, "false");
DEFINE_Int32(hdfs_hedged_read_thread_num, "128");
DEFINE_Int32(hdfs_hedged_read_threshold_time, "500");
DEFINE_mBool(enable_merge_on_write_correctness_check, "true");
// The secure path with user files, used in the `local` table function.
DEFINE_mString(user_files_secure_path, "${DORIS_HOME}");

View File

@ -1111,6 +1111,8 @@ DECLARE_Int32(hdfs_hedged_read_thread_num);
// Maybe overwritten by the value specified when creating catalog
DECLARE_Int32(hdfs_hedged_read_threshold_time);
DECLARE_mBool(enable_merge_on_write_correctness_check);
// The secure path with user files, used in the `local` table function.
DECLARE_mString(user_files_secure_path);

View File

@ -451,6 +451,11 @@ Status SegmentWriter::append_block_with_partial_content(const vectorized::Block*
}
CHECK(use_default_or_null_flag.size() == num_rows);
if (config::enable_merge_on_write_correctness_check) {
_tablet->add_sentinel_mark_to_delete_bitmap(_mow_context->delete_bitmap,
_mow_context->rowset_ids);
}
// read and fill block
auto mutable_full_columns = full_block.mutate_columns();
RETURN_IF_ERROR(fill_missing_columns(mutable_full_columns, use_default_or_null_flag,

View File

@ -2979,6 +2979,17 @@ Status Tablet::calc_segment_delete_bitmap(RowsetSharedPtr rowset,
}
DCHECK_EQ(total, row_id) << "segment total rows: " << total << " row_id:" << row_id;
if (config::enable_merge_on_write_correctness_check) {
RowsetIdUnorderedSet rowsetids;
for (const auto& rowset : specified_rowsets) {
rowsetids.emplace(rowset->rowset_id());
LOG(INFO) << "[tabletID:" << tablet_id() << "]"
<< "[add_sentinel_mark_to_delete_bitmap][end_version:" << end_version << "]"
<< "add:" << rowset->rowset_id();
}
add_sentinel_mark_to_delete_bitmap(delete_bitmap, rowsetids);
}
if (pos > 0) {
RETURN_IF_ERROR(generate_new_block_for_partial_update(
rowset_schema, read_plan_ori, read_plan_update, rsid_to_rowset, &block));
@ -3223,6 +3234,12 @@ Status Tablet::update_delete_bitmap_without_lock(const RowsetSharedPtr& rowset)
<< ", rowset_ids: " << cur_rowset_ids.size() << ", cur max_version: " << cur_version
<< ", transaction_id: " << -1 << ", cost: " << watch.get_elapse_time_us()
<< "(us), total rows: " << total_rows;
if (config::enable_merge_on_write_correctness_check) {
// check if all the rowset has ROWSET_SENTINEL_MARK
RETURN_IF_ERROR(
_check_delete_bitmap_correctness(delete_bitmap, cur_version - 1, cur_rowset_ids));
_remove_sentinel_mark_from_delete_bitmap(delete_bitmap);
}
for (auto iter = delete_bitmap->delete_bitmap.begin();
iter != delete_bitmap->delete_bitmap.end(); ++iter) {
_tablet_meta->delete_bitmap().merge(
@ -3320,6 +3337,14 @@ Status Tablet::update_delete_bitmap(const RowsetSharedPtr& rowset,
<< ", cur max_version: " << cur_version << ", transaction_id: " << txn_id
<< ", cost: " << watch.get_elapse_time_us() << "(us), total rows: " << total_rows;
if (config::enable_merge_on_write_correctness_check && rowset->num_rows() != 0) {
// only do correctness check if the rowset has at least one row written
// check if all the rowset has ROWSET_SENTINEL_MARK
RETURN_IF_ERROR(
_check_delete_bitmap_correctness(delete_bitmap, cur_version - 1, cur_rowset_ids));
_remove_sentinel_mark_from_delete_bitmap(delete_bitmap);
}
// update version without write lock, compaction and publish_txn
// will update delete bitmap, handle compaction with _rowset_update_lock
// and publish_txn runs sequential so no need to lock here
@ -3653,4 +3678,37 @@ Status Tablet::calc_delete_bitmap_between_segments(
return Status::OK();
}
void Tablet::add_sentinel_mark_to_delete_bitmap(DeleteBitmapPtr delete_bitmap,
const RowsetIdUnorderedSet& rowsetids) {
for (const auto& rowsetid : rowsetids) {
delete_bitmap->add({rowsetid, DeleteBitmap::INVALID_SEGMENT_ID, 0},
DeleteBitmap::ROWSET_SENTINEL_MARK);
}
}
void Tablet::_remove_sentinel_mark_from_delete_bitmap(DeleteBitmapPtr delete_bitmap) {
for (auto it = delete_bitmap->delete_bitmap.begin(), end = delete_bitmap->delete_bitmap.end();
it != end;) {
if (std::get<1>(it->first) == DeleteBitmap::INVALID_SEGMENT_ID) {
it = delete_bitmap->delete_bitmap.erase(it);
} else {
++it;
}
}
}
Status Tablet::_check_delete_bitmap_correctness(DeleteBitmapPtr delete_bitmap, int64_t max_version,
const RowsetIdUnorderedSet& rowset_ids) const {
for (const auto& rowsetid : rowset_ids) {
if (!delete_bitmap->delete_bitmap.contains(
{rowsetid, DeleteBitmap::INVALID_SEGMENT_ID, 0})) {
LOG(WARNING) << "check delete bitmap correctness failed, can't find sentinel mark in "
"rowset with RowsetId:"
<< rowsetid << ",max_version:" << max_version;
DCHECK(false) << "check delete bitmap correctness failed!";
return Status::OK();
}
}
return Status::OK();
}
} // namespace doris

View File

@ -549,6 +549,8 @@ public:
int64_t binlog_max_bytes() const { return _tablet_meta->binlog_config().max_bytes(); }
void set_binlog_config(BinlogConfig binlog_config);
void add_sentinel_mark_to_delete_bitmap(DeleteBitmapPtr delete_bitmap,
const RowsetIdUnorderedSet& rowsetids);
private:
Status _init_once_action();
@ -597,6 +599,10 @@ private:
// end cooldown functions
////////////////////////////////////////////////////////////////////////////
void _remove_sentinel_mark_from_delete_bitmap(DeleteBitmapPtr delete_bitmap);
Status _check_delete_bitmap_correctness(DeleteBitmapPtr delete_bitmap, int64_t max_version,
const RowsetIdUnorderedSet& rowset_ids) const;
public:
static const int64_t K_INVALID_CUMULATIVE_POINT = -1;

View File

@ -23,6 +23,7 @@
#include <atomic>
#include <cstddef>
#include <limits>
#include <map>
#include <memory>
#include <mutex>
@ -335,6 +336,9 @@ public:
using Version = uint64_t;
using BitmapKey = std::tuple<RowsetId, SegmentId, Version>;
std::map<BitmapKey, roaring::Roaring> delete_bitmap; // Ordered map
constexpr static inline uint32_t INVALID_SEGMENT_ID = std::numeric_limits<uint32_t>::max() - 1;
constexpr static inline uint32_t ROWSET_SENTINEL_MARK =
std::numeric_limits<uint32_t>::max() - 1;
/**
*