[Enhancement](merge-on-write) add correctness check for the calculation of delete bitmap (#22282)
Currently, for merge-on-write unique table, the delete bitmap of a rowset will be calculated during flush phase, commit phase and publish phase. In this PR, we add a special mark in every rowset considered when we calculate delete bitmap in these three phases. Before we finally merge the delete bitmap to the table meta's delete bitmap, we will check if all the rowsets contain the special mark to check if we have considered all the rowsets during the above three phases. Because the executor can not fail in publish phase if the coordinator have received successful commits info from all the executors, we just print logs if this correctness check failed rather than report a failure.
This commit is contained in:
@ -1054,6 +1054,8 @@ DEFINE_Bool(enable_hdfs_hedged_read, "false");
|
||||
DEFINE_Int32(hdfs_hedged_read_thread_num, "128");
|
||||
DEFINE_Int32(hdfs_hedged_read_threshold_time, "500");
|
||||
|
||||
DEFINE_mBool(enable_merge_on_write_correctness_check, "true");
|
||||
|
||||
// The secure path with user files, used in the `local` table function.
|
||||
DEFINE_mString(user_files_secure_path, "${DORIS_HOME}");
|
||||
|
||||
|
||||
@ -1111,6 +1111,8 @@ DECLARE_Int32(hdfs_hedged_read_thread_num);
|
||||
// Maybe overwritten by the value specified when creating catalog
|
||||
DECLARE_Int32(hdfs_hedged_read_threshold_time);
|
||||
|
||||
DECLARE_mBool(enable_merge_on_write_correctness_check);
|
||||
|
||||
// The secure path with user files, used in the `local` table function.
|
||||
DECLARE_mString(user_files_secure_path);
|
||||
|
||||
|
||||
@ -451,6 +451,11 @@ Status SegmentWriter::append_block_with_partial_content(const vectorized::Block*
|
||||
}
|
||||
CHECK(use_default_or_null_flag.size() == num_rows);
|
||||
|
||||
if (config::enable_merge_on_write_correctness_check) {
|
||||
_tablet->add_sentinel_mark_to_delete_bitmap(_mow_context->delete_bitmap,
|
||||
_mow_context->rowset_ids);
|
||||
}
|
||||
|
||||
// read and fill block
|
||||
auto mutable_full_columns = full_block.mutate_columns();
|
||||
RETURN_IF_ERROR(fill_missing_columns(mutable_full_columns, use_default_or_null_flag,
|
||||
|
||||
@ -2979,6 +2979,17 @@ Status Tablet::calc_segment_delete_bitmap(RowsetSharedPtr rowset,
|
||||
}
|
||||
DCHECK_EQ(total, row_id) << "segment total rows: " << total << " row_id:" << row_id;
|
||||
|
||||
if (config::enable_merge_on_write_correctness_check) {
|
||||
RowsetIdUnorderedSet rowsetids;
|
||||
for (const auto& rowset : specified_rowsets) {
|
||||
rowsetids.emplace(rowset->rowset_id());
|
||||
LOG(INFO) << "[tabletID:" << tablet_id() << "]"
|
||||
<< "[add_sentinel_mark_to_delete_bitmap][end_version:" << end_version << "]"
|
||||
<< "add:" << rowset->rowset_id();
|
||||
}
|
||||
add_sentinel_mark_to_delete_bitmap(delete_bitmap, rowsetids);
|
||||
}
|
||||
|
||||
if (pos > 0) {
|
||||
RETURN_IF_ERROR(generate_new_block_for_partial_update(
|
||||
rowset_schema, read_plan_ori, read_plan_update, rsid_to_rowset, &block));
|
||||
@ -3223,6 +3234,12 @@ Status Tablet::update_delete_bitmap_without_lock(const RowsetSharedPtr& rowset)
|
||||
<< ", rowset_ids: " << cur_rowset_ids.size() << ", cur max_version: " << cur_version
|
||||
<< ", transaction_id: " << -1 << ", cost: " << watch.get_elapse_time_us()
|
||||
<< "(us), total rows: " << total_rows;
|
||||
if (config::enable_merge_on_write_correctness_check) {
|
||||
// check if all the rowset has ROWSET_SENTINEL_MARK
|
||||
RETURN_IF_ERROR(
|
||||
_check_delete_bitmap_correctness(delete_bitmap, cur_version - 1, cur_rowset_ids));
|
||||
_remove_sentinel_mark_from_delete_bitmap(delete_bitmap);
|
||||
}
|
||||
for (auto iter = delete_bitmap->delete_bitmap.begin();
|
||||
iter != delete_bitmap->delete_bitmap.end(); ++iter) {
|
||||
_tablet_meta->delete_bitmap().merge(
|
||||
@ -3320,6 +3337,14 @@ Status Tablet::update_delete_bitmap(const RowsetSharedPtr& rowset,
|
||||
<< ", cur max_version: " << cur_version << ", transaction_id: " << txn_id
|
||||
<< ", cost: " << watch.get_elapse_time_us() << "(us), total rows: " << total_rows;
|
||||
|
||||
if (config::enable_merge_on_write_correctness_check && rowset->num_rows() != 0) {
|
||||
// only do correctness check if the rowset has at least one row written
|
||||
// check if all the rowset has ROWSET_SENTINEL_MARK
|
||||
RETURN_IF_ERROR(
|
||||
_check_delete_bitmap_correctness(delete_bitmap, cur_version - 1, cur_rowset_ids));
|
||||
_remove_sentinel_mark_from_delete_bitmap(delete_bitmap);
|
||||
}
|
||||
|
||||
// update version without write lock, compaction and publish_txn
|
||||
// will update delete bitmap, handle compaction with _rowset_update_lock
|
||||
// and publish_txn runs sequential so no need to lock here
|
||||
@ -3653,4 +3678,37 @@ Status Tablet::calc_delete_bitmap_between_segments(
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
void Tablet::add_sentinel_mark_to_delete_bitmap(DeleteBitmapPtr delete_bitmap,
|
||||
const RowsetIdUnorderedSet& rowsetids) {
|
||||
for (const auto& rowsetid : rowsetids) {
|
||||
delete_bitmap->add({rowsetid, DeleteBitmap::INVALID_SEGMENT_ID, 0},
|
||||
DeleteBitmap::ROWSET_SENTINEL_MARK);
|
||||
}
|
||||
}
|
||||
|
||||
void Tablet::_remove_sentinel_mark_from_delete_bitmap(DeleteBitmapPtr delete_bitmap) {
|
||||
for (auto it = delete_bitmap->delete_bitmap.begin(), end = delete_bitmap->delete_bitmap.end();
|
||||
it != end;) {
|
||||
if (std::get<1>(it->first) == DeleteBitmap::INVALID_SEGMENT_ID) {
|
||||
it = delete_bitmap->delete_bitmap.erase(it);
|
||||
} else {
|
||||
++it;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Status Tablet::_check_delete_bitmap_correctness(DeleteBitmapPtr delete_bitmap, int64_t max_version,
|
||||
const RowsetIdUnorderedSet& rowset_ids) const {
|
||||
for (const auto& rowsetid : rowset_ids) {
|
||||
if (!delete_bitmap->delete_bitmap.contains(
|
||||
{rowsetid, DeleteBitmap::INVALID_SEGMENT_ID, 0})) {
|
||||
LOG(WARNING) << "check delete bitmap correctness failed, can't find sentinel mark in "
|
||||
"rowset with RowsetId:"
|
||||
<< rowsetid << ",max_version:" << max_version;
|
||||
DCHECK(false) << "check delete bitmap correctness failed!";
|
||||
return Status::OK();
|
||||
}
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
} // namespace doris
|
||||
|
||||
@ -549,6 +549,8 @@ public:
|
||||
int64_t binlog_max_bytes() const { return _tablet_meta->binlog_config().max_bytes(); }
|
||||
|
||||
void set_binlog_config(BinlogConfig binlog_config);
|
||||
void add_sentinel_mark_to_delete_bitmap(DeleteBitmapPtr delete_bitmap,
|
||||
const RowsetIdUnorderedSet& rowsetids);
|
||||
|
||||
private:
|
||||
Status _init_once_action();
|
||||
@ -597,6 +599,10 @@ private:
|
||||
// end cooldown functions
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void _remove_sentinel_mark_from_delete_bitmap(DeleteBitmapPtr delete_bitmap);
|
||||
Status _check_delete_bitmap_correctness(DeleteBitmapPtr delete_bitmap, int64_t max_version,
|
||||
const RowsetIdUnorderedSet& rowset_ids) const;
|
||||
|
||||
public:
|
||||
static const int64_t K_INVALID_CUMULATIVE_POINT = -1;
|
||||
|
||||
|
||||
@ -23,6 +23,7 @@
|
||||
|
||||
#include <atomic>
|
||||
#include <cstddef>
|
||||
#include <limits>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
@ -335,6 +336,9 @@ public:
|
||||
using Version = uint64_t;
|
||||
using BitmapKey = std::tuple<RowsetId, SegmentId, Version>;
|
||||
std::map<BitmapKey, roaring::Roaring> delete_bitmap; // Ordered map
|
||||
constexpr static inline uint32_t INVALID_SEGMENT_ID = std::numeric_limits<uint32_t>::max() - 1;
|
||||
constexpr static inline uint32_t ROWSET_SENTINEL_MARK =
|
||||
std::numeric_limits<uint32_t>::max() - 1;
|
||||
|
||||
/**
|
||||
*
|
||||
|
||||
Reference in New Issue
Block a user