[feature-wip](unique-key-merge-on-write) opt lock and only save valid delete_bitmap (#11953)

1. use rlock in most logic instead of wrlock
2. filter stale rowset's delete bitmap in save meta
3. add a delete_bitmap lock to handle compaction and publish_txn conflict

Co-authored-by: yixiutt <yixiu@selectdb.com>
This commit is contained in:
yixiutt
2022-08-23 14:43:40 +08:00
committed by GitHub
parent 30a13c8141
commit 60fddd56e7
10 changed files with 73 additions and 51 deletions

View File

@ -256,17 +256,23 @@ Status Compaction::construct_input_rowset_readers() {
Status Compaction::modify_rowsets() {
std::vector<RowsetSharedPtr> output_rowsets;
output_rowsets.push_back(_output_rowset);
std::lock_guard<std::shared_mutex> wrlock(_tablet->get_header_lock());
{
std::lock_guard<std::mutex> wrlock_(_tablet->get_rowset_update_lock());
std::lock_guard<std::shared_mutex> wrlock(_tablet->get_header_lock());
// update dst rowset delete bitmap
if (_tablet->keys_type() == KeysType::UNIQUE_KEYS &&
_tablet->enable_unique_key_merge_on_write()) {
_tablet->tablet_meta()->update_delete_bitmap(_input_rowsets, _output_rs_writer->version(),
_rowid_conversion);
// update dst rowset delete bitmap
if (_tablet->keys_type() == KeysType::UNIQUE_KEYS &&
_tablet->enable_unique_key_merge_on_write()) {
_tablet->tablet_meta()->update_delete_bitmap(
_input_rowsets, _output_rs_writer->version(), _rowid_conversion);
}
RETURN_NOT_OK(_tablet->modify_rowsets(output_rowsets, _input_rowsets, true));
}
{
std::shared_lock rlock(_tablet->get_header_lock());
_tablet->save_meta();
}
RETURN_NOT_OK(_tablet->modify_rowsets(output_rowsets, _input_rowsets, true));
_tablet->save_meta();
return Status::OK();
}

View File

@ -282,7 +282,7 @@ Status DeltaWriter::wait_flush() {
void DeltaWriter::_reset_mem_table() {
if (_tablet->enable_unique_key_merge_on_write()) {
_delete_bitmap.reset(new DeleteBitmap(-1));
_delete_bitmap.reset(new DeleteBitmap(_tablet->tablet_id()));
}
_mem_table.reset(new MemTable(_tablet, _schema.get(), _tablet_schema.get(), _req.slots,
_req.tuple_desc, _rowset_writer.get(), _delete_bitmap,

View File

@ -400,20 +400,21 @@ bool MemTable::need_to_agg() {
Status MemTable::_generate_delete_bitmap() {
// generate delete bitmap, build a tmp rowset and load recent segment
if (_tablet->enable_unique_key_merge_on_write()) {
auto rowset = _rowset_writer->build_tmp();
auto beta_rowset = reinterpret_cast<BetaRowset*>(rowset.get());
std::vector<segment_v2::SegmentSharedPtr> segments;
segment_v2::SegmentSharedPtr segment;
if (beta_rowset->num_segments() == 0) {
return Status::OK();
}
RETURN_IF_ERROR(beta_rowset->load_segment(beta_rowset->num_segments() - 1, &segment));
segments.push_back(segment);
std::lock_guard<std::shared_mutex> meta_wrlock(_tablet->get_header_lock());
RETURN_IF_ERROR(_tablet->calc_delete_bitmap(beta_rowset->rowset_id(), segments,
&_rowset_ids, _delete_bitmap));
if (!_tablet->enable_unique_key_merge_on_write()) {
return Status::OK();
}
auto rowset = _rowset_writer->build_tmp();
auto beta_rowset = reinterpret_cast<BetaRowset*>(rowset.get());
std::vector<segment_v2::SegmentSharedPtr> segments;
segment_v2::SegmentSharedPtr segment;
if (beta_rowset->num_segments() == 0) {
return Status::OK();
}
RETURN_IF_ERROR(beta_rowset->load_segment(beta_rowset->num_segments() - 1, &segment));
segments.push_back(segment);
std::shared_lock meta_rlock(_tablet->get_header_lock());
RETURN_IF_ERROR(_tablet->calc_delete_bitmap(beta_rowset->rowset_id(), segments, &_rowset_ids,
_delete_bitmap));
return Status::OK();
}

View File

@ -2064,7 +2064,8 @@ Status Tablet::update_delete_bitmap(const RowsetSharedPtr& rowset, DeleteBitmapP
std::vector<segment_v2::SegmentSharedPtr> segments;
_load_rowset_segments(rowset, &segments);
std::lock_guard<std::shared_mutex> meta_wrlock(_meta_lock);
std::lock_guard<std::mutex> rwlock(_rowset_update_lock);
std::shared_lock meta_rlock(_meta_lock);
cur_rowset_ids = all_rs_id();
_rowset_ids_difference(cur_rowset_ids, pre_rowset_ids, &rowset_ids_to_add, &rowset_ids_to_del);
if (!rowset_ids_to_add.empty() || !rowset_ids_to_del.empty()) {
@ -2079,7 +2080,9 @@ Status Tablet::update_delete_bitmap(const RowsetSharedPtr& rowset, DeleteBitmapP
delete_bitmap, true));
}
// update version
// update version without write lock, compaction and publish_txn
// will update delete bitmap, handle compaction with _delete_bitmap_lock
// and publish_txn runs sequentially, so no need to lock here
for (auto iter = delete_bitmap->delete_bitmap.begin();
iter != delete_bitmap->delete_bitmap.end(); ++iter) {
int ret = _tablet_meta->delete_bitmap().set(

View File

@ -165,6 +165,7 @@ public:
// meta lock
std::shared_mutex& get_header_lock() { return _meta_lock; }
std::mutex& get_rowset_update_lock() { return _rowset_update_lock; }
std::mutex& get_push_lock() { return _ingest_lock; }
std::mutex& get_base_compaction_lock() { return _base_compaction_lock; }
std::mutex& get_cumulative_compaction_lock() { return _cumulative_compaction_lock; }
@ -392,6 +393,13 @@ private:
// TODO(lingbin): There is a _meta_lock in TabletMeta too; there should be a comment to
// explain how these two locks work together.
mutable std::shared_mutex _meta_lock;
// In unique key table with MoW, we should guarantee that only one
// writer can update rowset and delete bitmap at the same time.
// We use a separate lock rather than _meta_lock, to avoid blocking read queries
// during publish_txn, which might take hundreds of milliseconds
mutable std::mutex _rowset_update_lock;
// After version 0.13, all newly created rowsets are saved in _rs_version_map.
// And if a rowset is being compacted, the old rowsets will be saved in _stale_rs_version_map;
std::unordered_map<Version, RowsetSharedPtr, HashOfVersion> _rs_version_map;

View File

@ -536,11 +536,17 @@ void TabletMeta::to_meta_pb(TabletMetaPB* tablet_meta_pb) {
tablet_meta_pb->set_storage_policy(_storage_policy);
tablet_meta_pb->set_enable_unique_key_merge_on_write(_enable_unique_key_merge_on_write);
{
std::shared_lock l(delete_bitmap().lock);
if (_enable_unique_key_merge_on_write) {
std::set<RowsetId> rs_ids;
for (const auto& rowset : _rs_metas) {
rs_ids.insert(rowset->rowset_id());
}
DeleteBitmapPB* delete_bitmap_pb = tablet_meta_pb->mutable_delete_bitmap();
for (auto& [id, bitmap] : delete_bitmap().delete_bitmap) {
for (auto& [id, bitmap] : delete_bitmap().snapshot().delete_bitmap) {
auto& [rowset_id, segment_id, ver] = id;
if (rs_ids.count(rowset_id) == 0) {
continue;
}
delete_bitmap_pb->add_rowset_ids(rowset_id.to_string());
delete_bitmap_pb->add_segment_ids(segment_id);
delete_bitmap_pb->add_versions(ver);

View File

@ -404,7 +404,7 @@ public:
static std::once_flag once;
std::call_once(once, [size_in_bytes] {
auto tmp = new ShardedLRUCache("DeleteBitmap AggCache", size_in_bytes,
LRUCacheType::SIZE, 2048);
LRUCacheType::SIZE, 256);
AggCache::s_repr.store(tmp, std::memory_order_release);
});

View File

@ -126,11 +126,11 @@ Status EnginePublishVersionTask::finish() {
max_version = tablet->max_version();
}
if (version.first != max_version.second + 1) {
LOG(INFO) << "uniq key with merge-on-write version not continuous, current "
"max "
"version="
<< max_version.second << ", publish_version=" << version.first
<< " tablet_id=" << tablet->tablet_id();
VLOG_NOTICE << "uniq key with merge-on-write version not continuous, current "
"max "
"version="
<< max_version.second << ", publish_version=" << version.first
<< " tablet_id=" << tablet->tablet_id();
meet_version_not_continuous = true;
res = Status::OLAPInternalError(OLAP_ERR_PUBLISH_VERSION_NOT_CONTINUOUS);
continue;
@ -180,7 +180,7 @@ Status EnginePublishVersionTask::finish() {
LOG(INFO) << "finish to publish version on transaction."
<< "transaction_id=" << transaction_id
<< ", error_tablet_size=" << _error_tablet_ids->size();
<< ", error_tablet_size=" << _error_tablet_ids->size() << ", res=" << res.to_string();
return res;
}

View File

@ -309,6 +309,20 @@ Status TxnManager::publish_txn(OlapMeta* meta, TPartitionId partition_id,
// save meta need access disk, it maybe very slow, so that it is not in global txn lock
// it is under a single txn lock
if (rowset_ptr != nullptr) {
// update delete_bitmap
{
if (load_info != nullptr && load_info->unique_key_merge_on_write) {
auto tablet =
StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id);
if (tablet == nullptr) {
return Status::OK();
}
RETURN_IF_ERROR(tablet->update_delete_bitmap(
rowset_ptr, load_info->delete_bitmap, load_info->rowset_ids));
std::shared_lock rlock(tablet->get_header_lock());
tablet->save_meta();
}
}
// TODO(ygl): rowset is already set version here, memory is changed, if save failed
// it maybe a fatal error
rowset_ptr->make_visible(version);
@ -325,21 +339,6 @@ Status TxnManager::publish_txn(OlapMeta* meta, TPartitionId partition_id,
return Status::OLAPInternalError(OLAP_ERR_TRANSACTION_NOT_EXIST);
}
}
// update delete_bitmap
{
auto tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id);
#ifdef BE_TEST
if (tablet == nullptr) {
return Status::OK();
}
#endif
if (load_info != nullptr && load_info->unique_key_merge_on_write) {
RETURN_IF_ERROR(tablet->update_delete_bitmap(rowset_ptr, load_info->delete_bitmap,
load_info->rowset_ids));
std::lock_guard<std::shared_mutex> wrlock(tablet->get_header_lock());
tablet->save_meta();
}
}
{
std::unique_lock<std::mutex> txn_lock(_get_txn_lock(transaction_id));
std::lock_guard<std::shared_mutex> wrlock(_get_txn_map_lock(transaction_id));

View File

@ -147,6 +147,5 @@
"tablet_type": "TABLET_TYPE_DISK",
"replica_id": 0,
"storage_policy": "",
"delete_bitmap": {},
"enable_unique_key_merge_on_write": false
}