From 0f7a25367dd82db9b7dcd10ad401b50dc72d49e6 Mon Sep 17 00:00:00 2001 From: Mingyu Chen Date: Sat, 19 Feb 2022 12:00:48 +0800 Subject: [PATCH] [fix](rowset-meta) Fix bug that rowset meta is not deleted (#8118) As described in #8120, a large number of rowset metas remain in rocksdb. They may be left behind by: 1. drop tablet The drop tablet task itself only sets the state of the tablet meta to `SHUTDOWN` and moves the tablet to the `_shutdown_tablets` vector; the background thread then periodically cleans up the tablets in `_shutdown_tablets` (that's why, even if we execute `drop table xx force`, it may take 10 minutes to 1 hour before the tablet goes into the trash directory). When deleting the tablet, the regular background cleanup thread saves the complete tablet meta as a `.hdr` file and then moves it to the trash directory along with the data files. But this process does not handle the rowset metas (before the tablet meta is checkpointed, each rowset meta is stored independently in rocksdb as a key-value pair), so residual rowset metas are left behind. 2. clone task The clone task may migrate a tablet back and forth between BEs, which may result in a tablet on a BE having the same tablet id as before but a different tablet uid. As a result, some rowset metas can no longer find their corresponding tablet, but no thread processes these rowsets, so they eventually become residual. In this PR, I handle both cases in the regular cleanup thread, in the method `_clean_unused_rowset_metas()`. I did not delete the rowset metas as part of the "drop tablet" task, because "drop tablet" itself is not a synchronous operation: it also relies on a background thread to clean up the tablet periodically. So I put this operation in the background cleanup thread. 
--- be/src/olap/data_dir.cpp | 11 ++++++++--- be/src/olap/storage_engine.cpp | 28 +++++++++++++++++++++++++--- 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp index 3820f2c186..6cf9e0ca64 100644 --- a/be/src/olap/data_dir.cpp +++ b/be/src/olap/data_dir.cpp @@ -477,7 +477,7 @@ OLAPStatus DataDir::load() { // 1. add committed rowset to txn map // 2. add visible rowset to tablet // ignore any errors when load tablet or rowset, because fe will repair them after report - int64_t tablet_not_found = 0; + int64_t invalid_rowset_counter = 0; for (auto rowset_meta : dir_rowset_metas) { TabletSharedPtr tablet = _tablet_manager->get_tablet(rowset_meta->tablet_id(), rowset_meta->tablet_schema_hash()); @@ -486,7 +486,7 @@ OLAPStatus DataDir::load() { VLOG_NOTICE << "could not find tablet id: " << rowset_meta->tablet_id() << ", schema hash: " << rowset_meta->tablet_schema_hash() << ", for rowset: " << rowset_meta->rowset_id() << ", skip this rowset"; - ++tablet_not_found; + ++invalid_rowset_counter; continue; } RowsetSharedPtr rowset; @@ -534,10 +534,15 @@ OLAPStatus DataDir::load() { << " schema hash: " << rowset_meta->tablet_schema_hash() << " txn: " << rowset_meta->txn_id() << " current valid tablet uid: " << tablet->tablet_uid(); + ++invalid_rowset_counter; } } + // At startup, we only count these invalid rowset, but do not actually delete it. + // The actual delete operation is in StorageEngine::_clean_unused_rowset_metas, + // which is cleaned up uniformly by the background cleanup thread. 
LOG(INFO) << "finish to load tablets from " << _path_desc.filepath << ", total rowset meta: " - << dir_rowset_metas.size() << ", tablet not found: " << tablet_not_found; + << dir_rowset_metas.size() << ", invalid rowset num: " << invalid_rowset_counter; + return OLAP_SUCCESS; } diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp index 201bffb664..823565be28 100644 --- a/be/src/olap/storage_engine.cpp +++ b/be/src/olap/storage_engine.cpp @@ -714,11 +714,12 @@ void StorageEngine::_clean_unused_rowset_metas() { std::vector invalid_rowset_metas; auto clean_rowset_func = [this, &invalid_rowset_metas](TabletUid tablet_uid, RowsetId rowset_id, const std::string& meta_str) -> bool { + // return false will break meta iterator, return true to skip this error RowsetMetaSharedPtr rowset_meta(new AlphaRowsetMeta()); bool parsed = rowset_meta->init(meta_str); if (!parsed) { LOG(WARNING) << "parse rowset meta string failed for rowset_id:" << rowset_id; - // return false will break meta iterator, return true to skip this error + invalid_rowset_metas.push_back(rowset_meta); return true; } if (rowset_meta->tablet_uid() != tablet_uid) { @@ -726,12 +727,32 @@ void StorageEngine::_clean_unused_rowset_metas() { << ", rowset_id=" << rowset_meta->rowset_id() << ", in_put_tablet_uid=" << tablet_uid << ", tablet_uid in rowset meta=" << rowset_meta->tablet_uid(); + invalid_rowset_metas.push_back(rowset_meta); return true; } TabletSharedPtr tablet = _tablet_manager->get_tablet( - rowset_meta->tablet_id(), rowset_meta->tablet_schema_hash(), tablet_uid); + rowset_meta->tablet_id(), rowset_meta->tablet_schema_hash()); if (tablet == nullptr) { + // tablet may be dropped + // TODO(cmy): this is better to be a VLOG, because drop table is a very common case. + // leave it as INFO log for observation. Maybe change it in future. 
+ LOG(INFO) << "failed to find tablet " << rowset_meta->tablet_id() << " for rowset: " << rowset_meta->rowset_id() + << ", tablet may be dropped"; + invalid_rowset_metas.push_back(rowset_meta); + return true; + } + if (tablet->tablet_uid() != rowset_meta->tablet_uid()) { + // In this case, we get the tablet using the tablet id recorded in the rowset meta. + // but the uid in the tablet is different from the one recorded in the rowset meta. + // How this happened: + // Replica1 of Tablet A exists on BE1. Because of the clone task, a new replica2 is createed on BE2, + // and then replica1 deleted from BE1. After some time, we created replica again on BE1, + // which will creates a new tablet with the same id but a different uid. + // And in the historical version, when we deleted the replica, we did not delete the corresponding rowset meta, + // thus causing the original rowset meta to remain(with same tablet id but different uid). + LOG(WARNING) << "rowset's tablet uid " << rowset_meta->tablet_uid() << " does not equal to tablet uid: " << tablet->tablet_uid(); + invalid_rowset_metas.push_back(rowset_meta); return true; } if (rowset_meta->rowset_state() == RowsetStatePB::VISIBLE && @@ -750,8 +771,9 @@ void StorageEngine::_clean_unused_rowset_metas() { RowsetMetaManager::traverse_rowset_metas(data_dir->get_meta(), clean_rowset_func); for (auto& rowset_meta : invalid_rowset_metas) { RowsetMetaManager::remove(data_dir->get_meta(), rowset_meta->tablet_uid(), - rowset_meta->rowset_id()); + rowset_meta->rowset_id()); } + LOG(INFO) << "remove " << invalid_rowset_metas.size() << " invalid rowset meta from dir: " << data_dir->path(); invalid_rowset_metas.clear(); } }