From 5b343911e89ee63c03c32875a0d5a45221925eca Mon Sep 17 00:00:00 2001 From: zhannngchen <48427519+zhannngchen@users.noreply.github.com> Date: Tue, 6 Feb 2024 17:37:13 +0800 Subject: [PATCH] [log](gc) add log for unused rowsets gc (#30854) --- be/src/olap/data_dir.cpp | 8 ++++---- be/src/olap/storage_engine.cpp | 16 ++++++++++++++++ 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp index 351e7fd992..da5d220a24 100644 --- a/be/src/olap/data_dir.cpp +++ b/be/src/olap/data_dir.cpp @@ -736,7 +736,7 @@ void DataDir::_perform_path_gc_by_rowset(const std::vector& tablet_ bool is_valid = doris::TabletManager::get_tablet_id_and_schema_hash_from_path( path, &tablet_id, &schema_hash); if (!is_valid || tablet_id < 1 || schema_hash < 1) [[unlikely]] { - LOG(WARNING) << "unknown path:" << path; + LOG(WARNING) << "[path gc] unknown path:" << path; continue; } @@ -757,7 +757,7 @@ void DataDir::_perform_path_gc_by_rowset(const std::vector& tablet_ std::vector files; auto st = io::global_local_filesystem()->list(path, true, &files, &exists); if (!st.ok()) [[unlikely]] { - LOG(WARNING) << "fail to list tablet path " << path << " : " << st; + LOG(WARNING) << "[path gc] fail to list tablet path " << path << " : " << st; continue; } @@ -786,10 +786,10 @@ void DataDir::_perform_path_gc_by_rowset(const std::vector& tablet_ auto reclaim_rowset_file = [](const std::string& path) { auto st = io::global_local_filesystem()->delete_file(path); if (!st.ok()) [[unlikely]] { - LOG(WARNING) << "failed to delete garbage rowset file: " << st; + LOG(WARNING) << "[path gc] failed to delete garbage rowset file: " << st; return; } - LOG(INFO) << "delete garbage path: " << path; // Audit log + LOG(INFO) << "[path gc] delete garbage path: " << path; // Audit log }; auto should_reclaim = [&, this](const RowsetId& rowset_id) { diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp index fbfa0636c0..f1ba693a50 100644 --- a/be/src/olap/storage_engine.cpp +++ b/be/src/olap/storage_engine.cpp @@ -1074,8 +1074,12 @@ void StorageEngine::_parse_default_rowset_type() { } void StorageEngine::start_delete_unused_rowset() { + LOG(INFO) << "start to delete unused rowset, size: " << _unused_rowsets.size(); std::vector unused_rowsets_copy; unused_rowsets_copy.reserve(_unused_rowsets.size()); + auto due_to_use_count = 0; + auto due_to_not_delete_file = 0; + auto due_to_delayed_expired_ts = 0; { std::lock_guard lock(_gc_mutex); for (auto it = _unused_rowsets.begin(); it != _unused_rowsets.end();) { @@ -1092,9 +1096,20 @@ void StorageEngine::start_delete_unused_rowset() { it = _unused_rowsets.erase(it); } else { ++it; + if (rs.use_count() != 1) { + ++due_to_use_count; + } else if (!rs->need_delete_file()) { + ++due_to_not_delete_file; + } else { + ++due_to_delayed_expired_ts; + } } } } + LOG(INFO) << "collected " << unused_rowsets_copy.size() << " unused rowsets to remove, skipped " + << due_to_use_count << " rowsets due to use count > 1, skipped " + << due_to_not_delete_file << " rowsets due to don't need to delete file, skipped " + << due_to_delayed_expired_ts << " rowsets due to delayed expired timestamp."; for (auto&& rs : unused_rowsets_copy) { VLOG_NOTICE << "start to remove rowset:" << rs->rowset_id() << ", version:" << rs->version(); @@ -1107,6 +1122,7 @@ void StorageEngine::start_delete_unused_rowset() { Status status = rs->remove(); VLOG_NOTICE << "remove rowset:" << rs->rowset_id() << " finished. status:" << status; } + LOG(INFO) << "removed all collected unused rowsets"; } void StorageEngine::add_unused_rowset(RowsetSharedPtr rowset) {