Pick "[fix](gclog) Skip tablet dir without schema hash dir in path gc (#32793)" (#35978)

## Proposed changes
Pick "[fix](gclog) Skip tablet dir without schema hash dir in path gc
(#32793)"
This commit is contained in:
plat1ko
2024-06-06 22:24:30 +08:00
committed by GitHub
parent 5966354165
commit af779f5cd8
5 changed files with 150 additions and 181 deletions

View File

@ -662,170 +662,144 @@ Status DataDir::load() {
return Status::OK();
}
void DataDir::perform_path_gc() {
auto tablet_paths = _perform_path_scan();
_perform_path_gc_by_tablet(tablet_paths);
_perform_path_gc_by_rowset(tablet_paths);
}
// gc unused tablet schemahash dir
void DataDir::_perform_path_gc_by_tablet(std::vector<std::string>& tablet_paths) {
if (_stop_bg_worker) return;
if (tablet_paths.empty()) {
return;
}
LOG(INFO) << "start to path gc by tablet, dir=" << _path;
// Use double pointer to avoid move elements after erase a element
auto forward = tablet_paths.begin();
auto backward = tablet_paths.end();
int counter = 0;
do {
if (config::path_gc_check_step > 0 && counter % config::path_gc_check_step == 0) {
std::this_thread::sleep_for(
std::chrono::milliseconds(config::path_gc_check_step_interval_ms));
}
auto& path = *forward;
TTabletId tablet_id = -1;
TSchemaHash schema_hash = -1;
bool is_valid = _tablet_manager->get_tablet_id_and_schema_hash_from_path(path, &tablet_id,
&schema_hash);
if (!is_valid || tablet_id < 1 || schema_hash < 1) [[unlikely]] {
LOG(WARNING) << "unknown path:" << path;
--backward;
std::swap(*forward, *backward);
continue;
}
auto tablet = _tablet_manager->get_tablet(tablet_id);
if (!tablet || tablet->data_dir() != this) {
if (tablet) {
LOG(INFO) << "The tablet in path " << path
<< " is not same with the running one: " << tablet->data_dir()->_path
<< "/" << tablet->tablet_path()
<< ", might be the old tablet after migration, try to move it to trash";
}
_tablet_manager->try_delete_unused_tablet_path(this, tablet_id, schema_hash, path);
--backward;
std::swap(*forward, *backward);
continue;
}
// could find the tablet, then skip check it
++forward;
} while (forward != backward && !_stop_bg_worker);
tablet_paths.erase(backward, tablet_paths.end());
LOG(INFO) << "finish path gc by tablet, dir=" << _path;
}
void DataDir::_perform_path_gc_by_rowset(const std::vector<std::string>& tablet_paths) {
if (_stop_bg_worker || tablet_paths.empty()) {
// gc unused local tablet dir
void DataDir::_perform_tablet_gc(const std::string& tablet_schema_hash_path) {
if (_stop_bg_worker) {
return;
}
LOG(INFO) << "start to path gc by rowset";
int counter = 0;
for (const auto& path : tablet_paths) {
if (_stop_bg_worker) {
break;
TTabletId tablet_id = -1;
TSchemaHash schema_hash = -1;
bool is_valid = TabletManager::get_tablet_id_and_schema_hash_from_path(
tablet_schema_hash_path, &tablet_id, &schema_hash);
if (!is_valid || tablet_id < 1 || schema_hash < 1) [[unlikely]] {
LOG(WARNING) << "[path gc] unknown path: " << tablet_schema_hash_path;
return;
}
auto tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id);
if (!tablet || tablet->data_dir() != this) {
if (tablet) {
LOG(INFO) << "The tablet in path " << tablet_schema_hash_path
<< " is not same with the running one: " << tablet->data_dir()->_path << "/"
<< tablet->tablet_path()
<< ", might be the old tablet after migration, try to move it to trash";
}
StorageEngine::instance()->tablet_manager()->try_delete_unused_tablet_path(
this, tablet_id, schema_hash, tablet_schema_hash_path);
return;
}
_perform_rowset_gc(tablet_schema_hash_path);
}
// gc unused local rowsets under tablet dir
void DataDir::_perform_rowset_gc(const std::string& tablet_schema_hash_path) {
if (_stop_bg_worker) {
return;
}
TTabletId tablet_id = -1;
TSchemaHash schema_hash = -1;
bool is_valid = doris::TabletManager::get_tablet_id_and_schema_hash_from_path(
tablet_schema_hash_path, &tablet_id, &schema_hash);
if (!is_valid || tablet_id < 1 || schema_hash < 1) [[unlikely]] {
LOG(WARNING) << "[path gc] unknown path: " << tablet_schema_hash_path;
return;
}
auto tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id);
if (!tablet) {
// Could not found the tablet, maybe it's a dropped tablet, will be reclaimed
// in the next time `_perform_path_gc_by_tablet`
return;
}
if (tablet->data_dir() != this) {
// Current running tablet is not in same data_dir, maybe it's a tablet after migration,
// will be reclaimed in the next time `_perform_path_gc_by_tablet`
return;
}
bool exists;
std::vector<io::FileInfo> files;
auto st = io::global_local_filesystem()->list(tablet_schema_hash_path, true, &files, &exists);
if (!st.ok()) [[unlikely]] {
LOG(WARNING) << "[path gc] fail to list tablet path " << tablet_schema_hash_path << " : "
<< st;
return;
}
// Rowset files excluding pending rowsets
std::vector<std::pair<RowsetId, std::string /* filename */>> rowsets_not_pending;
for (auto&& file : files) {
auto rowset_id = extract_rowset_id(file.file_name);
if (rowset_id.hi == 0) {
continue; // Not a rowset
}
++counter;
if (config::path_gc_check_step > 0 && counter % config::path_gc_check_step == 0) {
std::this_thread::sleep_for(
std::chrono::milliseconds(config::path_gc_check_step_interval_ms));
}
TTabletId tablet_id = -1;
TSchemaHash schema_hash = -1;
bool is_valid = doris::TabletManager::get_tablet_id_and_schema_hash_from_path(
path, &tablet_id, &schema_hash);
if (!is_valid || tablet_id < 1 || schema_hash < 1) [[unlikely]] {
LOG(WARNING) << "[path gc] unknown path:" << path;
continue;
if (StorageEngine::instance()->pending_local_rowsets().contains(rowset_id)) {
continue; // Pending rowset file
}
auto tablet = _tablet_manager->get_tablet(tablet_id);
if (!tablet) {
// Could not found the tablet, maybe it's a dropped tablet, will be reclaimed
// in the next time `_perform_path_gc_by_tablet`
continue;
}
rowsets_not_pending.emplace_back(rowset_id, std::move(file.file_name));
}
if (tablet->data_dir() != this) {
// Current running tablet is not in same data_dir, maybe it's a tablet after migration,
// will be reclaimed in the next time `_perform_path_gc_by_tablet`
continue;
}
RowsetIdUnorderedSet rowsets_in_version_map;
tablet->traverse_rowsets(
[&rowsets_in_version_map](auto& rs) { rowsets_in_version_map.insert(rs->rowset_id()); },
true);
bool exists;
std::vector<io::FileInfo> files;
auto st = io::global_local_filesystem()->list(path, true, &files, &exists);
auto reclaim_rowset_file = [](const std::string& path) {
auto st = io::global_local_filesystem()->delete_file(path);
if (!st.ok()) [[unlikely]] {
LOG(WARNING) << "[path gc] fail to list tablet path " << path << " : " << st;
LOG(WARNING) << "[path gc] failed to delete garbage rowset file: " << st;
return;
}
LOG(INFO) << "[path gc] delete garbage path: " << path; // Audit log
};
auto should_reclaim = [&, this](const RowsetId& rowset_id) {
return !rowsets_in_version_map.contains(rowset_id) &&
!StorageEngine::instance()->check_rowset_id_in_unused_rowsets(rowset_id) &&
RowsetMetaManager::exists(get_meta(), tablet->tablet_uid(), rowset_id)
.is<META_KEY_NOT_FOUND>();
};
// rowset_id -> is_garbage
std::unordered_map<RowsetId, bool> checked_rowsets;
for (auto&& [rowset_id, filename] : rowsets_not_pending) {
if (_stop_bg_worker) {
return;
}
if (auto it = checked_rowsets.find(rowset_id); it != checked_rowsets.end()) {
if (it->second) { // Is checked garbage rowset
reclaim_rowset_file(tablet_schema_hash_path + '/' + filename);
}
continue;
}
// Rowset files excluding pending rowsets
std::vector<std::pair<RowsetId, std::string /* filename */>> rowsets_not_pending;
for (auto&& file : files) {
auto rowset_id = extract_rowset_id(file.file_name);
if (rowset_id.hi == 0) {
continue; // Not a rowset
}
if (StorageEngine::instance()->pending_local_rowsets().contains(rowset_id)) {
continue; // Pending rowset file
}
rowsets_not_pending.emplace_back(rowset_id, std::move(file.file_name));
}
RowsetIdUnorderedSet rowsets_in_version_map;
tablet->traverse_rowsets(
[&rowsets_in_version_map](auto& rs) {
rowsets_in_version_map.insert(rs->rowset_id());
},
true);
auto reclaim_rowset_file = [](const std::string& path) {
auto st = io::global_local_filesystem()->delete_file(path);
if (!st.ok()) [[unlikely]] {
LOG(WARNING) << "[path gc] failed to delete garbage rowset file: " << st;
return;
}
LOG(INFO) << "[path gc] delete garbage path: " << path; // Audit log
};
auto should_reclaim = [&, this](const RowsetId& rowset_id) {
return !rowsets_in_version_map.contains(rowset_id) &&
!StorageEngine::instance()->check_rowset_id_in_unused_rowsets(rowset_id) &&
RowsetMetaManager::exists(get_meta(), tablet->tablet_uid(), rowset_id)
.is<META_KEY_NOT_FOUND>();
};
// rowset_id -> is_garbage
std::unordered_map<RowsetId, bool> checked_rowsets;
for (auto&& [rowset_id, filename] : rowsets_not_pending) {
if (auto it = checked_rowsets.find(rowset_id); it != checked_rowsets.end()) {
if (it->second) { // Is checked garbage rowset
reclaim_rowset_file(path + '/' + filename);
}
continue;
}
if (should_reclaim(rowset_id)) {
reclaim_rowset_file(path + '/' + filename);
checked_rowsets.emplace(rowset_id, true);
} else {
checked_rowsets.emplace(rowset_id, false);
if (should_reclaim(rowset_id)) {
if (config::path_gc_check_step > 0 &&
++_path_gc_step % config::path_gc_check_step == 0) {
std::this_thread::sleep_for(
std::chrono::milliseconds(config::path_gc_check_step_interval_ms));
}
reclaim_rowset_file(tablet_schema_hash_path + '/' + filename);
checked_rowsets.emplace(rowset_id, true);
} else {
checked_rowsets.emplace(rowset_id, false);
}
}
LOG(INFO) << "finish path gc by rowset.";
}
// path producer
std::vector<std::string> DataDir::_perform_path_scan() {
std::vector<std::string> tablet_paths;
if (_stop_bg_worker) return tablet_paths;
LOG(INFO) << "start to scan data dir " << _path;
void DataDir::perform_path_gc() {
if (_stop_bg_worker) {
return;
}
LOG(INFO) << "start to gc data dir " << _path;
auto data_path = fmt::format("{}/{}", _path, DATA_PREFIX);
std::vector<io::FileInfo> shards;
bool exists = true;
@ -833,13 +807,18 @@ std::vector<std::string> DataDir::_perform_path_scan() {
auto st = fs->list(data_path, false, &shards, &exists);
if (!st.ok()) [[unlikely]] {
LOG(WARNING) << "failed to scan data dir: " << st;
return tablet_paths;
return;
}
for (const auto& shard : shards) {
if (_stop_bg_worker) break;
if (_stop_bg_worker) {
break;
}
if (shard.is_file) {
continue;
}
auto shard_path = fmt::format("{}/{}", data_path, shard.file_name);
std::vector<io::FileInfo> tablet_ids;
st = io::global_local_filesystem()->list(shard_path, false, &tablet_ids, &exists);
@ -847,31 +826,41 @@ std::vector<std::string> DataDir::_perform_path_scan() {
LOG(WARNING) << "fail to walk dir, shard_path=" << shard_path << " : " << st;
continue;
}
for (const auto& tablet_id : tablet_ids) {
if (_stop_bg_worker) {
break;
}
if (tablet_id.is_file) {
continue;
}
auto tablet_id_path = fmt::format("{}/{}", shard_path, tablet_id.file_name);
std::vector<io::FileInfo> schema_hashes;
st = io::global_local_filesystem()->list(tablet_id_path, false, &schema_hashes,
&exists);
st = fs->list(tablet_id_path, false, &schema_hashes, &exists);
if (!st.ok()) [[unlikely]] {
LOG(WARNING) << "fail to walk dir, tablet_id_path=" << tablet_id_path << " : "
<< st;
continue;
}
if (schema_hashes.size() != 1 || schema_hashes[0].is_file) [[unlikely]] {
LOG(WARNING) << "invalid tablet_path, path=" << tablet_id_path;
continue;
for (auto&& schema_hash : schema_hashes) {
if (schema_hash.is_file) {
continue;
}
if (config::path_gc_check_step > 0 &&
++_path_gc_step % config::path_gc_check_step == 0) {
std::this_thread::sleep_for(
std::chrono::milliseconds(config::path_gc_check_step_interval_ms));
}
_perform_tablet_gc(tablet_id_path + '/' + schema_hash.file_name);
}
tablet_paths.push_back(tablet_id_path + '/' + schema_hashes[0].file_name);
}
}
LOG(INFO) << "scan data dir path: " << _path << " finished. path size: " << tablet_paths.size();
return tablet_paths;
LOG(INFO) << "gc data dir path: " << _path << " finished";
}
Status DataDir::update_capacity() {

View File

@ -159,11 +159,11 @@ private:
// process will log fatal.
Status _check_incompatible_old_format_tablet();
std::vector<std::string> _perform_path_scan();
int _path_gc_step {0};
void _perform_path_gc_by_tablet(std::vector<std::string>& tablet_paths);
void _perform_tablet_gc(const std::string& tablet_schema_hash_path);
void _perform_path_gc_by_rowset(const std::vector<std::string>& tablet_paths);
void _perform_rowset_gc(const std::string& tablet_schema_hash_path);
private:
std::atomic<bool> _stop_bg_worker = false;

View File

@ -845,7 +845,7 @@ Status TabletManager::load_tablet_from_meta(DataDir* data_dir, TTabletId tablet_
// For case 1 doesn't need path check because BE is just starting and not ready,
// just check tablet meta status to judge whether tablet is delete is enough.
// For case 2, If a tablet has just been copied to local BE,
// it may be cleared by gc-thread(see perform_path_gc_by_tablet) because the tablet meta may not be loaded to memory.
// it may be cleared by gc-thread(see perform_tablet_gc) because the tablet meta may not be loaded to memory.
// So clone task should check path and then failed and retry in this case.
if (check_path) {
bool exists = true;

View File

@ -45,9 +45,6 @@ ExecEnv::~ExecEnv() {
// TODO(plat1ko): template <class Engine>
#ifdef BE_TEST
void ExecEnv::set_storage_engine(std::unique_ptr<BaseStorageEngine>&& engine) {
_storage_engine = std::move(engine);
}
void ExecEnv::set_write_cooldown_meta_executors() {
_write_cooldown_meta_executors = std::make_unique<WriteCooldownMetaExecutors>();
}

View File

@ -15,7 +15,6 @@
// specific language governing permissions and limitations
// under the License.
#include <gen_cpp/Types_types.h>
#include <gtest/gtest.h>
#include <random>
@ -83,23 +82,7 @@ TEST(PathGcTest, GcTabletAndRowset) {
ASSERT_TRUE(st.ok()) << st;
}
// Test path scan
auto paths = data_dir._perform_path_scan();
ASSERT_EQ(paths.size(), 20);
// Test tablet gc
config::path_gc_check_step = 0;
data_dir._perform_path_gc_by_tablet(paths);
ASSERT_EQ(paths.size(), 10);
std::vector<std::string_view> expected_paths;
for (auto&& tablet : active_tablets) {
expected_paths.emplace_back(tablet->tablet_path());
}
std::sort(expected_paths.begin(), expected_paths.end());
std::sort(paths.begin(), paths.end());
for (size_t i = 0; i < paths.size(); ++i) {
EXPECT_EQ(paths[i], expected_paths[i]);
}
// Prepare rowsets
auto rng = std::default_random_engine {static_cast<uint32_t>(::time(nullptr))};
@ -187,7 +170,7 @@ TEST(PathGcTest, GcTabletAndRowset) {
}
// Test rowset gc
data_dir._perform_path_gc_by_rowset(paths);
data_dir.perform_path_gc();
for (auto&& t : active_tablets) {
std::vector<io::FileInfo> files;
bool exists;