[BUG] Fix stale path delete checking logic when current main path is missing. (#4549)
Fix stale path delete checking logic. When current main path is version missing, then delete checking logic is always core dumped. So we fix the checking logic to tolerate current main version missing.
This commit is contained in:
@ -440,6 +440,11 @@ void Tablet::delete_expired_stale_rowset() {
|
||||
LOG(WARNING) << "lastest_delta is null " << tablet_id();
|
||||
return;
|
||||
}
|
||||
|
||||
// fetch missing version before delete
|
||||
std::vector<Version> missed_versions;
|
||||
calc_missed_versions_unlocked(lastest_delta->end_version(), &missed_versions);
|
||||
|
||||
// do check consistent operation
|
||||
auto path_id_iter = path_id_vec.begin();
|
||||
|
||||
@ -452,21 +457,58 @@ void Tablet::delete_expired_stale_rowset() {
|
||||
stale_version_path_map[*path_id_iter] = version_path;
|
||||
|
||||
OLAPStatus status = capture_consistent_versions(test_version, nullptr);
|
||||
// When there is no consistent versions, we must reconstruct the tracker.
|
||||
// 1. When there is no consistent versions, we must reconstruct the tracker.
|
||||
if (status != OLAP_SUCCESS) {
|
||||
LOG(WARNING) << "The consistent version check fails, there are bugs. "
|
||||
<< "Reconstruct the tracker to recover versions in tablet=" << tablet_id();
|
||||
|
||||
// 2. fetch missing version after delete
|
||||
std::vector<Version> after_missed_versions;
|
||||
calc_missed_versions_unlocked(lastest_delta->end_version(), &after_missed_versions);
|
||||
|
||||
_timestamped_version_tracker.recover_versioned_tracker(stale_version_path_map);
|
||||
// 2.1 check whether missed_versions and after_missed_versions are the same.
|
||||
// when they are the same, it means we can delete the path securely.
|
||||
bool is_missng = missed_versions.size() != after_missed_versions.size();
|
||||
|
||||
if (!is_missng) {
|
||||
for (int ver_index = 0; ver_index < missed_versions.size(); ver_index++) {
|
||||
if(missed_versions[ver_index] != after_missed_versions[ver_index]) {
|
||||
is_missng = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// double check the consistent versions
|
||||
status = capture_consistent_versions(test_version, nullptr);
|
||||
if (is_missng) {
|
||||
LOG(WARNING) << "The consistent version check fails, there are bugs. "
|
||||
<< "Reconstruct the tracker to recover versions in tablet=" << tablet_id();
|
||||
|
||||
if (status != OLAP_SUCCESS) {
|
||||
if (!config::ignore_rowset_stale_unconsistent_delete) {
|
||||
LOG(FATAL) << "rowset stale unconsistent delete. tablet= " << tablet_id();
|
||||
} else {
|
||||
LOG(WARNING) << "rowset stale unconsistent delete. tablet= " << tablet_id();
|
||||
// 3. try to recover
|
||||
_timestamped_version_tracker.recover_versioned_tracker(stale_version_path_map);
|
||||
|
||||
// 4. double check the consistent versions
|
||||
// fetch missing version after recover
|
||||
std::vector<Version> recover_missed_versions;
|
||||
calc_missed_versions_unlocked(lastest_delta->end_version(), &recover_missed_versions);
|
||||
|
||||
// 4.1 check whether missed_versions and recover_missed_versions are the same.
|
||||
// when they are the same, it means we recover successlly.
|
||||
bool is_recover_missng = missed_versions.size() != recover_missed_versions.size();
|
||||
|
||||
if (!is_recover_missng) {
|
||||
for (int ver_index = 0; ver_index < missed_versions.size(); ver_index++) {
|
||||
if(missed_versions[ver_index] != recover_missed_versions[ver_index]) {
|
||||
is_recover_missng = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 5. check recover fail, version is mission
|
||||
if (is_recover_missng) {
|
||||
if (!config::ignore_rowset_stale_unconsistent_delete) {
|
||||
LOG(FATAL) << "rowset stale unconsistent delete. tablet= " << tablet_id();
|
||||
} else {
|
||||
LOG(WARNING) << "rowset stale unconsistent delete. tablet= " << tablet_id();
|
||||
}
|
||||
}
|
||||
}
|
||||
return;
|
||||
|
||||
Reference in New Issue
Block a user