From cb1b1ac45a965f4f2808d5d849f62c68342abd3c Mon Sep 17 00:00:00 2001 From: obdev Date: Fri, 2 Dec 2022 10:42:33 +0000 Subject: [PATCH] fix check_majority_integrated about check_merge_progress --- .../ob_major_merge_progress_checker.cpp | 72 +++++++++++++++++-- .../freeze/ob_major_merge_progress_checker.h | 8 +++ .../ob_tablet_replica_checksum_iterator.cpp | 2 +- .../ob_tablet_replica_checksum_operator.cpp | 2 + 4 files changed, 79 insertions(+), 5 deletions(-) diff --git a/src/rootserver/freeze/ob_major_merge_progress_checker.cpp b/src/rootserver/freeze/ob_major_merge_progress_checker.cpp index e0ffae7440..bddb20455e 100644 --- a/src/rootserver/freeze/ob_major_merge_progress_checker.cpp +++ b/src/rootserver/freeze/ob_major_merge_progress_checker.cpp @@ -201,7 +201,7 @@ int ObMajorMergeProgressChecker::check_tablet( } else if (OB_FAIL(check_majority_integrated(schema_guard, tablet, ls_info))) { LOG_WARN("fail to check majority integrated", KR(ret)); } else if (OB_FAIL(check_tablet_data_version(all_progress, global_broadcast_scn, tablet, ls_info))) { - LOG_WARN("fail to check majority integrated", KR(ret)); + LOG_WARN("fail to check data version", KR(ret)); } } @@ -218,8 +218,19 @@ int ObMajorMergeProgressChecker::check_tablet_data_version( const ObLSReplica *ls_r = nullptr; FOREACH_CNT_X(r, tablet.get_replicas(), OB_SUCCESS == ret) { - if (OB_FAIL(ls_info.find(r->get_server(), ls_r))) { - LOG_WARN("fail to find lfs replica", "addr", r->get_server(), KR(ret)); + if (OB_ISNULL(r)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid replica", KR(ret), K_(tenant_id), K(tablet)); + } else if (OB_FAIL(ls_info.find(r->get_server(), ls_r))) { + if (OB_ENTRY_NOT_EXIST == ret) { + // Ignore tablet replicas that are not in ls_info. E.g., after ls replica migration, + // source ls meta has been deleted, but source tablet meta has not been deleted yet. + ret = OB_SUCCESS; // ignore ret + LOG_INFO("ignore this tablet replica, sicne it is not in ls_info", K_(tenant_id), + KPC(r), K(ls_info)); + } else { + LOG_WARN("fail to find ls replica", KR(ret), "addr", r->get_server()); + } } else if (OB_UNLIKELY(nullptr == ls_r)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("invalid ls replica", KR(ret), KPC(r)); @@ -272,10 +283,14 @@ int ObMajorMergeProgressChecker::check_majority_integrated( int64_t all_replica_num = OB_INVALID_COUNT; int64_t full_replica_num = OB_INVALID_COUNT; int64_t paxos_replica_num = OB_INVALID_COUNT; + bool is_in_member_list = false; + ObLSReplica::MemberList member_list; if (OB_FAIL(get_associated_replica_num(schema_guard, paxos_replica_num, full_replica_num, all_replica_num, majority))) { LOG_WARN("fail to get associated replica num", KR(ret), K_(tenant_id)); + } else if (OB_FAIL(get_member_list(ls_info, member_list))) { // member_list of ls leader replica + LOG_WARN("fail to get member_list", KR(ret), K_(tenant_id), K(ls_info)); } else { const int64_t tablet_replica_cnt = tablet.replica_count(); int64_t paxos_cnt = 0; @@ -283,7 +298,15 @@ int ObMajorMergeProgressChecker::check_majority_integrated( FOREACH_CNT_X(r, tablet.get_replicas(), OB_SUCC(ret)) { if (OB_ISNULL(r)) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("invalid replica", KR(ret), K_(tenant_id), K(r)); + LOG_WARN("invalid replica", KR(ret), K_(tenant_id), K(tablet)); + } else if (OB_FAIL(is_replica_in_ls_member_list(*r, member_list, is_in_member_list))) { + LOG_WARN("fail to check if replica is in ls member_list", KR(ret), K_(tenant_id), + KPC(r), K(member_list)); + } else if (!is_in_member_list) { + // Ignore tablet replicas that are not in member list. E.g., after ls replica migration, + // source ls meta has been deleted, but source tablet meta has not been deleted yet. + LOG_INFO("ignore this tablet replica, sicne it is not in ls member_list", K_(tenant_id), + KPC(r), K(member_list)); } else if (OB_FAIL(ls_info.find(r->get_server(), ls_r))) { LOG_WARN("fail to find", "addr", r->get_server(), KR(ret)); } else if (OB_UNLIKELY(nullptr == ls_r)) { @@ -352,5 +375,46 @@ int ObMajorMergeProgressChecker::get_associated_replica_num( return ret; } +int ObMajorMergeProgressChecker::get_member_list( + const share::ObLSInfo &ls_info, + share::ObLSReplica::MemberList &member_list) const +{ + int ret = OB_SUCCESS; + const ObLSReplica *ls_leader_replica = nullptr; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("not init", KR(ret), K_(tenant_id)); + } else if (OB_FAIL(ls_info.find_leader(ls_leader_replica))) { + LOG_WARN("fail to find ls leader replica", KR(ret), K_(tenant_id), K(ls_info)); + } else if (OB_ISNULL(ls_leader_replica)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ls leader replica is null", KR(ret), K_(tenant_id), K(ls_info)); + } else { + member_list = ls_leader_replica->get_member_list(); + } + return ret; +} + +int ObMajorMergeProgressChecker::is_replica_in_ls_member_list( + const share::ObTabletReplica &replica, + const ObLSReplica::MemberList &member_list, + bool &is_in_member_list) const +{ + int ret = OB_SUCCESS; + is_in_member_list = false; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("not init", KR(ret), K_(tenant_id)); + } else { + for (int i = 0; i < member_list.count(); ++i) { + if (replica.get_server() == member_list.at(i).get_server()) { + is_in_member_list = true; + break; + } + } + } + return ret; +} + } // namespace rootserver } // namespace oceanbase diff --git a/src/rootserver/freeze/ob_major_merge_progress_checker.h b/src/rootserver/freeze/ob_major_merge_progress_checker.h index 93f5a5b4e5..e0dc2e7752 100644 --- a/src/rootserver/freeze/ob_major_merge_progress_checker.h +++ b/src/rootserver/freeze/ob_major_merge_progress_checker.h @@ -26,6 +26,7 @@ class ObTabletTableOperator; class ObLSInfo; class ObLSTableOperator; class ObIServerTrace; +class ObLSReplica; namespace schema { class ObSchemaGetterGuard; @@ -75,6 +76,13 @@ private: int64_t &full_replica_num, int64_t &all_replica_num, int64_t &majority); + // get member_list of ls leader replica + int get_member_list(const share::ObLSInfo &ls_info, + share::ObLSReplica::MemberList &member_list) const; + int is_replica_in_ls_member_list(const share::ObTabletReplica &replica, + const share::ObLSReplica::MemberList &member_list, + bool &is_in_member_list) const; + private: bool is_inited_; uint64_t tenant_id_; diff --git a/src/share/ob_tablet_replica_checksum_iterator.cpp b/src/share/ob_tablet_replica_checksum_iterator.cpp index 941a05bbd7..6b833a7342 100644 --- a/src/share/ob_tablet_replica_checksum_iterator.cpp +++ b/src/share/ob_tablet_replica_checksum_iterator.cpp @@ -103,7 +103,7 @@ int ObTabletReplicaChecksumIterator::fetch_next_batch() ObTabletLSPair start_pair; if (checksum_items_.count() > 0) { ObTabletReplicaChecksumItem tmp_item; - if (OB_FAIL(checksum_items_.at(checksum_items_.count() - 1, tmp_item))) { + if (OB_FAIL(tmp_item.assign(checksum_items_.at(checksum_items_.count() - 1)))) { LOG_WARN("fail to fetch last checksum item", KR(ret), K_(tenant_id), K_(checksum_items)); } else if (OB_FAIL(start_pair.init(tmp_item.tablet_id_, tmp_item.ls_id_))) { LOG_WARN("fail to init start tablet_ls_pair", KR(ret), K(tmp_item)); diff --git a/src/share/ob_tablet_replica_checksum_operator.cpp b/src/share/ob_tablet_replica_checksum_operator.cpp index b3b9677099..877206c565 100644 --- a/src/share/ob_tablet_replica_checksum_operator.cpp +++ b/src/share/ob_tablet_replica_checksum_operator.cpp @@ -1083,6 +1083,8 @@ int ObTabletReplicaChecksumOperator::check_local_index_column_checksum( ret = OB_ERR_UNEXPECTED; LOG_WARN("tablet count of local index table is not same with data table", KR(ret), "data_table_tablet_cnt", data_schema_tablet_ids.count(), "index_table_tablet_cnt", index_schema_tablet_ids.count()); + } else if (!need_verify) { + LOG_INFO("do not need verify checksum", K(index_table_id), K(data_table_id), K(compaction_scn)); } else { // map element: hash::ObHashMap data_column_ckm_map;