[fix] do not remove normal member if another member is being rebuilt

This commit is contained in:
BinChenn
2023-07-17 02:42:09 +00:00
committed by ob-robot
parent 2886fd035e
commit 214101d63f
4 changed files with 72 additions and 19 deletions

View File

@ -314,6 +314,54 @@ TEST_F(TestObSimpleLogClusterConfigChangeMockEle, test_committed_end_lsn_after_r
PALF_LOG(INFO, "end test test_committed_end_lsn_after_removing_member", K(id)); PALF_LOG(INFO, "end test test_committed_end_lsn_after_removing_member", K(id));
} }
// Checks that the leader refuses (times out with OB_TIMEOUT) to remove member B
// while another member C is not a normal replica. Three conditions on C are
// exercised in turn: vote disabled, sync disabled, and a pending rebuild
// (last_rebuild_lsn_ ahead of the leader's max LSN).
TEST_F(TestObSimpleLogClusterConfigChangeMockEle, test_remove_if_another_rebuild)
{
  int ret = OB_SUCCESS;  // kept: project macros may reference `ret` -- TODO confirm
  const int64_t id = ATOMIC_AAF(&palf_id_, 1);
  const int64_t CONFIG_CHANGE_TIMEOUT = 4 * 1000 * 1000L; // 4s
  SET_CASE_LOG_FILE(TEST_NAME, "test_remove_if_another_rebuild");
  PALF_LOG(INFO, "begin test test_remove_if_another_rebuild", K(id));
  {
    int64_t leader_idx = 0;
    PalfHandleImplGuard leader;
    std::vector<PalfHandleImplGuard*> palf_list;
    EXPECT_EQ(OB_SUCCESS, create_paxos_group_with_mock_election(id, leader_idx, leader));
    EXPECT_EQ(OB_SUCCESS, submit_log(leader, 200, id));
    EXPECT_EQ(OB_SUCCESS, get_cluster_palf_handle_guard(id, palf_list));
    // B is the member to be removed, C is the member made abnormal.
    // NOTE(review): the modulus assumes the cluster holds 4 servers -- confirm against the fixture.
    const int64_t b_idx = (leader_idx + 1) % 4;
    const int64_t c_idx = (leader_idx + 2) % 4;
    const common::ObAddr b_addr = get_cluster()[b_idx]->get_addr();
    PalfHandleImplGuard *c_handle = palf_list[c_idx];
    // 1. disable vote
    EXPECT_EQ(OB_SUCCESS, c_handle->palf_handle_impl_->disable_vote(false));
    EXPECT_EQ(OB_TIMEOUT, leader.palf_handle_impl_->remove_member(common::ObMember(b_addr, 1), 2, CONFIG_CHANGE_TIMEOUT));
    EXPECT_EQ(OB_SUCCESS, c_handle->palf_handle_impl_->enable_vote());
    // 2. disable sync
    EXPECT_EQ(OB_SUCCESS, c_handle->palf_handle_impl_->disable_sync());
    EXPECT_EQ(OB_TIMEOUT, leader.palf_handle_impl_->remove_member(common::ObMember(b_addr, 1), 2, CONFIG_CHANGE_TIMEOUT));
    EXPECT_EQ(OB_SUCCESS, c_handle->palf_handle_impl_->enable_sync());
    // 3. need rebuild: push C's last_rebuild_lsn_ beyond the leader's max LSN
    c_handle->palf_handle_impl_->last_rebuild_lsn_ = LSN(leader.get_palf_handle_impl()->get_max_lsn().val_ + 100000);
    EXPECT_EQ(OB_TIMEOUT, leader.palf_handle_impl_->remove_member(common::ObMember(b_addr, 1), 2, CONFIG_CHANGE_TIMEOUT));
    leader.reset();
    revert_cluster_palf_handle_guard(palf_list);
  }
  delete_paxos_group(id);
  PALF_LOG(INFO, "end test test_remove_if_another_rebuild", K(id));
}
} // end unittest } // end unittest
} // end oceanbase } // end oceanbase

View File

@ -2352,12 +2352,12 @@ int LogConfigMgr::sync_get_committed_end_lsn_(const LogConfigChangeArgs &args,
last_slide_log_id))) { last_slide_log_id))) {
// PALF_LOG(WARN, "check_servers_lsn_and_version_ failed", K(ret), K(tmp_ret), K_(palf_id), K_(self), K(server), // PALF_LOG(WARN, "check_servers_lsn_and_version_ failed", K(ret), K(tmp_ret), K_(palf_id), K_(self), K(server),
// K(config_version), K(conn_timeout_us), K(force_remote_check), K(max_flushed_end_lsn), K(has_same_version)); // K(config_version), K(conn_timeout_us), K(force_remote_check), K(max_flushed_end_lsn), K(has_same_version));
} else if (false == is_arb_member) { } else if (false == is_arb_member && max_flushed_end_lsn.is_valid()) {
lsn_array[log_sync_resp_cnt++] = max_flushed_end_lsn; lsn_array[log_sync_resp_cnt++] = max_flushed_end_lsn;
paxos_resp_cnt++; paxos_resp_cnt++;
} else { } else if (true == is_arb_member) {
paxos_resp_cnt++; paxos_resp_cnt++;
} } else { }
added_member_has_new_version = (is_added_member)? has_same_version: added_member_has_new_version; added_member_has_new_version = (is_added_member)? has_same_version: added_member_has_new_version;
added_member_flushed_end_lsn = (is_added_member)? max_flushed_end_lsn: added_member_flushed_end_lsn; added_member_flushed_end_lsn = (is_added_member)? max_flushed_end_lsn: added_member_flushed_end_lsn;
added_member_last_slide_log_id = (is_added_member)? last_slide_log_id: added_member_last_slide_log_id; added_member_last_slide_log_id = (is_added_member)? last_slide_log_id: added_member_last_slide_log_id;

View File

@ -193,6 +193,13 @@ private:
common::ObMemberList lagged_list_; common::ObMemberList lagged_list_;
}; };
// Returns true only when both LSNs are valid and end_lsn is strictly
// smaller than last_rebuild_lsn; any invalid LSN yields false.
inline bool is_need_rebuild(const LSN &end_lsn, const LSN &last_rebuild_lsn)
{
  if (!end_lsn.is_valid()) {
    return false;
  }
  if (!last_rebuild_lsn.is_valid()) {
    return false;
  }
  return end_lsn < last_rebuild_lsn;
}
class LogSlidingWindow : public ISlidingCallBack class LogSlidingWindow : public ISlidingCallBack
{ {
public: public:

View File

@ -561,6 +561,12 @@ int PalfHandleImpl::handle_config_change_pre_check(const ObAddr &server,
int tmp_ret = common::OB_SUCCESS; int tmp_ret = common::OB_SUCCESS;
const bool is_vote_enabled = state_mgr_.is_allow_vote(); const bool is_vote_enabled = state_mgr_.is_allow_vote();
const bool is_sync_enabled = state_mgr_.is_sync_enabled(); const bool is_sync_enabled = state_mgr_.is_sync_enabled();
LSN last_rebuild_lsn;
do {
SpinLockGuard guard(last_rebuild_meta_info_lock_);
last_rebuild_lsn = last_rebuild_lsn_;
} while (0);
const bool need_rebuild = is_need_rebuild(get_end_lsn(), last_rebuild_lsn);
if (IS_NOT_INIT) { if (IS_NOT_INIT) {
ret = OB_NOT_INIT; ret = OB_NOT_INIT;
PALF_LOG(ERROR, "PalfHandleImpl has not inited", K(ret), K_(palf_id)); PALF_LOG(ERROR, "PalfHandleImpl has not inited", K(ret), K_(palf_id));
@ -569,10 +575,10 @@ int PalfHandleImpl::handle_config_change_pre_check(const ObAddr &server,
// to be added in the Paxos group. Check PalfHandleImpl only // to be added in the Paxos group. Check PalfHandleImpl only
resp.is_normal_replica_ = false; resp.is_normal_replica_ = false;
PALF_LOG(WARN, "get tenant data version failed", K(tmp_ret), K(req), K(resp)); PALF_LOG(WARN, "get tenant data version failed", K(tmp_ret), K(req), K(resp));
} else if (false == is_vote_enabled || false == is_sync_enabled) { } else if (false == is_vote_enabled || false == is_sync_enabled || true == need_rebuild) {
resp.is_normal_replica_ = false; resp.is_normal_replica_ = false;
PALF_LOG(WARN, "replica has been disabled vote/sync", K(ret), K(req), K(resp), PALF_LOG(WARN, "replica has been disabled vote/sync", K(ret), K(req), K(resp),
K(is_vote_enabled), K(is_sync_enabled)); K(is_vote_enabled), K(is_sync_enabled), K(need_rebuild));
} else { } else {
RLockGuard guard(lock_); RLockGuard guard(lock_);
if (req.need_purge_throttling_) { if (req.need_purge_throttling_) {
@ -582,19 +588,13 @@ int PalfHandleImpl::handle_config_change_pre_check(const ObAddr &server,
PALF_LOG_RET(WARN, tmp_ret, "failed to submit_purge_throttling_task with handle_config_change_pre_check", K_(palf_id)); PALF_LOG_RET(WARN, tmp_ret, "failed to submit_purge_throttling_task with handle_config_change_pre_check", K_(palf_id));
} }
} }
int64_t curr_proposal_id = state_mgr_.get_proposal_id(); resp.msg_proposal_id_ = state_mgr_.get_proposal_id();
resp.msg_proposal_id_ = curr_proposal_id;
LogConfigVersion curr_config_version; LogConfigVersion curr_config_version;
sw_.get_max_flushed_end_lsn(resp.max_flushed_end_lsn_);
resp.last_slide_log_id_ = sw_.get_last_slide_log_id();
if (OB_FAIL(config_mgr_.get_config_version(curr_config_version))) { if (OB_FAIL(config_mgr_.get_config_version(curr_config_version))) {
} else if (req.config_version_ != curr_config_version) {
resp.need_update_config_meta_ = true;
} else { } else {
LSN max_flushed_end_lsn; resp.need_update_config_meta_ = (req.config_version_ != curr_config_version);
sw_.get_max_flushed_end_lsn(max_flushed_end_lsn);
resp.max_flushed_end_lsn_ = max_flushed_end_lsn;
resp.need_update_config_meta_ = false;
resp.last_slide_log_id_ = sw_.get_last_slide_log_id();
} }
resp.is_normal_replica_ = true; resp.is_normal_replica_ = true;
@ -605,7 +605,7 @@ int PalfHandleImpl::handle_config_change_pre_check(const ObAddr &server,
int tmp_ret = OB_SUCCESS; int tmp_ret = OB_SUCCESS;
if (OB_SUCCESS != (tmp_ret = sw_.try_fetch_log(FetchTriggerType::ADD_MEMBER_PRE_CHECK))) { if (OB_SUCCESS != (tmp_ret = sw_.try_fetch_log(FetchTriggerType::ADD_MEMBER_PRE_CHECK))) {
PALF_LOG(WARN, "try_fetch_log with ADD_MEMBER_PRE_CHECK failed", PALF_LOG(WARN, "try_fetch_log with ADD_MEMBER_PRE_CHECK failed",
KR(tmp_ret), KPC(this), K(server), K(curr_proposal_id)); KR(tmp_ret), KPC(this), K(server));
} else { } else {
PALF_LOG(INFO, "try_fetch_log with ADD_MEMBER_PRE_CHECK success", KR(tmp_ret), KPC(this)); PALF_LOG(INFO, "try_fetch_log with ADD_MEMBER_PRE_CHECK success", KR(tmp_ret), KPC(this));
} }
@ -4671,9 +4671,7 @@ int PalfHandleImpl::stat(PalfStat &palf_stat)
palf_stat.end_scn_ = get_end_scn(); palf_stat.end_scn_ = get_end_scn();
palf_stat.max_lsn_ = get_max_lsn(); palf_stat.max_lsn_ = get_max_lsn();
palf_stat.max_scn_ = get_max_scn(); palf_stat.max_scn_ = get_max_scn();
palf_stat.is_need_rebuild_ = (palf_stat.end_lsn_.is_valid() && palf_stat.is_need_rebuild_ = is_need_rebuild(palf_stat.end_lsn_, last_rebuild_lsn);
last_rebuild_lsn.is_valid() &&
palf_stat.end_lsn_ < last_rebuild_lsn);
palf_stat.is_in_sync_ = (LEADER == palf_stat.role_)? true: cached_is_in_sync_; palf_stat.is_in_sync_ = (LEADER == palf_stat.role_)? true: cached_is_in_sync_;
PALF_LOG(TRACE, "PalfHandleImpl stat", K(palf_stat)); PALF_LOG(TRACE, "PalfHandleImpl stat", K(palf_stat));
} }