[PALF] fix is_changing_config_with_arb_ remaining true after a reconfiguration fails
This commit is contained in:
@ -274,7 +274,7 @@ TEST_F(TestObSimpleLogClusterAccessMode, prev_log_slide)
|
||||
LogConfigVersion config_version;
|
||||
const int64_t proposal_id = leader.palf_handle_impl_->state_mgr_.get_proposal_id();
|
||||
const int64_t leader_epoch = leader.palf_handle_impl_->state_mgr_.get_leader_epoch();
|
||||
EXPECT_EQ(OB_EAGAIN, leader.palf_handle_impl_->config_mgr_.change_config(args, proposal_id, leader_epoch, config_version));
|
||||
EXPECT_EQ(OB_ERR_UNEXPECTED, leader.palf_handle_impl_->config_mgr_.change_config(args, proposal_id, leader_epoch, config_version));
|
||||
const LogConfigMeta new_config_meta = leader.palf_handle_impl_->config_mgr_.log_ms_meta_;
|
||||
EXPECT_EQ(config_meta.curr_.config_version_, new_config_meta.curr_.config_version_);
|
||||
// wait prepare req reaches majority
|
||||
|
||||
@ -398,9 +398,10 @@ TEST_F(TestObSimpleLogClusterArbService, test_2f1a_arb_with_highest_version)
|
||||
sleep(2);
|
||||
|
||||
LogConfigChangeArgs args(ObMember(palf_list[3]->palf_handle_impl_->self_, 1), 0, ADD_LEARNER);
|
||||
const int64_t proposal_id = leader.palf_handle_impl_->state_mgr_.get_proposal_id();
|
||||
const int64_t election_epoch = leader.palf_handle_impl_->state_mgr_.get_leader_epoch();
|
||||
int64_t proposal_id = 0;
|
||||
int64_t election_epoch = 0;
|
||||
LogConfigVersion config_version;
|
||||
EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->config_mgr_.start_change_config(proposal_id, election_epoch, args.type_));
|
||||
EXPECT_EQ(OB_EAGAIN, leader.palf_handle_impl_->config_mgr_.change_config(args, proposal_id, election_epoch, config_version));
|
||||
// learner list and state_ have been changed
|
||||
EXPECT_TRUE(config_version.is_valid());
|
||||
@ -480,9 +481,10 @@ TEST_F(TestObSimpleLogClusterArbService, test_2f1a_defensive)
|
||||
|
||||
// add a member, do not allow to append logs until config log reaches majority
|
||||
LogConfigChangeArgs args(added_member, 3, ADD_MEMBER);
|
||||
const int64_t proposal_id = leader.palf_handle_impl_->state_mgr_.get_proposal_id();
|
||||
const int64_t election_epoch = leader.palf_handle_impl_->state_mgr_.get_leader_epoch();
|
||||
int64_t proposal_id = 0;
|
||||
int64_t election_epoch = 0;
|
||||
LogConfigVersion config_version;
|
||||
EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->config_mgr_.start_change_config(proposal_id, election_epoch, args.type_));
|
||||
EXPECT_EQ(OB_EAGAIN, leader.palf_handle_impl_->config_mgr_.change_config(args, proposal_id, election_epoch, config_version));
|
||||
// do not allow to append log when changing config with arb
|
||||
EXPECT_TRUE(leader.palf_handle_impl_->state_mgr_.is_changing_config_with_arb());
|
||||
@ -664,9 +666,10 @@ TEST_F(TestObSimpleLogClusterArbService, test_2f1a_degrade_when_no_leader)
|
||||
sleep(2);
|
||||
|
||||
LogConfigChangeArgs args(ObMember(palf_list[another_f_idx]->palf_handle_impl_->self_, 1), 0, DEGRADE_ACCEPTOR_TO_LEARNER);
|
||||
const int64_t proposal_id = leader.palf_handle_impl_->state_mgr_.get_proposal_id();
|
||||
const int64_t election_epoch = leader.palf_handle_impl_->state_mgr_.get_leader_epoch();
|
||||
int64_t proposal_id = 0;
|
||||
int64_t election_epoch = 0;
|
||||
LogConfigVersion config_version;
|
||||
EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->config_mgr_.start_change_config(proposal_id, election_epoch, args.type_));
|
||||
EXPECT_EQ(OB_EAGAIN, leader.palf_handle_impl_->config_mgr_.change_config(args, proposal_id, election_epoch, config_version));
|
||||
|
||||
// leader appended config meta, but did not apply config meta
|
||||
|
||||
@ -384,6 +384,54 @@ TEST_F(TestObSimpleLogClusterArbMockEleService, test_2f1a_degrade_when_no_leader
|
||||
PALF_LOG(INFO, "end test test_2f1a_degrade_when_no_leader2", K(id));
|
||||
}
|
||||
|
||||
// Regression test: an ADD_MEMBER reconfiguration that cannot finish must fail
// with OB_LOG_NOT_SYNC and must not hand back a valid config version.
//
// Scenario:
//   1. create a paxos group with an arbitration replica (mock election) and
//      submit some logs;
//   2. stop the arbitration service on the leader's server;
//   3. start an ADD_MEMBER change for server D, then block the network from
//      the leader to D;
//   4. wait for change_config_ to report OB_LOG_NOT_SYNC and check that the
//      output config version is still invalid;
//   5. unblock the network, restart the arbitration service and clean up.
TEST_F(TestObSimpleLogClusterArbMockEleService, test_2f1a_change_config_fail)
{
  OB_LOGGER.set_log_level("INFO");
  // NOTE(review): `ret` and `TIMEOUT_US` are not referenced directly in this
  // test; they are kept in case project macros expect them in scope --
  // confirm before removing.
  int ret = OB_SUCCESS;
  const int64_t id = ATOMIC_AAF(&palf_id_, 1);
  const int64_t TIMEOUT_US = 10 * 1000 * 1000L;
  SET_CASE_LOG_FILE(TEST_NAME, "test_2f1a_change_config_fail");
  PALF_LOG(INFO, "begin test test_2f1a_change_config_fail", K(id));
  {
    // Inner scope: `leader` goes out of scope before delete_paxos_group(id)
    // is called below.
    int64_t leader_idx = 0;
    int64_t arb_replica_idx = 0;
    PalfHandleImplGuard leader;
    std::vector<PalfHandleImplGuard*> palf_list;
    EXPECT_EQ(OB_SUCCESS, create_paxos_group_with_arb_mock_election(id, arb_replica_idx, leader_idx, leader));
    EXPECT_EQ(OB_SUCCESS, submit_log(leader, 200, id));
    EXPECT_EQ(OB_SUCCESS, get_cluster_palf_handle_guard(id, palf_list));
    // Stop the arbitration service on the leader's server for the duration of
    // the test; it is restarted before cleanup.
    // NOTE(review): presumably this keeps the arbitration service from
    // reacting to the blocked network -- confirm.
    dynamic_cast<ObSimpleLogServer*>(get_cluster()[leader_idx])->log_service_.get_arbitration_service()->stop();

    // D is the server three slots after the leader.
    // NOTE(review): the modulus assumes a 4-server cluster -- confirm against
    // the fixture.
    const int64_t d_idx = (leader_idx + 3) % 4;
    const common::ObAddr d_addr = get_cluster()[d_idx]->get_addr();

    LogConfigChangeArgs add_d_arg(common::ObMember(d_addr, 1), 4, ADD_MEMBER);
    int64_t add_d_pid = 0;
    int64_t add_d_epoch = 0;
    LogConfigVersion add_d_version;
    EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->config_mgr_.start_change_config(add_d_pid, add_d_epoch, add_d_arg.type_));

    // block the network from the leader to the follower
    block_net(leader_idx, d_idx);
    // The reconfiguration is expected to give up with OB_LOG_NOT_SYNC and to
    // leave the output config version untouched (still invalid).
    EXPECT_UNTIL_EQ(OB_LOG_NOT_SYNC, leader.palf_handle_impl_->config_mgr_.change_config_(add_d_arg, add_d_pid, add_d_epoch, add_d_version));
    EXPECT_FALSE(add_d_version.is_valid());

    // Restore the environment before releasing the handles.
    unblock_net(leader_idx, d_idx);
    dynamic_cast<ObSimpleLogServer*>(get_cluster()[leader_idx])->log_service_.get_arbitration_service()->start();
    revert_cluster_palf_handle_guard(palf_list);
  }
  delete_paxos_group(id);
  PALF_LOG(INFO, "end test test_2f1a_change_config_fail", K(id));
}
|
||||
|
||||
TEST_F(TestObSimpleLogClusterArbMockEleService, test_2f1a_degrade_when_arb_crash)
|
||||
{
|
||||
OB_LOGGER.set_log_level("INFO");
|
||||
|
||||
@ -774,8 +774,8 @@ int LogConfigMgr::change_config_(const LogConfigChangeArgs &args,
|
||||
"role", state_mgr_->get_role(), "state", state_mgr_->get_state());
|
||||
} else if (false == mode_mgr_->can_do_paxos_accept()) {
|
||||
// do not allow change_config when ModeMgr is in prepare state
|
||||
ret = OB_EAGAIN;
|
||||
PALF_LOG(WARN, "is changing access_mode, try again", KR(ret), K_(palf_id), K_(self),
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
PALF_LOG(ERROR, "is changing access_mode, try again", KR(ret), K_(palf_id), K_(self),
|
||||
"role", state_mgr_->get_role(), "state", state_mgr_->get_state());
|
||||
} else if (OB_FAIL(check_config_version_matches_state_(args.type_, config_version))) {
|
||||
PALF_LOG(WARN, "config_version does not match with state, try again", KR(ret), K_(palf_id), K_(self),
|
||||
@ -1949,13 +1949,22 @@ int LogConfigMgr::wait_log_barrier_(const LogConfigChangeArgs &args,
|
||||
int64_t unused_id = INT64_MAX;
|
||||
bool unused_bool = false;
|
||||
|
||||
int64_t curr_ts_us = common::ObTimeUtility::current_time();
|
||||
constexpr int64_t conn_timeout_us = 3 * 1000 * 1000L; // 3s
|
||||
constexpr bool need_purge_throttling = true;
|
||||
constexpr bool need_remote_check = false;
|
||||
const bool need_skip_log_barrier = mode_mgr_->need_skip_log_barrier();
|
||||
LSN prev_log_end_lsn;
|
||||
start_wait_barrier_time_us_ = (OB_INVALID_TIMESTAMP == start_wait_barrier_time_us_)? \
|
||||
curr_ts_us: start_wait_barrier_time_us_;
|
||||
if (new_config_info.log_sync_memberlist_.get_member_number() == 0) {
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
} else if (curr_ts_us - start_wait_barrier_time_us_ > MAX_WAIT_BARRIER_TIME_US_FOR_RECONFIGURATION &&
|
||||
args.type_ != LogConfigChangeType::STARTWORKING) {
|
||||
ret = OB_LOG_NOT_SYNC;
|
||||
PALF_LOG(WARN, "waiting for log barrier timeout, skip", KR(ret), K_(palf_id), K_(self),
|
||||
K_(start_wait_barrier_time_us), K(first_committed_end_lsn), K(prev_log_end_lsn));
|
||||
start_wait_barrier_time_us_ = curr_ts_us;
|
||||
} else if (OB_FAIL(sync_get_committed_end_lsn_(args, new_config_info, need_purge_throttling,
|
||||
need_remote_check, conn_timeout_us, first_committed_end_lsn, unused_bool, unused_lsn, unused_id))) {
|
||||
PALF_LOG(WARN, "sync_get_committed_end_lsn failed", K(ret), K_(palf_id), K_(self), K(new_config_info));
|
||||
@ -1968,7 +1977,7 @@ int LogConfigMgr::wait_log_barrier_(const LogConfigChangeArgs &args,
|
||||
} else if (FALSE_IT(ret = (first_committed_end_lsn >= prev_log_end_lsn)? OB_SUCCESS: OB_EAGAIN)) {
|
||||
} else if (OB_EAGAIN == ret) {
|
||||
// committed_end_lsn does not change during 2s, skip the reconfiguration
|
||||
const int64_t curr_ts_us = common::ObTimeUtility::current_time();
|
||||
curr_ts_us = common::ObTimeUtility::current_time();
|
||||
if (OB_INVALID_TIMESTAMP == last_wait_barrier_time_us_) {
|
||||
last_wait_committed_end_lsn_ = first_committed_end_lsn;
|
||||
last_wait_barrier_time_us_ = curr_ts_us;
|
||||
@ -1984,15 +1993,6 @@ int LogConfigMgr::wait_log_barrier_(const LogConfigChangeArgs &args,
|
||||
last_wait_barrier_time_us_ = curr_ts_us;
|
||||
}
|
||||
}
|
||||
if (OB_INVALID_TIMESTAMP == start_wait_barrier_time_us_) {
|
||||
start_wait_barrier_time_us_ = curr_ts_us;
|
||||
} else if (curr_ts_us - start_wait_barrier_time_us_ > MAX_WAIT_BARRIER_TIME_US_FOR_RECONFIGURATION &&
|
||||
args.type_ != LogConfigChangeType::STARTWORKING) {
|
||||
ret = OB_LOG_NOT_SYNC;
|
||||
PALF_LOG(WARN, "waiting for log barrier timeout, skip", KR(ret), K_(palf_id), K_(self),
|
||||
K_(start_wait_barrier_time_us), K(first_committed_end_lsn), K(prev_log_end_lsn));
|
||||
start_wait_barrier_time_us_ = curr_ts_us;
|
||||
}
|
||||
}
|
||||
PALF_LOG(INFO, "waiting for log barrier", K(ret), K_(palf_id), K_(self),
|
||||
K(first_committed_end_lsn), K(prev_log_end_lsn), K(new_config_info));
|
||||
|
||||
Reference in New Issue
Block a user