Files
oceanbase/mittest/logservice/test_ob_simple_log_arb.cpp

784 lines
33 KiB
C++

// Copyright (c) 2021 OceanBase
// OceanBase is licensed under Mulan PubL v2.
// You can use this software according to the terms and conditions of the Mulan PubL v2.
// You may obtain a copy of Mulan PubL v2 at:
// http://license.coscl.org.cn/MulanPubL-2.0
// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
// See the Mulan PubL v2 for more details.
#include <cstdio>
#include <gtest/gtest.h>
#include <signal.h>
#define private public
#include "env/ob_simple_log_cluster_env.h"
#undef private
// Name of this test suite. It is passed as the first argument of
// SET_CASE_LOG_FILE in every case, assigned to
// ObSimpleLogClusterTestBase::test_name_, and handed to
// RUN_SIMPLE_LOG_CLUSTER_TEST in main().
const std::string TEST_NAME = "arb_service";
using namespace oceanbase::common;
using namespace oceanbase;
namespace oceanbase
{
using namespace logservice;
// Value copied into ObArbitrationService::arb_timeout_us_ by the
// update_arb_timeout_() definition in this file (2s by default).
// Mutable on purpose: test_2f1a_reconfirm_degrade_upgrade raises it to 15s
// and restores it to 2s before returning.
int64_t ARB_TIMEOUT_ARG = 2 * 1000 * 1000L;
namespace logservice
{
// Definition of ObArbitrationService::update_arb_timeout_() supplied by this
// test file: the timeout is taken from the test-controlled global
// ARB_TIMEOUT_ARG, so individual cases can lengthen or shorten it at runtime.
void ObArbitrationService::update_arb_timeout_()
{
  // Minimum spacing between two trace messages emitted from here.
  const int64_t PRINT_INTERVAL_US = 2 * 1000 * 1000;

  arb_timeout_us_ = ARB_TIMEOUT_ARG;

  const bool need_print = REACH_TIME_INTERVAL(PRINT_INTERVAL_US);
  if (need_print) {
    CLOG_LOG_RET(WARN, OB_ERR_UNEXPECTED, "update_arb_timeout_", K_(self), K_(arb_timeout_us));
  }
}
}
namespace unittest
{
class TestObSimpleLogClusterArbService : public ObSimpleLogClusterTestEnv
{
public:
TestObSimpleLogClusterArbService() : ObSimpleLogClusterTestEnv()
{}
bool is_degraded(const PalfHandleImplGuard &leader,
const int64_t degraded_server_idx)
{
bool has_degraded = false;
while (!has_degraded) {
common::GlobalLearnerList degraded_learner_list;
leader.palf_handle_impl_->config_mgr_.get_degraded_learner_list(degraded_learner_list);
has_degraded = degraded_learner_list.contains(get_cluster()[degraded_server_idx]->get_addr());
sleep(1);
PALF_LOG(INFO, "wait degrade");
}
return has_degraded;
}
bool is_upgraded(PalfHandleImplGuard &leader, const int64_t palf_id)
{
bool has_upgraded = false;
EXPECT_EQ(OB_SUCCESS, submit_log(leader, 1, palf_id));
while (!has_upgraded) {
EXPECT_EQ(OB_SUCCESS, submit_log(leader, 1, palf_id));
common::GlobalLearnerList degraded_learner_list;
leader.palf_handle_impl_->config_mgr_.get_degraded_learner_list(degraded_learner_list);
has_upgraded = (0 == degraded_learner_list.get_member_number());
sleep(1);
PALF_LOG(INFO, "wait upgrade");
}
return has_upgraded;
}
};
// Definitions of the static configuration members of the shared test base.
// The cases below index servers 0..4 and expect an arbitration server, which
// matches node_cnt_ = 5 and need_add_arb_server_ = true.
// NOTE(review): member_cnt_ = 3 presumably is the initial paxos group size
// (2F + 1A) — confirm against ObSimpleLogClusterTestBase.
int64_t ObSimpleLogClusterTestBase::member_cnt_ = 3;
int64_t ObSimpleLogClusterTestBase::node_cnt_ = 5;
bool ObSimpleLogClusterTestBase::need_add_arb_server_ = true;
std::string ObSimpleLogClusterTestBase::test_name_ = TEST_NAME;
// 2F1A: checks that the leader degrades the other follower (and upgrades it
// again once the fault is removed) for several kinds of faults:
//   1. the network between leader and follower is blocked,
//   2. the follower's failure detector reports a clog-disk-full event,
//   3. sync is disabled on the follower,
//   4. vote is disabled on the follower,
// and finally that a degrade started while the follower is fully isolated can
// be completed after the config manager state has been reset.
TEST_F(TestObSimpleLogClusterArbService, test_2f1a_degrade_upgrade)
{
  oceanbase::common::ObClusterVersion::get_instance().cluster_version_ = CLUSTER_VERSION_4_1_0_0;
  SET_CASE_LOG_FILE(TEST_NAME, "arb_2f1a_degrade_upgrade");
  OB_LOGGER.set_log_level("TRACE");
  MockLocCB loc_cb;
  int ret = OB_SUCCESS;
  PALF_LOG(INFO, "begin test_2f1a_degrade_upgrade");
  int64_t leader_idx = 0;
  int64_t arb_replica_idx = -1;
  PalfHandleImplGuard leader;
  std::vector<PalfHandleImplGuard*> palf_list;
  const int64_t id = ATOMIC_AAF(&palf_id_, 1);
  EXPECT_EQ(OB_SUCCESS, create_paxos_group_with_arb(id, arb_replica_idx, leader_idx, leader));
  EXPECT_EQ(OB_SUCCESS, get_cluster_palf_handle_guard(id, palf_list));
  const int64_t another_f_idx = (leader_idx+1)%3;
  EXPECT_EQ(OB_SUCCESS, submit_log(leader, 100, id));

  // set the location cb on the follower replica so that it can find the leader
  palf_list[another_f_idx]->get_palf_handle_impl()->set_location_cache_cb(&loc_cb);

  // --- fault 1: network between leader and follower is blocked ---
  block_net(leader_idx, another_f_idx);
  // do not check OB_SUCCESS, may return OB_NOT_MASTER during degrading member
  submit_log(leader, 100, id);
  EXPECT_TRUE(is_degraded(leader, another_f_idx));
  loc_cb.leader_ = leader.palf_handle_impl_->self_;
  unblock_net(leader_idx, another_f_idx);
  EXPECT_EQ(OB_SUCCESS, submit_log(leader, 10, id));
  EXPECT_TRUE(is_upgraded(leader, id));
  EXPECT_EQ(OB_SUCCESS, submit_log(leader, 10, id));

  // --- fault 2: set clog disk error on the follower ---
  // The fault has to be injected on the very server whose degradation is
  // awaited below, hence another_f_idx (and not leader_idx+1, which differs
  // from it whenever the index wraps around).
  ObTenantEnv::set_tenant(get_cluster()[another_f_idx]->get_tenant_base());
  logservice::coordinator::ObFailureDetector *detector = MTL(logservice::coordinator::ObFailureDetector *);
  if (NULL != detector) {
    detector->has_add_clog_full_event_ = true;
  }
  EXPECT_TRUE(is_degraded(leader, another_f_idx));
  if (NULL != detector) {
    detector->has_add_clog_full_event_ = false;
  }
  EXPECT_TRUE(is_upgraded(leader, id));
  EXPECT_EQ(OB_SUCCESS, submit_log(leader, 1, id));

  // --- fault 3: test disable sync ---
  palf_list[another_f_idx]->palf_handle_impl_->disable_sync();
  EXPECT_TRUE(is_degraded(leader, another_f_idx));
  palf_list[another_f_idx]->palf_handle_impl_->enable_sync();
  EXPECT_TRUE(is_upgraded(leader, id));

  // --- fault 4: test disable vote ---
  palf_list[another_f_idx]->palf_handle_impl_->disable_vote(false/*no need check log missing*/);
  EXPECT_TRUE(is_degraded(leader, another_f_idx));
  palf_list[another_f_idx]->palf_handle_impl_->enable_vote();
  EXPECT_TRUE(is_upgraded(leader, id));

  // --- test revoking the leader when arb service is degrading ---
  // The follower is fully isolated and a degrade is started by hand; the
  // first change_config_ call is expected to return OB_EAGAIN while the
  // follower is already gone from alive_paxos_memberlist_ but still present
  // in applied_alive_paxos_memberlist_.
  block_all_net(another_f_idx);
  const common::ObAddr follower_addr = get_cluster()[another_f_idx]->get_addr();
  LogConfigChangeArgs args(common::ObMember(follower_addr, 1), 0, DEGRADE_ACCEPTOR_TO_LEARNER);
  int64_t ele_epoch;
  common::ObRole ele_role;
  int64_t proposal_id = leader.palf_handle_impl_->state_mgr_.get_proposal_id();
  leader.palf_handle_impl_->election_.get_role(ele_role, ele_epoch);
  LogConfigVersion config_version;
  EXPECT_EQ(OB_EAGAIN, leader.palf_handle_impl_->config_mgr_.change_config_(args, proposal_id, ele_epoch, config_version));
  EXPECT_FALSE(leader.palf_handle_impl_->config_mgr_.alive_paxos_memberlist_.contains(follower_addr));
  EXPECT_TRUE(leader.palf_handle_impl_->config_mgr_.applied_alive_paxos_memberlist_.contains(follower_addr));
  EXPECT_EQ(leader.palf_handle_impl_->config_mgr_.state_, 1);

  // reset status supposing the lease is expired
  block_net(leader_idx, another_f_idx);
  leader.palf_handle_impl_->config_mgr_.reset_status();
  EXPECT_TRUE(is_degraded(leader, another_f_idx));

  // restore the network and release every handle before deleting the group
  unblock_net(leader_idx, another_f_idx);
  unblock_all_net(another_f_idx);
  revert_cluster_palf_handle_guard(palf_list);
  leader.reset();
  delete_paxos_group(id);
  PALF_LOG(INFO, "end test_2f1a_degrade_upgrade", K(id));
}
// 4F1A: the group created by create_paxos_group_with_arb is extended with
// servers 3 and 4, then two followers lose all network at the same time. The
// leader must list both of them as degraded learners, and the degraded list
// must become empty again after the network is restored.
TEST_F(TestObSimpleLogClusterArbService, test_4f1a_degrade_upgrade)
{
SET_CASE_LOG_FILE(TEST_NAME, "arb_4f1a_degrade_upgrade");
OB_LOGGER.set_log_level("TRACE");
MockLocCB loc_cb;
int ret = OB_SUCCESS;
PALF_LOG(INFO, "begin test_4f1a_degrade_upgrade");
int64_t leader_idx = 0;
int64_t arb_replica_idx = -1;
PalfHandleImplGuard leader;
const int64_t CONFIG_CHANGE_TIMEOUT = 10 * 1000 * 1000L; // 10s
const int64_t id = ATOMIC_AAF(&palf_id_, 1);
common::ObMember dummy_member;
std::vector<PalfHandleImplGuard*> palf_list;
EXPECT_EQ(OB_SUCCESS, create_paxos_group_with_arb(id, arb_replica_idx, leader_idx, leader));
// grow the group with servers 3 and 4
// NOTE(review): the second add_member argument (3, then 4) presumably is the new replica number — confirm
EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->add_member(ObMember(get_cluster()[3]->get_addr(), 1), 3, CONFIG_CHANGE_TIMEOUT));
EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->add_member(ObMember(get_cluster()[4]->get_addr(), 1), 4, CONFIG_CHANGE_TIMEOUT));
EXPECT_EQ(OB_SUCCESS, get_cluster_palf_handle_guard(id, palf_list));
// the two followers that will be isolated; these are servers 3 and 4 when leader_idx is 0
const int64_t another_f1_idx = (leader_idx+3)%5;
const int64_t another_f2_idx = (leader_idx+4)%5;
palf_list[another_f1_idx]->palf_handle_impl_->set_location_cache_cb(&loc_cb);
palf_list[another_f2_idx]->palf_handle_impl_->set_location_cache_cb(&loc_cb);
EXPECT_EQ(OB_SUCCESS, submit_log(leader, 100, id));
sleep(2);
// isolate both followers and wait until the leader has degraded each of them
block_all_net(another_f1_idx);
block_all_net(another_f2_idx);
EXPECT_TRUE(is_degraded(leader, another_f1_idx));
EXPECT_TRUE(is_degraded(leader, another_f2_idx));
// restore the network; the location cb starts answering with the leader's address from here on
unblock_all_net(another_f1_idx);
unblock_all_net(another_f2_idx);
loc_cb.leader_ = leader.palf_handle_impl_->self_;
EXPECT_EQ(OB_SUCCESS, submit_log(leader, 10, id));
EXPECT_TRUE(is_upgraded(leader, id));
// release every handle before deleting the group
revert_cluster_palf_handle_guard(palf_list);
leader.reset();
delete_paxos_group(id);
PALF_LOG(INFO, "end test_4f1a_degrade_upgrade", K(id));
}
// 2F1A: the current leader is cut off from both the other follower and the
// arb member. The case then waits until the other follower's palf handle
// lists the old leader as a degraded learner, reconnects the old leader and
// waits for the new leader's degraded list to become empty.
// ARB_TIMEOUT_ARG is raised to 15s for the duration of the case and restored
// to 2s at the end.
TEST_F(TestObSimpleLogClusterArbService, test_2f1a_reconfirm_degrade_upgrade)
{
SET_CASE_LOG_FILE(TEST_NAME, "arb_2f1a_reconfirm_test");
OB_LOGGER.set_log_level("TRACE");
int ret = OB_SUCCESS;
PALF_LOG(INFO, "begin test_2f1a_reconfirm_degrade_upgrade");
MockLocCB loc_cb;
int64_t leader_idx = 0;
int64_t arb_replica_idx = -1;
PalfHandleImplGuard leader;
std::vector<PalfHandleImplGuard*> palf_list;
const int64_t CONFIG_CHANGE_TIMEOUT = 10 * 1000 * 1000L; // 10s
const int64_t id = ATOMIC_AAF(&palf_id_, 1);
common::ObMember dummy_member;
EXPECT_EQ(OB_SUCCESS, create_paxos_group_with_arb(id, arb_replica_idx, leader_idx, leader));
EXPECT_EQ(OB_SUCCESS, get_cluster_palf_handle_guard(id, palf_list));
const int64_t another_f_idx = (leader_idx+1)%3;
EXPECT_EQ(OB_SUCCESS, submit_log(leader, 100, id));
sleep(2);
// both replicas get the location cb, since the leadership is going to move
palf_list[leader_idx]->palf_handle_impl_->set_location_cache_cb(&loc_cb);
palf_list[another_f_idx]->palf_handle_impl_->set_location_cache_cb(&loc_cb);
// block net of old leader, new leader will be elected
// and degrade in RECONFIRM state
ARB_TIMEOUT_ARG = 15 * 1000 * 1000;
block_net(leader_idx, another_f_idx);
block_net(leader_idx, arb_replica_idx);
// after block_net the degrade operation starts immediately, which causes some logs
// that were written successfully on the old leader's single replica to be committed
// (return value deliberately not checked)
submit_log(leader, 20, id);
// submit some logs which will be truncated
// the degraded list is read through the other follower's handle, not through the old leader
EXPECT_TRUE(is_degraded(*palf_list[another_f_idx], leader_idx));
int64_t new_leader_idx = -1;
PalfHandleImplGuard new_leader;
EXPECT_EQ(OB_SUCCESS, get_leader(id, new_leader, new_leader_idx));
loc_cb.leader_ = new_leader.palf_handle_impl_->self_;
// reconnect the old leader and wait until the new leader has upgraded it
unblock_net(leader_idx, another_f_idx);
unblock_net(leader_idx, arb_replica_idx);
EXPECT_EQ(OB_SUCCESS, submit_log(new_leader, 100, id));
EXPECT_TRUE(is_upgraded(new_leader, id));
// release every handle before deleting the group
revert_cluster_palf_handle_guard(palf_list);
leader.reset();
new_leader.reset();
delete_paxos_group(id);
// restore the default arbitration timeout for the following cases
ARB_TIMEOUT_ARG = 2 * 1000 * 1000;
PALF_LOG(INFO, "end test_2f1a_reconfirm_degrade_upgrade", K(id));
}
// 4F1A: the leader and one follower (server 3) lose all network at the same
// time. The case waits for a leader with a different index, expects it to
// list both isolated servers as degraded learners, then restores the network
// and waits for the degraded list to become empty.
TEST_F(TestObSimpleLogClusterArbService, test_4f1a_reconfirm_degrade_upgrade)
{
SET_CASE_LOG_FILE(TEST_NAME, "arb_4f1a_reconfirm_test");
OB_LOGGER.set_log_level("TRACE");
MockLocCB loc_cb;
int ret = OB_SUCCESS;
PALF_LOG(INFO, "begin test_4f1a_reconfirm_degrade_upgrade");
int64_t leader_idx = 0;
int64_t arb_replica_idx = -1;
// NOTE(review): `cluster` is not used anywhere below
auto cluster = get_cluster();
PalfHandleImplGuard leader;
const int64_t CONFIG_CHANGE_TIMEOUT = 10 * 1000 * 1000L; // 10s
const int64_t id = ATOMIC_AAF(&palf_id_, 1);
common::ObMember dummy_member;
std::vector<PalfHandleImplGuard*> palf_list;
EXPECT_EQ(OB_SUCCESS, create_paxos_group_with_arb(id, arb_replica_idx, leader_idx, leader));
// grow the group with servers 3 and 4
EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->add_member(ObMember(get_cluster()[3]->get_addr(), 1), 3, CONFIG_CHANGE_TIMEOUT));
EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->add_member(ObMember(get_cluster()[4]->get_addr(), 1), 4, CONFIG_CHANGE_TIMEOUT));
EXPECT_EQ(OB_SUCCESS, get_cluster_palf_handle_guard(id, palf_list));
const int64_t another_f1_idx = 3;
const int64_t another_f2_idx = 4;
// only the two replicas that are going to be isolated get the location cb
palf_list[leader_idx]->palf_handle_impl_->set_location_cache_cb(&loc_cb);
palf_list[another_f1_idx]->palf_handle_impl_->set_location_cache_cb(&loc_cb);
EXPECT_EQ(OB_SUCCESS, submit_log(leader, 100, id));
sleep(2);
// stop leader and a follower
block_all_net(leader_idx);
block_all_net(another_f1_idx);
//EXPECT_EQ(OB_SUCCESS, submit_log(leader, 100, id));
// wait for new leader is elected
// (get_leader is retried until it reports an index different from the old leader's)
int64_t new_leader_idx = leader_idx;
PalfHandleImplGuard new_leader;
while (leader_idx == new_leader_idx) {
new_leader.reset();
EXPECT_EQ(OB_SUCCESS, get_leader(id, new_leader, new_leader_idx));
}
EXPECT_TRUE(is_degraded(new_leader, another_f1_idx));
EXPECT_TRUE(is_degraded(new_leader, leader_idx));
loc_cb.leader_ = new_leader.palf_handle_impl_->self_;
// restart two servers
unblock_all_net(leader_idx);
unblock_all_net(another_f1_idx);
EXPECT_EQ(OB_SUCCESS, submit_log(new_leader, 100, id));
EXPECT_TRUE(is_upgraded(new_leader, id));
// release every handle before deleting the group
leader.reset();
new_leader.reset();
revert_cluster_palf_handle_guard(palf_list);
delete_paxos_group(id);
PALF_LOG(INFO, "end test_4f1a_reconfirm_degrade_upgrade", K(id));
}
// 2F1A: runs three membership changes in a row on a group that contains an
// arb member and expects each of them to succeed: replace_member,
// add_learner and switch_learner_to_acceptor.
TEST_F(TestObSimpleLogClusterArbService, test_2f1a_config_change)
{
SET_CASE_LOG_FILE(TEST_NAME, "arb_2f1a_config_change");
OB_LOGGER.set_log_level("DEBUG");
MockLocCB loc_cb;
int ret = OB_SUCCESS;
PALF_LOG(INFO, "begin arb_2f1a_config_change");
int64_t leader_idx = 0;
int64_t arb_replica_idx = -1;
PalfHandleImplGuard leader;
std::vector<PalfHandleImplGuard*> palf_list;
const int64_t CONFIG_CHANGE_TIMEOUT = 10 * 1000 * 1000L; // 10s
const int64_t id = ATOMIC_AAF(&palf_id_, 1);
common::ObMember dummy_member;
EXPECT_EQ(OB_SUCCESS, create_paxos_group_with_arb(id, arb_replica_idx, leader_idx, leader));
EXPECT_EQ(OB_SUCCESS, get_cluster_palf_handle_guard(id, palf_list));
// set the location cb on the non-leader replicas (the follower and servers 3 and 4)
// so that they can find the leader
const int64_t another_f_idx = (leader_idx+1)%3;
loc_cb.leader_ = leader.palf_handle_impl_->self_;
palf_list[another_f_idx]->get_palf_handle_impl()->set_location_cache_cb(&loc_cb);
palf_list[3]->get_palf_handle_impl()->set_location_cache_cb(&loc_cb);
palf_list[4]->get_palf_handle_impl()->set_location_cache_cb(&loc_cb);
EXPECT_EQ(OB_SUCCESS, submit_log(leader, 100, id));
sleep(2);
// replace member
// NOTE(review): presumably server 3 is added and the original follower removed —
// confirm the argument order of replace_member
EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->replace_member(
ObMember(palf_list[3]->palf_handle_impl_->self_, 1),
ObMember(palf_list[another_f_idx]->palf_handle_impl_->self_, 1),
CONFIG_CHANGE_TIMEOUT));
// add learner
EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->add_learner(
ObMember(palf_list[4]->palf_handle_impl_->self_, 1),
CONFIG_CHANGE_TIMEOUT));
// switch learner
EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->switch_learner_to_acceptor(
ObMember(palf_list[4]->palf_handle_impl_->self_, 1),
CONFIG_CHANGE_TIMEOUT));
// release every handle before deleting the group
revert_cluster_palf_handle_guard(palf_list);
leader.reset();
delete_paxos_group(id);
PALF_LOG(INFO, "end arb_2f1a_config_change", K(id));
}
// 2F1A: makes the arb member the only replica that has persisted the newest
// config version, restarts the cluster while the two F replicas cannot reach
// each other, and expects that a leader still comes up and accepts logs.
TEST_F(TestObSimpleLogClusterArbService, test_2f1a_arb_with_highest_version)
{
  oceanbase::common::ObClusterVersion::get_instance().cluster_version_ = CLUSTER_VERSION_4_1_0_0;
  SET_CASE_LOG_FILE(TEST_NAME, "test_2f1a_arb_with_highest_version");
  OB_LOGGER.set_log_level("DEBUG");
  MockLocCB loc_cb;
  int ret = OB_SUCCESS;
  PALF_LOG(INFO, "begin test_2f1a_arb_with_highest_version");
  int64_t leader_idx = 0;
  int64_t arb_replica_idx = -1;
  PalfHandleImplGuard leader;
  std::vector<PalfHandleImplGuard*> palf_list;
  const int64_t id = ATOMIC_AAF(&palf_id_, 1);
  EXPECT_EQ(OB_SUCCESS, create_paxos_group_with_arb(id, arb_replica_idx, leader_idx, leader));
  EXPECT_EQ(OB_SUCCESS, get_cluster_palf_handle_guard(id, palf_list));

  // set the location cb on the follower replica so that it can find the leader
  const int64_t another_f_idx = (leader_idx+1)%3;
  loc_cb.leader_ = leader.palf_handle_impl_->self_;
  palf_list[another_f_idx]->get_palf_handle_impl()->set_location_cache_cb(&loc_cb);
  EXPECT_EQ(OB_SUCCESS, submit_log(leader, 500, id));
  sleep(2);

  // start an ADD_LEARNER change by hand; the first change_config call is
  // expected to return OB_EAGAIN, leaving the change in flight
  LogConfigChangeArgs args(ObMember(palf_list[3]->palf_handle_impl_->self_, 1), 0, ADD_LEARNER);
  int64_t proposal_id = 0;
  int64_t election_epoch = 0;
  LogConfigVersion config_version;
  EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->config_mgr_.start_change_config(proposal_id, election_epoch, args.type_));
  EXPECT_EQ(OB_EAGAIN, leader.palf_handle_impl_->config_mgr_.change_config(args, proposal_id, election_epoch, config_version));
  // learner list and state_ has been changed
  EXPECT_TRUE(config_version.is_valid());
  EXPECT_EQ(1, leader.palf_handle_impl_->config_mgr_.state_);

  // only send config log to arb member
  // (addressed through arb_replica_idx as reported by create_paxos_group_with_arb,
  // instead of a hard-coded server index)
  ObMemberList member_list;
  member_list.add_server(get_cluster()[arb_replica_idx]->get_addr());
  const int64_t prev_log_proposal_id = leader.palf_handle_impl_->config_mgr_.prev_log_proposal_id_;
  const LSN prev_lsn = leader.palf_handle_impl_->config_mgr_.prev_lsn_;
  const int64_t prev_mode_pid = leader.palf_handle_impl_->config_mgr_.prev_mode_pid_;
  const LogConfigMeta config_meta = leader.palf_handle_impl_->config_mgr_.log_ms_meta_;
  EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->log_engine_.submit_change_config_meta_req( \
      member_list, proposal_id, prev_log_proposal_id, prev_lsn, prev_mode_pid, config_meta));
  sleep(1);

  // check if arb member has received and persisted the config log;
  // the request is re-sent every 10ms until it has
  while (true) {
    PalfHandleLiteGuard arb_member;
    if (OB_FAIL(get_arb_member_guard(id, arb_member))) {
      // the arb member is not available yet, retry after the sleep below
    } else if (arb_member.palf_handle_lite_->config_mgr_.persistent_config_version_ == config_version) {
      break;
    } else {
      EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->log_engine_.submit_change_config_meta_req( \
          member_list, proposal_id, prev_log_proposal_id, prev_lsn, prev_mode_pid, config_meta));
    }
    ::ob_usleep(10 * 1000);
  }

  // neither F replica has persisted the new config version
  EXPECT_GT(config_version, leader.palf_handle_impl_->config_mgr_.persistent_config_version_);
  EXPECT_GT(config_version, palf_list[another_f_idx]->palf_handle_impl_->config_mgr_.persistent_config_version_);

  // restart cluster, close a follower, restart leader
  revert_cluster_palf_handle_guard(palf_list);
  leader.reset();
  // block_net, so two F can't reach majority
  block_net(another_f_idx, leader_idx);
  EXPECT_EQ(OB_SUCCESS, restart_paxos_groups());
  const int64_t restart_finish_time_us = common::ObTimeUtility::current_time();
  PalfHandleImplGuard new_leader;
  int64_t new_leader_idx = -1;
  // a leader must come up although the two F replicas cannot talk to each other
  EXPECT_EQ(OB_SUCCESS, get_leader(id, new_leader, new_leader_idx));
  EXPECT_EQ(OB_SUCCESS, submit_log(new_leader, 500, id));
  PALF_LOG(ERROR, "RTO", "RTO", common::ObTimeUtility::current_time() - restart_finish_time_us);
  new_leader.reset();
  // must delete paxos group in here, otherwise memory of
  // MockLocCB will be reclaimed and core dump will occur
  // blacklist will not be deleted after reboot, clean it manually
  unblock_net(another_f_idx, leader_idx);
  delete_paxos_group(id);
  PALF_LOG(INFO, "end test_2f1a_arb_with_highest_version", K(id));
}
// 2F1A: drives an ADD_MEMBER change step by step and checks that the state
// manager reports "changing config with arb" while it is in flight. After the
// change has completed, one follower is flashed back and the added member is
// removed again.
TEST_F(TestObSimpleLogClusterArbService, test_2f1a_defensive)
{
SET_CASE_LOG_FILE(TEST_NAME, "test_2f1a_defensive");
OB_LOGGER.set_log_level("DEBUG");
MockLocCB loc_cb;
int ret = OB_SUCCESS;
PALF_LOG(INFO, "begin test_2f1a_defensive");
int64_t leader_idx = 0;
int64_t arb_replica_idx = -1;
PalfHandleImplGuard leader;
std::vector<PalfHandleImplGuard*> palf_list;
const int64_t CONFIG_CHANGE_TIMEOUT = 10 * 1000 * 1000L; // 10s
const int64_t id = ATOMIC_AAF(&palf_id_, 1);
common::ObMember dummy_member;
EXPECT_EQ(OB_SUCCESS, create_paxos_group_with_arb(id, arb_replica_idx, leader_idx, leader));
EXPECT_EQ(OB_SUCCESS, get_cluster_palf_handle_guard(id, palf_list));
// set the location cb on the follower replica so that it can find the leader
const int64_t another_f_idx = (leader_idx+1)%3;
loc_cb.leader_ = leader.palf_handle_impl_->self_;
palf_list[another_f_idx]->get_palf_handle_impl()->set_location_cache_cb(&loc_cb);
EXPECT_EQ(OB_SUCCESS, submit_log(leader, 100, id));
sleep(2);
const int64_t added_member_idx = 3;
const common::ObMember added_member = ObMember(palf_list[added_member_idx]->palf_handle_impl_->self_, 1);
// add a member, do not allow to append logs until config log reaches majority
LogConfigChangeArgs args(added_member, 3, ADD_MEMBER);
int64_t proposal_id = 0;
int64_t election_epoch = 0;
LogConfigVersion config_version;
EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->config_mgr_.start_change_config(proposal_id, election_epoch, args.type_));
// the first attempt is expected to return OB_EAGAIN, leaving the change in flight
EXPECT_EQ(OB_EAGAIN, leader.palf_handle_impl_->config_mgr_.change_config(args, proposal_id, election_epoch, config_version));
// do not allow to append log when changing config with arb
EXPECT_TRUE(leader.palf_handle_impl_->state_mgr_.is_changing_config_with_arb());
// retry every 10ms until the change succeeds, pre-syncing config log and mode meta
// to the new member between attempts
while (true) {
if (OB_SUCC(leader.palf_handle_impl_->config_mgr_.change_config(args, proposal_id, election_epoch, config_version))) {
break;
} else {
(void) leader.palf_handle_impl_->config_mgr_.pre_sync_config_log_and_mode_meta(args.server_, proposal_id);
::ob_usleep(10 * 1000);
}
}
// flashback one follower
LogEntryHeader header_origin;
// NOTE(review): base_scn and unused_access_mode are not read anywhere below
SCN base_scn;
base_scn.set_base();
SCN flashback_scn;
palf::AccessMode unused_access_mode;
// passed uninitialized to switch_append_to_flashback, which presumably fills it in
int64_t mode_version;
EXPECT_EQ(OB_SUCCESS, get_middle_scn(50, leader, flashback_scn, header_origin));
switch_append_to_flashback(leader, mode_version);
sleep(1);
EXPECT_EQ(OB_SUCCESS, palf_list[another_f_idx]->palf_handle_impl_->flashback(mode_version, flashback_scn, CONFIG_CHANGE_TIMEOUT));
// remove another follower
// (this is the member added above)
EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->remove_member(added_member, 2, CONFIG_CHANGE_TIMEOUT));
// release every handle before deleting the group
revert_cluster_palf_handle_guard(palf_list);
leader.reset();
delete_paxos_group(id);
PALF_LOG(INFO, "end test_2f1a_defensive", K(id));
}
// Looks up the palf handle `palf_id` of tenant `tenant_id` on the arbitration
// server `server` and stores it in `handle_guard`.
//
// @param tenant_id     tenant whose palf env is looked up on the server
// @param palf_id       id of the palf instance inside that env
// @param server        arbitration server to query; must not be NULL
// @param handle_guard  receives the handle on success
// @return OB_SUCCESS on success, OB_INVALID_ARGUMENT when `server` is NULL,
//         otherwise the error code of the failing lookup (logged at ERROR).
int get_palf_handle_lite(const int64_t tenant_id,
                         const int64_t palf_id,
                         ObSimpleArbServer *server,
                         IPalfHandleImplGuard &handle_guard)
{
  int ret = OB_SUCCESS;
  PalfEnvLiteGuard env_guard;

  if (NULL == server) {
    ret = OB_INVALID_ARGUMENT;
    return ret;
  }

  // step 1: the tenant's palf env on the arbitration server
  if (OB_FAIL(server->get_palf_env_lite(tenant_id, env_guard))) {
    PALF_LOG(ERROR, "get_palf_env_lite failed", K(tenant_id), K(palf_id));
    return ret;
  }

  // step 2: the palf handle inside that env
  if (OB_FAIL(env_guard.palf_env_lite_->get_palf_handle_impl(palf_id, handle_guard))) {
    PALF_LOG(ERROR, "get_palf_handle_impl failed", K(tenant_id), K(palf_id));
  }
  return ret;
}
using namespace palflite;
// Exercises the meta storage of the arb member across restarts: log meta is
// appended directly through the arb member's LogEngine until the meta tail
// reaches given offsets (1 block, 32 blocks, 34 blocks + 16KiB), and after
// each phase the paxos groups are restarted and the leader must still accept
// logs.
TEST_F(TestObSimpleLogClusterArbService, test_multi_meta_block)
{
SET_CASE_LOG_FILE(TEST_NAME, "test_mutli_meta_block");
OB_LOGGER.set_log_level("INFO");
MockLocCB loc_cb;
int ret = OB_SUCCESS;
PALF_LOG(INFO, "begin test_multi_meta_block");
int64_t leader_idx = 0;
int64_t arb_replica_idx = -1;
PalfHandleImplGuard leader;
std::vector<PalfHandleImplGuard*> palf_list;
const int64_t CONFIG_CHANGE_TIMEOUT = 10 * 1000 * 1000L; // 10s
const int64_t id = ATOMIC_AAF(&palf_id_, 1);
common::ObMember dummy_member;
EXPECT_EQ(OB_SUCCESS, create_paxos_group_with_arb(id, arb_replica_idx, leader_idx, leader));
EXPECT_EQ(OB_SUCCESS, get_cluster_palf_handle_guard(id, palf_list));
// set the location cb on the follower replica so that it can find the leader
const int64_t another_f_idx = (leader_idx+1)%3;
loc_cb.leader_ = leader.palf_handle_impl_->self_;
palf_list[another_f_idx]->get_palf_handle_impl()->set_location_cache_cb(&loc_cb);
EXPECT_EQ(OB_SUCCESS, submit_log(leader, 100, id));
sleep(2);
// phase 1: reach into the arb member's log engine and fill the meta storage
// until its tail is exactly one logical block
// NOTE(review): the dynamic_cast results are dereferenced without a NULL check
ObSimpleArbServer *arb_server = dynamic_cast<ObSimpleArbServer*>(get_cluster()[arb_replica_idx]);
IPalfHandleImplGuard arb_guard;
ASSERT_EQ(OB_SUCCESS, get_palf_handle_lite(OB_SERVER_TENANT_ID, id, arb_server, arb_guard));
PalfHandleLite *arb_palf = dynamic_cast<PalfHandleLite *>(arb_guard.palf_handle_impl_);
LogEngine *log_engine = &arb_palf->log_engine_;
LSN meta_tail = log_engine->log_meta_storage_.log_tail_;
LogStorage *meta_storage = &log_engine->log_meta_storage_;
{
while (1) {
if (meta_storage->log_tail_ < LSN(meta_storage->logical_block_size_)) {
EXPECT_EQ(OB_SUCCESS, log_engine->append_log_meta_(log_engine->log_meta_));
} else {
break;
}
}
}
meta_tail = log_engine->log_meta_storage_.log_tail_;
ASSERT_EQ(meta_tail, LSN(log_engine->log_meta_storage_.logical_block_size_));
// release every handle before restarting the groups
revert_cluster_palf_handle_guard(palf_list);
arb_guard.reset();
leader.reset();
EXPECT_EQ(OB_SUCCESS, restart_paxos_groups());
// phase 2: after the restart the leader must accept logs and one more meta append must succeed
{
ObSimpleArbServer *arb_server = dynamic_cast<ObSimpleArbServer*>(get_cluster()[arb_replica_idx]);
PalfHandleImplGuard leader;
EXPECT_EQ(OB_SUCCESS, get_leader(id, leader, leader_idx));
EXPECT_EQ(OB_SUCCESS, submit_log(leader, 100, id));
IPalfHandleImplGuard arb_guard;
ASSERT_EQ(OB_SUCCESS, get_palf_handle_lite(OB_SERVER_TENANT_ID, id, arb_server, arb_guard));
PalfHandleLite *arb_palf = dynamic_cast<PalfHandleLite *>(arb_guard.palf_handle_impl_);
LogEngine *log_engine = &arb_palf->log_engine_;
LSN meta_tail = log_engine->log_meta_storage_.log_tail_;
LogStorage *meta_storage = &log_engine->log_meta_storage_;
EXPECT_EQ(OB_SUCCESS, log_engine->append_log_meta_(log_engine->log_meta_));
// NOTE(review): meta_tail was read before the append above, so this compares the
// tail as loaded after the restart — confirm that this is the intended check
ASSERT_NE(meta_tail, LSN(log_engine->log_meta_storage_.logical_block_size_));
}
EXPECT_EQ(OB_SUCCESS, restart_paxos_groups());
// phase 3: fill the meta storage until its tail reaches 32 logical blocks
{
ObSimpleArbServer *arb_server = dynamic_cast<ObSimpleArbServer*>(get_cluster()[arb_replica_idx]);
PalfHandleImplGuard leader;
EXPECT_EQ(OB_SUCCESS, get_leader(id, leader, leader_idx));
EXPECT_EQ(OB_SUCCESS, submit_log(leader, 100, id));
IPalfHandleImplGuard arb_guard;
ASSERT_EQ(OB_SUCCESS, get_palf_handle_lite(OB_SERVER_TENANT_ID, id, arb_server, arb_guard));
PalfHandleLite *arb_palf = dynamic_cast<PalfHandleLite *>(arb_guard.palf_handle_impl_);
LogEngine *log_engine = &arb_palf->log_engine_;
LSN meta_tail = log_engine->log_meta_storage_.log_tail_;
LogStorage *meta_storage = &log_engine->log_meta_storage_;
while (1) {
if (meta_storage->log_tail_ < LSN(32 * meta_storage->logical_block_size_)) {
EXPECT_EQ(OB_SUCCESS, log_engine->append_log_meta_(log_engine->log_meta_));
} else {
break;
}
}
}
EXPECT_EQ(OB_SUCCESS, restart_paxos_groups());
// phase 4: submit 4000 logs, then fill the meta storage until its tail
// reaches 34 logical blocks plus 16KiB
{
ObSimpleArbServer *arb_server = dynamic_cast<ObSimpleArbServer*>(get_cluster()[arb_replica_idx]);
PalfHandleImplGuard leader;
EXPECT_EQ(OB_SUCCESS, get_leader(id, leader, leader_idx));
EXPECT_EQ(OB_SUCCESS, submit_log(leader, 4000, id));
IPalfHandleImplGuard arb_guard;
ASSERT_EQ(OB_SUCCESS, get_palf_handle_lite(OB_SERVER_TENANT_ID, id, arb_server, arb_guard));
PalfHandleLite *arb_palf = dynamic_cast<PalfHandleLite *>(arb_guard.palf_handle_impl_);
LogEngine *log_engine = &arb_palf->log_engine_;
LSN meta_tail = log_engine->log_meta_storage_.log_tail_;
LogStorage *meta_storage = &log_engine->log_meta_storage_;
while (1) {
if (meta_storage->log_tail_ < LSN(34 * meta_storage->logical_block_size_ + 4*4*1024)) {
EXPECT_EQ(OB_SUCCESS, log_engine->append_log_meta_(log_engine->log_meta_));
} else {
break;
}
}
}
delete_paxos_group(id);
PALF_LOG(INFO, "end test_mutli_meta_block", K(id));
}
// Scenario:
// 1. 2F1A, the leader starts to degrade another F
// 2. after the config log has been accepted by another F, the leader revoked
// 3. the previous leader has been elected as the new leader
// 4. reconfirm may fail because leader's config_version is not same to that of the follower
// The case expects the previous leader to take over again and to report a
// log sync member list of 2 members with replica number 2.
TEST_F(TestObSimpleLogClusterArbService, test_2f1a_degrade_when_no_leader)
{
SET_CASE_LOG_FILE(TEST_NAME, "test_2f1a_degrade_when_no_leader");
MockLocCB loc_cb;
int ret = OB_SUCCESS;
PALF_LOG(INFO, "begin test_2f1a_degrade_when_no_leader");
int64_t leader_idx = 0;
int64_t arb_replica_idx = -1;
PalfHandleImplGuard leader;
std::vector<PalfHandleImplGuard*> palf_list;
const int64_t id = ATOMIC_AAF(&palf_id_, 1);
common::ObMember dummy_member;
EXPECT_EQ(OB_SUCCESS, create_paxos_group_with_arb(id, arb_replica_idx, leader_idx, leader));
EXPECT_EQ(OB_SUCCESS, get_cluster_palf_handle_guard(id, palf_list));
// set the location cb on the follower replica so that it can find the leader
const int64_t another_f_idx = (leader_idx+1)%3;
loc_cb.leader_ = leader.palf_handle_impl_->self_;
palf_list[another_f_idx]->get_palf_handle_impl()->set_location_cache_cb(&loc_cb);
EXPECT_EQ(OB_SUCCESS, submit_log(leader, 100, id));
sleep(2);
// start degrading the follower by hand; the first change_config call is
// expected to return OB_EAGAIN, leaving the change in flight
LogConfigChangeArgs args(ObMember(palf_list[another_f_idx]->palf_handle_impl_->self_, 1), 0, DEGRADE_ACCEPTOR_TO_LEARNER);
int64_t proposal_id = 0;
int64_t election_epoch = 0;
LogConfigVersion config_version;
EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->config_mgr_.start_change_config(proposal_id, election_epoch, args.type_));
EXPECT_EQ(OB_EAGAIN, leader.palf_handle_impl_->config_mgr_.change_config(args, proposal_id, election_epoch, config_version));
// leader appended config meta, but did not apply config meta
// (on the follower, log_ms_meta_ and config_meta_ still carry the same config version)
EXPECT_NE(palf_list[leader_idx]->get_palf_handle_impl()->config_mgr_.log_ms_meta_.curr_.config_version_,
palf_list[leader_idx]->get_palf_handle_impl()->config_mgr_.config_meta_.curr_.config_version_);
EXPECT_EQ(palf_list[another_f_idx]->get_palf_handle_impl()->config_mgr_.log_ms_meta_.curr_.config_version_,
palf_list[another_f_idx]->get_palf_handle_impl()->config_mgr_.config_meta_.curr_.config_version_);
// block all networks of arb member, and the network from the follower to the leader
block_net(arb_replica_idx, another_f_idx, true);
block_net(arb_replica_idx, leader_idx, true);
block_net(another_f_idx, leader_idx, true);
// waiting for leader revoke
// (polls the role once per second, no timeout)
while (leader.palf_handle_impl_->state_mgr_.role_ == common::ObRole::LEADER) {
sleep(1);
}
// unblock_net
// NOTE(review): only two of the three blocks above are undone here; the block between
// the arb member and the follower is never removed in this case. Presumably this keeps
// the follower from being elected — confirm, and check whether later cases depend on it.
unblock_net(another_f_idx, leader_idx);
unblock_net(arb_replica_idx, leader_idx);
common::ObMemberList leader_member_list;
int64_t leader_replica_num = 0;
EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->config_mgr_.get_log_sync_member_list( \
leader_member_list, leader_replica_num));
EXPECT_EQ(2, leader_member_list.get_member_number());
EXPECT_EQ(2, leader_replica_num);
// the previous leader is expected to be elected again, with the same member list
int64_t new_leader_idx = 0;
PalfHandleImplGuard new_leader;
EXPECT_EQ(OB_SUCCESS, get_leader(id, new_leader, new_leader_idx));
EXPECT_EQ(leader.palf_handle_impl_->self_, new_leader.palf_handle_impl_->self_);
EXPECT_EQ(OB_SUCCESS, new_leader.palf_handle_impl_->config_mgr_.get_log_sync_member_list( \
leader_member_list, leader_replica_num));
EXPECT_EQ(2, leader_member_list.get_member_number());
EXPECT_EQ(2, leader_replica_num);
// release every handle before deleting the group
revert_cluster_palf_handle_guard(palf_list);
leader.reset();
new_leader.reset();
delete_paxos_group(id);
PALF_LOG(INFO, "end test_2f1a_degrade_when_no_leader", K(id));
}
// 2F1A: the follower is degraded first; then an upgrade of that follower is
// started by hand while the leader cannot reach the arb member, so the change
// stays in flight and the leader revokes. After the leader's network has been
// restored, the same replica must become an active leader again and finish
// the upgrade.
TEST_F(TestObSimpleLogClusterArbService, test_2f1a_upgrade_when_no_leader)
{
SET_CASE_LOG_FILE(TEST_NAME, "test_2f1a_upgrade_when_no_leader");
// OB_LOGGER.set_log_level("TRACE");
MockLocCB loc_cb;
int ret = OB_SUCCESS;
PALF_LOG(INFO, "begin test_2f1a_upgrade_when_no_leader");
int64_t leader_idx = 0;
int64_t arb_replica_idx = -1;
PalfHandleImplGuard leader;
std::vector<PalfHandleImplGuard*> palf_list;
const int64_t id = ATOMIC_AAF(&palf_id_, 1);
common::ObMember dummy_member;
EXPECT_EQ(OB_SUCCESS, create_paxos_group_with_arb(id, arb_replica_idx, leader_idx, leader));
EXPECT_EQ(OB_SUCCESS, get_cluster_palf_handle_guard(id, palf_list));
// set the location cb on the follower replica so that it can find the leader
const int64_t another_f_idx = (leader_idx+1)%3;
loc_cb.leader_ = leader.palf_handle_impl_->self_;
palf_list[another_f_idx]->get_palf_handle_impl()->set_location_cache_cb(&loc_cb);
EXPECT_EQ(OB_SUCCESS, submit_log(leader, 100, id));
sleep(2);
// block the network from the follower to the leader
block_net(another_f_idx, leader_idx, true);
// blocks until the leader has degraded the follower (return value not checked)
is_degraded(leader, another_f_idx);
// upgrade follower manually
// proposal_id and election_epoch are passed uninitialized to start_change_config,
// which presumably fills them in — confirm
int64_t proposal_id;
int64_t election_epoch;
LogConfigVersion config_version;
LogConfigChangeArgs args(common::ObMember(get_cluster()[another_f_idx]->get_addr(), 1), 0, UPGRADE_LEARNER_TO_ACCEPTOR);
EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->config_mgr_.start_change_config(proposal_id, election_epoch, args.type_));
// cut the leader off from the arb member before the change is submitted;
// both attempts below are expected to return OB_EAGAIN and leave state_ at 1
block_net(arb_replica_idx, leader_idx, true);
EXPECT_EQ(OB_EAGAIN, leader.palf_handle_impl_->config_mgr_.change_config(args, proposal_id, election_epoch, config_version));
EXPECT_EQ(1, leader.palf_handle_impl_->config_mgr_.state_);
EXPECT_EQ(OB_EAGAIN, leader.palf_handle_impl_->config_mgr_.change_config(args, proposal_id, election_epoch, config_version));
EXPECT_EQ(1, leader.palf_handle_impl_->config_mgr_.state_);
// the leader has appended the new config meta but has not applied it
EXPECT_NE(palf_list[leader_idx]->get_palf_handle_impl()->config_mgr_.log_ms_meta_.curr_.config_version_,
palf_list[leader_idx]->get_palf_handle_impl()->config_mgr_.config_meta_.curr_.config_version_);
// waiting for leader revoke
// (polls the role once per second, no timeout)
while (leader.palf_handle_impl_->state_mgr_.role_ == LEADER) {
sleep(1);
}
// avoid the follower is elected to be leader
// NOTE(review): this block between the arb member and the follower is not removed
// before the case ends
block_net(arb_replica_idx, another_f_idx, true);
unblock_all_net(leader_idx);
// waiting for leader takeover
while (!leader.palf_handle_impl_->state_mgr_.is_leader_active()) {
sleep(1);
}
// waiting for upgrading
// (blocks until the degraded learner list is empty; return value not checked)
is_upgraded(leader, id);
// release every handle before deleting the group
revert_cluster_palf_handle_guard(palf_list);
leader.reset();
delete_paxos_group(id);
PALF_LOG(INFO, "end test_2f1a_upgrade_when_no_leader", K(id));
}
} // end unittest
} // end oceanbase
// Program entry point. Everything is delegated to the
// RUN_SIMPLE_LOG_CLUSTER_TEST macro, which is defined outside this file and
// uses TEST_NAME as the suite name.
// NOTE(review): there is no explicit return statement here; presumably the
// macro expansion provides it (and consumes argc/argv) — confirm.
int main(int argc, char **argv)
{
RUN_SIMPLE_LOG_CLUSTER_TEST(TEST_NAME);
}