365 lines
18 KiB
C++
365 lines
18 KiB
C++
/**
|
|
* Copyright (c) 2021 OceanBase
|
|
* OceanBase CE is licensed under Mulan PubL v2.
|
|
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
|
* You may obtain a copy of Mulan PubL v2 at:
|
|
* http://license.coscl.org.cn/MulanPubL-2.0
|
|
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
|
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
|
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
|
* See the Mulan PubL v2 for more details.
|
|
*/
|
|
|
|
#include "share/ob_occam_time_guard.h"
|
|
#include "election_acceptor.h"
|
|
#include "common/ob_clock_generator.h"
|
|
#include "election_impl.h"
|
|
#include "lib/net/ob_addr.h"
|
|
#include "logservice/palf/election/interface/election_priority.h"
|
|
#include "logservice/palf/election/utils/election_common_define.h"
|
|
#include "logservice/palf/election/utils/election_event_recorder.h"
|
|
|
|
namespace oceanbase
|
|
{
|
|
namespace palf
|
|
{
|
|
namespace election
|
|
{
|
|
|
|
#define CHECK_SILENCE()\
|
|
do {\
|
|
if (ATOMIC_LOAD(&INIT_TS) < 0) {\
|
|
ELECT_LOG_RET(ERROR, common::OB_ERROR, "INIT_TS is less than 0, may not call GLOBAL_INIT_ELECTION_MODULE yet!", K(*this));\
|
|
return;\
|
|
} else if (OB_UNLIKELY(get_monotonic_ts() < ATOMIC_LOAD(&INIT_TS) + MAX_LEASE_TIME)) {\
|
|
ELECT_LOG(INFO, "keep silence for safty, won't send response", K(*this));\
|
|
return;\
|
|
}\
|
|
} while(0)
|
|
|
|
template <typename Type>
|
|
struct ResponseType {};
|
|
|
|
template <>
|
|
struct ResponseType<ElectionPrepareRequestMsg> { using type = ElectionPrepareResponseMsg; };
|
|
|
|
template <>
|
|
struct ResponseType<ElectionAcceptRequestMsg> { using type = ElectionAcceptResponseMsg; };
|
|
|
|
class RequestChecker
|
|
{
|
|
private:
|
|
public:
|
|
template <typename RequestMsg, typename Acceptor>
|
|
static bool check_ballot_valid(const RequestMsg &msg, Acceptor *p_acceptor, const LogPhase phase)
|
|
{
|
|
ELECT_TIME_GUARD(500_ms);
|
|
#define PRINT_WRAPPER K(msg), K(*p_acceptor)
|
|
bool ret = false;
|
|
if (OB_UNLIKELY(msg.get_ballot_number() < p_acceptor->ballot_number_)) {
|
|
using T = typename ResponseType<RequestMsg>::type;
|
|
T reject_msg = create_reject_message_(p_acceptor->p_election_->get_self_addr(),
|
|
p_acceptor->p_election_->inner_priority_seed_,
|
|
p_acceptor->p_election_->get_membership_version_(),
|
|
p_acceptor->p_election_->get_ls_biggest_min_cluster_version_ever_seen_(),
|
|
msg);
|
|
reject_msg.set_rejected(p_acceptor->ballot_number_);
|
|
p_acceptor->p_election_->send_(reject_msg);
|
|
LOG_PHASE(WARN, phase, "receive old ballot request, refused");
|
|
} else {
|
|
ret = true;
|
|
}
|
|
return ret;
|
|
#undef PRINT_WRAPPER
|
|
}
|
|
private:
|
|
static ElectionPrepareResponseMsg create_reject_message_(const common::ObAddr &addr,
|
|
const uint64_t inner_priority_seed,
|
|
const LogConfigVersion &membership_version,
|
|
const LsBiggestMinClusterVersionEverSeen &version,
|
|
const ElectionPrepareRequestMsg &msg)
|
|
{
|
|
UNUSED(inner_priority_seed),
|
|
UNUSED(membership_version);
|
|
return ElectionPrepareResponseMsg(addr, version, msg);
|
|
}
|
|
static ElectionAcceptResponseMsg create_reject_message_(const common::ObAddr &addr,
|
|
const uint64_t inner_priority_seed,
|
|
const LogConfigVersion &membership_version,
|
|
const LsBiggestMinClusterVersionEverSeen &version,
|
|
const ElectionAcceptRequestMsg &msg)
|
|
{
|
|
return ElectionAcceptResponseMsg(addr, inner_priority_seed, membership_version, version, msg);
|
|
}
|
|
};
|
|
|
|
ElectionAcceptor::ElectionAcceptor(ElectionImpl *p_election) :
|
|
ballot_number_(INVALID_VALUE),
|
|
ballot_of_time_window_(INVALID_VALUE),
|
|
is_time_window_opened_(false),
|
|
p_election_(p_election),
|
|
last_time_window_open_ts_(INVALID_VALUE),
|
|
last_dump_acceptor_info_ts_(INVALID_VALUE) {}
|
|
|
|
void ElectionAcceptor::advance_ballot_number_and_reset_related_states_(const int64_t new_ballot_number,
|
|
const LogPhase phase)
|
|
{
|
|
ELECT_TIME_GUARD(500_ms);
|
|
#define PRINT_WRAPPER K(new_ballot_number), K(*this)
|
|
if (new_ballot_number > ballot_number_) {
|
|
ballot_number_ = new_ballot_number;
|
|
ballot_of_time_window_ = ballot_number_;
|
|
reset_time_window_states_(phase);
|
|
} else {
|
|
LOG_PHASE_RET(ERROR, OB_INVALID_ARGUMENT, phase, "invalid argument");
|
|
}
|
|
#undef PRINT_WRAPPER
|
|
}
|
|
|
|
int ElectionAcceptor::start()
|
|
{
|
|
ObAddr last_record_lease_owner;
|
|
bool last_record_lease_valid_state = false;
|
|
int ret = OB_SUCCESS;
|
|
return p_election_->timer_->schedule_task_repeat(time_window_task_handle_,
|
|
250_ms,
|
|
[this,
|
|
last_record_lease_owner,
|
|
last_record_lease_valid_state]() mutable {
|
|
ELECT_TIME_GUARD(500_ms);
|
|
#define PRINT_WRAPPER KR(ret), K(*this)
|
|
int ret = OB_SUCCESS;
|
|
|
|
LockGuard lock_guard(p_election_->lock_);
|
|
// 周期性打印选举的状态
|
|
if (ObClockGenerator::getClock() > last_dump_acceptor_info_ts_ + 3_s) {
|
|
last_dump_acceptor_info_ts_ = ObClockGenerator::getClock();
|
|
ELECT_LOG(INFO, "dump acceptor info", K(*this));
|
|
}
|
|
// 当acceptor的Lease有效状态发生变化时需要打印日志以及汇报事件
|
|
bool lease_valid_state = !lease_.is_expired();
|
|
if (last_record_lease_valid_state != lease_valid_state) {// 当记录的lease有效状态与当前Lease的有效状态不符时
|
|
if (lease_valid_state) {// Lease从无效变有效,意味着该acceptor【可能】见证了一个新的Leader诞生
|
|
LOG_ELECT_LEADER(INFO, "witness new leader");
|
|
} else {// Lease从有效变无效,意味着本副本可能与Leader失去了网络连接
|
|
LOG_RENEW_LEASE(WARN, "lease expired");
|
|
p_election_->event_recorder_.report_acceptor_lease_expired_event(lease_);
|
|
lease_.reset();
|
|
}
|
|
}
|
|
// 当acceptor发现Lease的owner发生变化的时候需要打印日志以及汇报事件
|
|
if (last_record_lease_owner != lease_.get_owner()) {
|
|
if (last_record_lease_owner.is_valid() && lease_.get_owner().is_valid()) {
|
|
LOG_CHANGE_LEADER(INFO, "lease owner changed");
|
|
p_election_->event_recorder_.report_acceptor_witness_change_leader_event(last_record_lease_owner, lease_.get_owner());
|
|
}
|
|
last_record_lease_owner = lease_.get_owner();
|
|
}
|
|
if (is_time_window_opened_) {
|
|
ElectionPrepareResponseMsg prepare_res_accept(p_election_->get_self_addr(),
|
|
p_election_->get_ls_biggest_min_cluster_version_ever_seen_(),
|
|
highest_priority_prepare_req_);
|
|
bool can_vote = false;
|
|
if (last_record_lease_valid_state && !lease_valid_state) {// 这个定时任务可能是被延迟致lease到期时触发的,为了在lease到期的第一时间投票
|
|
can_vote = true;
|
|
LOG_ELECT_LEADER(INFO, "vote when lease expired");
|
|
} else if (ObClockGenerator::getClock() - last_time_window_open_ts_ >= CALCULATE_TIME_WINDOW_SPAN_TS()) {
|
|
can_vote = true;
|
|
} else {
|
|
LOG_ELECT_LEADER(INFO, "can't vote now", K(last_record_lease_valid_state),
|
|
K(lease_valid_state), K(CALCULATE_TIME_WINDOW_SPAN_TS()),
|
|
KTIME_(last_time_window_open_ts));
|
|
}
|
|
if (can_vote) {
|
|
// 1. 定时任务触发时要检查是否重新开启过窗口,避免提前关闭新的窗口,同时检查是否有这一轮记录的prepare请求
|
|
if (ballot_of_time_window_ == highest_priority_prepare_req_.get_ballot_number() &&
|
|
ballot_of_time_window_ > ballot_number_) {
|
|
// 1.1 要投票了,可以推大ballot number了
|
|
ballot_number_ = ballot_of_time_window_;
|
|
// 1.2 若lease过期需要重置lease,因为proposer没办法判断lease的有效性
|
|
if (lease_.is_expired()) {// 若Lease过期,则返回空Lease(Lease是否过期只能在本机上进行判断)
|
|
lease_.reset();
|
|
}
|
|
// 1.3 构造prepare ok消息
|
|
prepare_res_accept.set_accepted(ballot_number_, lease_);
|
|
if (CLICK_FAIL(p_election_->send_(prepare_res_accept))) {
|
|
LOG_ELECT_LEADER(ERROR, "fail to send prepare ok", K(prepare_res_accept));
|
|
} else {
|
|
p_election_->event_recorder_.report_vote_event(prepare_res_accept.get_receiver(), vote_reason_);
|
|
LOG_ELECT_LEADER(INFO, "time window closed, send vote", K(prepare_res_accept));
|
|
}
|
|
} else {
|
|
LOG_ELECT_LEADER(ERROR, "give up sending prepare response, casuse ballot number not match", K(prepare_res_accept));
|
|
}
|
|
is_time_window_opened_ = false;// 发出消息后,关闭时间窗口
|
|
}
|
|
}
|
|
last_record_lease_valid_state = lease_valid_state;
|
|
#undef PRINT_WRAPPER
|
|
return false;
|
|
});
|
|
}
|
|
|
|
void ElectionAcceptor::stop()
|
|
{
|
|
ELECT_TIME_GUARD(3_s);
|
|
time_window_task_handle_.stop_and_wait();
|
|
}
|
|
|
|
void ElectionAcceptor::reset_time_window_states_(const LogPhase phase)
|
|
{
|
|
ELECT_TIME_GUARD(500_ms);
|
|
#define PRINT_WRAPPER K(*this)
|
|
if (is_time_window_opened_) {
|
|
is_time_window_opened_ = false;// 推大ballot number的时候要关闭时间窗口
|
|
highest_priority_prepare_req_.reset();
|
|
vote_reason_.reset();
|
|
LOG_PHASE(INFO, phase, "time window closed, not vote");
|
|
}
|
|
#undef PRINT_WRAPPER
|
|
}
|
|
|
|
void ElectionAcceptor::on_prepare_request(const ElectionPrepareRequestMsg &prepare_req)
|
|
{
|
|
ELECT_TIME_GUARD(500_ms);
|
|
#define PRINT_WRAPPER KR(ret), K(prepare_req), K(*this)
|
|
CHECK_SILENCE();// 启动后的要维持一段静默时间,acceptor假装看不到任何消息,以维护lease的正确语义
|
|
int ret = OB_SUCCESS;
|
|
LogPhase phase = (prepare_req.get_role() == common::ObRole::FOLLOWER ? LogPhase::ELECT_LEADER : LogPhase::RENEW_LEASE);
|
|
LOG_PHASE(INFO, phase, "handle prepare request");
|
|
if (OB_UNLIKELY(false == p_election_->is_member_list_valid_())) {
|
|
LOG_PHASE(INFO, phase, "ignore prepare when member_list is invalid");
|
|
} else if (prepare_req.get_membership_version() < p_election_->get_membership_version_()) {
|
|
LOG_PHASE(INFO, phase, "ignore lower membership version request");
|
|
} else if (OB_LIKELY(RequestChecker::check_ballot_valid(prepare_req, this, phase))) {
|
|
// 0. 收到leader prepare的时候无须比较优先级,直接返回投票结果
|
|
if (prepare_req.get_role() == common::ObRole::LEADER) {
|
|
if (prepare_req.get_ballot_number() <= ballot_number_) {
|
|
LOG_PHASE(WARN, phase, "leader prepare message's ballot number is smaller than self");
|
|
} else {
|
|
advance_ballot_number_and_reset_related_states_(prepare_req.get_ballot_number(), phase);
|
|
ElectionPrepareResponseMsg prepare_res_accept(p_election_->get_self_addr(),
|
|
p_election_->get_ls_biggest_min_cluster_version_ever_seen_(),
|
|
prepare_req);
|
|
prepare_res_accept.set_accepted(ballot_number_, lease_);
|
|
if (CLICK_FAIL(p_election_->msg_handler_->send(prepare_res_accept))) {
|
|
LOG_PHASE(WARN, phase, "send prepare response to leader prepare failed");
|
|
} else {
|
|
LOG_PHASE(INFO, phase, "receive valid leader prepare message, send vote to him");
|
|
}
|
|
}
|
|
} else {
|
|
// 1. 遇到比时间窗口更大的ballot number的时候需要关闭当前的时间窗口
|
|
if (is_time_window_opened_ && prepare_req.get_ballot_number() > ballot_of_time_window_) {
|
|
reset_time_window_states_(phase);
|
|
}
|
|
// 2. 若时间窗口未开启,需要开启时间窗口
|
|
if (!is_time_window_opened_ && prepare_req.get_ballot_number() > ballot_of_time_window_) {
|
|
ballot_of_time_window_ = prepare_req.get_ballot_number();
|
|
highest_priority_prepare_req_.reset();
|
|
LOG_PHASE(DEBUG, phase, "advance ballot_of_time_window_");
|
|
int64_t ballot_of_time_window_when_registered = ballot_of_time_window_;
|
|
int64_t timewindow_span = 0;
|
|
if (!lease_.is_expired()) {// 当前Lease有效时,如果有效的时间超过一个最大单程消息延迟,则窗口关闭时机以Lease到期时间为准
|
|
timewindow_span = std::max(lease_.get_lease_end_ts() - get_monotonic_ts(), CALCULATE_TIME_WINDOW_SPAN_TS() / 2);
|
|
} else {// 否则视为普通的无主选举流程,窗口需要覆盖两个最大单程消息延迟
|
|
timewindow_span = CALCULATE_TIME_WINDOW_SPAN_TS();
|
|
}
|
|
if (CLICK_FAIL(time_window_task_handle_.reschedule_after(timewindow_span))) {
|
|
LOG_PHASE(ERROR, phase, "open time window failed");
|
|
} else {
|
|
is_time_window_opened_ = true;// 定时任务注册成功,打开时间窗口
|
|
last_time_window_open_ts_ = ObClockGenerator::getClock();
|
|
LOG_PHASE(INFO, phase, "open time window success", K(timewindow_span));
|
|
}
|
|
}
|
|
// 3. 成员版本号是第一优先级,在成员版本号不小于自己的基础上要比较成员版本号的大小,否则将导致分票
|
|
if (OB_SUCC(ret) && is_time_window_opened_) {// 在时间窗口内,进行计票
|
|
if (prepare_req.get_ballot_number() != ballot_of_time_window_) {
|
|
LOG_PHASE(INFO, phase, "prepare request's ballot is not same as time window, just ignore");
|
|
} else if (!highest_priority_prepare_req_.is_valid()) {
|
|
highest_priority_prepare_req_ = prepare_req;
|
|
LOG_PHASE(INFO, phase, "highest priority prepare message will be replaced casuse cached highest prioriy message is invalid");
|
|
vote_reason_.assign("the only request");
|
|
} else if (prepare_req.get_membership_version() > highest_priority_prepare_req_.get_membership_version()) {
|
|
highest_priority_prepare_req_ = prepare_req;
|
|
LOG_PHASE(INFO, phase, "highest priority prepare message will be replaced casuse new message's membership version is higher");
|
|
vote_reason_.assign("membership_version is higher");
|
|
} else if (prepare_req.get_membership_version() < highest_priority_prepare_req_.get_membership_version()) {
|
|
LOG_PHASE(INFO, phase, "prepare message's membership version not less than self, but not greater than cached highest priority prepare message");
|
|
} else {
|
|
// 4. 比较消息和缓存的最高优先级之间的高低
|
|
if (p_election_->is_rhs_message_higher_(highest_priority_prepare_req_, prepare_req, vote_reason_, true, LogPhase::ELECT_LEADER)) {
|
|
LOG_PHASE(INFO, phase, "highest priority prepare request will be replaced", K(vote_reason_));
|
|
highest_priority_prepare_req_ = prepare_req;
|
|
} else {
|
|
LOG_PHASE(INFO, phase, "ignore prepare request, cause it has lower priority", K(vote_reason_));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
#undef PRINT_WRAPPER
|
|
}
|
|
|
|
void ElectionAcceptor::on_accept_request(const ElectionAcceptRequestMsg &accept_req,
|
|
int64_t *us_to_expired)
|
|
{
|
|
ELECT_TIME_GUARD(500_ms);
|
|
#define PRINT_WRAPPER KR(ret), K(accept_req), K(*this)
|
|
CHECK_SILENCE();// 启动后的要维持一段静默时间,acceptor假装看不到任何消息,以维护lease的语义
|
|
int ret = OB_SUCCESS;
|
|
if (OB_LIKELY(RequestChecker::check_ballot_valid(accept_req,
|
|
this,
|
|
LogPhase::RENEW_LEASE))) {
|
|
// 1. 推大ballot number,防止accept lease的ballot number回退
|
|
if (accept_req.get_ballot_number() > ballot_number_) {
|
|
advance_ballot_number_and_reset_related_states_(accept_req.get_ballot_number(), LogPhase::RENEW_LEASE);
|
|
}
|
|
// 2. 无条件更新Lease
|
|
lease_.update_from(accept_req);
|
|
*us_to_expired = lease_.get_lease_end_ts() - get_monotonic_ts();
|
|
// 3. 构造accept ok消息
|
|
ElectionAcceptResponseMsg accept_res_accept(p_election_->get_self_addr(),
|
|
p_election_->inner_priority_seed_,
|
|
p_election_->get_membership_version_(),
|
|
p_election_->get_ls_biggest_min_cluster_version_ever_seen_(),
|
|
accept_req);
|
|
(void) p_election_->refresh_priority_();
|
|
if (CLICK_FAIL(accept_res_accept.set_accepted(ballot_number_,
|
|
p_election_->get_priority_()))) {
|
|
LOG_RENEW_LEASE(ERROR, "fail to copy priority", K(accept_res_accept));
|
|
} else if (CLICK_FAIL(p_election_->send_(accept_res_accept))) {
|
|
LOG_RENEW_LEASE(ERROR, "fail to send msg", K(accept_res_accept));
|
|
}
|
|
}
|
|
#undef PRINT_WRAPPER
|
|
}
|
|
|
|
int64_t ElectionAcceptor::to_string(char *buf, const int64_t buf_len) const
|
|
{
|
|
int64_t pos = 0;
|
|
if (OB_ISNULL(p_election_)) {
|
|
common::databuff_printf(buf, buf_len, pos, "{p_election:NULL");
|
|
} else {
|
|
common::databuff_printf(buf, buf_len, pos, "{ls_id:{id:%ld}", p_election_->id_);
|
|
common::databuff_printf(buf, buf_len, pos, ", addr:%s", to_cstring(p_election_->get_self_addr()));
|
|
}
|
|
common::databuff_printf(buf, buf_len, pos, ", ballot_number:%ld", ballot_number_);
|
|
common::databuff_printf(buf, buf_len, pos, ", ballot_of_time_window:%ld", ballot_of_time_window_);
|
|
common::databuff_printf(buf, buf_len, pos, ", lease:%s", to_cstring(lease_));
|
|
common::databuff_printf(buf, buf_len, pos, ", is_time_window_opened:%s", to_cstring(is_time_window_opened_));
|
|
common::databuff_printf(buf, buf_len, pos, ", vote_reason:%s", to_cstring(vote_reason_));
|
|
common::databuff_printf(buf, buf_len, pos, ", last_time_window_open_ts:%s", ObTime2Str::ob_timestamp_str_range<YEAR, USECOND>(last_time_window_open_ts_));
|
|
if (highest_priority_prepare_req_.is_valid()) {
|
|
common::databuff_printf(buf, buf_len, pos, ", highest_priority_prepare_req:%s",
|
|
to_cstring(highest_priority_prepare_req_));
|
|
}
|
|
common::databuff_printf(buf, buf_len, pos, ", p_election:0x%lx}", (unsigned long)p_election_);
|
|
return pos;
|
|
}
|
|
|
|
}
|
|
}
|
|
}
|