[SCN] fix farm failure

This commit is contained in:
obdev
2022-11-28 02:58:33 +00:00
committed by ob-robot
parent 87a9357186
commit 51de5b5911
594 changed files with 9722 additions and 7770 deletions

View File

@ -20,6 +20,7 @@
#include "lib/oblog/ob_log.h"
#include "logservice/applyservice/ob_log_apply_service.h"
#include "logservice/replayservice/ob_log_replay_service.h"
#include "logservice/rcservice/ob_role_change_service.h"
#include "logservice/logrpc/ob_log_rpc_req.h"
#include "logservice/palf/log_define.h"
#include "logservice/palf/lsn.h"
@ -39,6 +40,7 @@ ObLogHandler::ObLogHandler() : self_(),
apply_status_(NULL),
apply_service_(NULL),
replay_service_(NULL),
rc_service_(NULL),
deps_lock_(),
lc_cb_(NULL),
rpc_proxy_(NULL),
@ -47,6 +49,7 @@ ObLogHandler::ObLogHandler() : self_(),
last_check_sync_ts_(OB_INVALID_TIMESTAMP),
last_renew_loc_ts_(OB_INVALID_TIMESTAMP),
is_in_stop_state_(true),
is_offline_(false),
is_inited_(false),
get_max_decided_scn_debug_time_(OB_INVALID_TIMESTAMP)
{
@ -61,6 +64,7 @@ int ObLogHandler::init(const int64_t id,
const common::ObAddr &self,
ObLogApplyService *apply_service,
ObLogReplayService *replay_service,
ObRoleChangeService *rc_service,
PalfHandle &palf_handle,
PalfEnv *palf_env,
PalfLocationCacheCb *lc_cb,
@ -88,6 +92,7 @@ int ObLogHandler::init(const int64_t id,
get_max_decided_scn_debug_time_ = OB_INVALID_TIMESTAMP;
apply_service_ = apply_service;
replay_service_ = replay_service;
rc_service_ = rc_service;
apply_status_->inc_ref();
id_ = id;
self_ = self;
@ -97,6 +102,7 @@ int ObLogHandler::init(const int64_t id,
lc_cb_ = lc_cb;
rpc_proxy_ = rpc_proxy;
is_in_stop_state_ = false;
is_offline_ = false;
is_inited_ = true;
FLOG_INFO("ObLogHandler init success", K(id), K(palf_handle));
}
@ -136,11 +142,12 @@ int ObLogHandler::stop()
//判断is_apply_done依赖log handler不能再继续append
//所以需要is_in_stop_state_置true表示stop阶段已经不能再提交日志
int ObLogHandler::safe_to_destroy()
int ObLogHandler::safe_to_destroy(bool &is_safe_destroy)
{
int ret = OB_SUCCESS;
bool is_done = false;
LSN end_lsn;
is_safe_destroy = true;
WLockGuard guard(lock_);
if (IS_INIT) {
if (palf_handle_.is_valid() || !is_in_stop_state_) {
@ -154,6 +161,9 @@ int ObLogHandler::safe_to_destroy()
CLOG_LOG(INFO, "wait apply done finish", K(ret), K(is_done), K(end_lsn), KPC(apply_status_));
}
}
if (OB_FAIL(ret)) {
is_safe_destroy = false;
}
return ret;
}
@ -163,6 +173,7 @@ void ObLogHandler::destroy()
int ret = OB_SUCCESS;
if (IS_INIT) {
is_inited_ = false;
is_offline_ = false;
is_in_stop_state_ = true;
common::ObSpinLockGuard deps_guard(deps_lock_);
apply_service_->revert_apply_status(apply_status_);
@ -172,6 +183,7 @@ void ObLogHandler::destroy()
if (true == palf_handle_.is_valid()) {
palf_env_->close(palf_handle_);
}
rc_service_ = NULL;
lc_cb_ = NULL;
rpc_proxy_ = NULL;
palf_env_ = NULL;
@ -203,7 +215,7 @@ int ObLogHandler::append(const void *buffer,
cb->set_append_start_ts(ObTimeUtility::fast_current_time());
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
} else if (is_in_stop_state_) {
} else if (is_in_stop_state_ || is_offline_) {
ret = OB_NOT_RUNNING;
} else if (LEADER != ATOMIC_LOAD(&role_)) {
ret = OB_NOT_MASTER;
@ -1328,6 +1340,52 @@ int ObLogHandler::diagnose(LogHandlerDiagnoseInfo &diagnose_info) const
return ret;
}
int ObLogHandler::offline()
{
int ret = OB_SUCCESS;
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
} else if (true == is_in_stop_state_) {
ret = OB_NOT_RUNNING;
} else if (OB_FAIL(disable_replay())) {
CLOG_LOG(WARN, "disable_replay failed", K(ret), KPC(this));
} else if (OB_FAIL(disable_sync())) {
CLOG_LOG(WARN, "disable_sync failed", K(ret), KPC(this));
} else {
WLockGuard guard(lock_);
// NB: make proposal_id_ to be invalid:
// 1. avoid append success.
// 2. make role change success(role change service require proposal_id of log_handler is not same as palf)
// 3. don't make role to follower at here, otherwise, role change thread will execute follower to follower.
proposal_id_ = INVALID_PROPOSAL_ID;
// NB:
// 1. After set 'is_offline_' to true, we must prohibit apply log, otherwise,
// log handler may be come LEADER after offline, and the proposal id of apply
// is -1, update committed end ls of appy will print ERROR logs.
//
// 2. Must reset proposal_id of apply_status_ before set 'is_offline', otherwise,
// concurrent 'switch to follower' event may set apply status to FOLLOWER, however,
// there are some uncommitted logs in PALF. and before reset_proposal_id, these
// uncommitted logs has been committed. and then update committed end ls of apply
// will print ERROR logs, because the role of apply is FOLLOWER, and the proposal_id
// of apply is as same as PALF.
apply_status_->reset_proposal_id();
//
// 3. Must keep the order of set 'is_offline_' between reset the proposal id of apply.
//
MEM_BARRIER();
is_offline_ = true;
// NB: must ensure on_role_change not fail.
if (OB_FAIL(rc_service_->on_role_change(id_))) {
CLOG_LOG(ERROR, "on_role_change failed", K(ret), KPC(this));
} else {
CLOG_LOG(INFO, "LogHandler offline success", K(ret), KPC(this));
}
}
return ret;
}
int ObLogHandler::diagnose_palf(palf::PalfDiagnoseInfo &diagnose_info) const
{
int ret = OB_SUCCESS;
@ -1341,5 +1399,44 @@ int ObLogHandler::diagnose_palf(palf::PalfDiagnoseInfo &diagnose_info) const
}
return ret;
}
int ObLogHandler::online(const LSN &lsn, const SCN &scn)
{
int ret = OB_SUCCESS;
SCN max_decided_scn;
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
} else if (true == is_in_stop_state_) {
ret = OB_NOT_RUNNING;
} else if (OB_FAIL(get_max_decided_scn(max_decided_scn))) {
CLOG_LOG(WARN, "get_max_decided_log_scn failed", K(ret), KPC(this));
} else if (scn < max_decided_scn) {
ret = OB_NOT_SUPPORTED;
CLOG_LOG(WARN, "base scn is less than max decided scn, not supported",
K(ret), KPC(this), K(scn), K(max_decided_scn));
} else if (OB_FAIL(enable_replay(lsn, scn))) {
CLOG_LOG(WARN, "enable_replay failed", K(ret), KPC(this), K(lsn), K(scn));
} else if (OB_FAIL(enable_sync())) {
CLOG_LOG(WARN, "enable_sync failed", K(ret), KPC(this));
} else {
WLockGuard guard(lock_);
proposal_id_ = INVALID_PROPOSAL_ID;
is_offline_ = false;
// NB: before notify role change service, we need set role to FOLLOWER,
// otherwise, role change service may need switch leader to leader.
role_ = common::FOLLOWER;
if (OB_FAIL(rc_service_->on_role_change(id_))) {
CLOG_LOG(WARN, "on_role_change failed", K(ret), KPC(this));
} else {
CLOG_LOG(INFO, "LogHander online success", K(ret), KPC(this), K(lsn), K(scn));
}
}
return ret;
}
bool ObLogHandler::is_offline() const
{
return true == ATOMIC_LOAD(&is_offline_);
}
} // end namespace logservice
} // end napespace oceanbase