BUGFIX: fix restore ls state after restart
This commit is contained in:
parent
4cc88e0b6e
commit
9c018210bd
@ -713,7 +713,7 @@ int ObLSService::online_ls()
|
||||
int tmp_ret = OB_SUCCESS;
|
||||
common::ObSharedGuard<ObLSIterator> ls_iter;
|
||||
ObLS *ls = nullptr;
|
||||
bool can_replay = true;
|
||||
int64_t create_type = ObLSCreateType::NORMAL;
|
||||
if (OB_FAIL(get_ls_iter(ls_iter, ObLSGetMod::TXSTORAGE_MOD))) {
|
||||
LOG_WARN("failed to get ls iter", K(ret));
|
||||
} else {
|
||||
@ -725,12 +725,13 @@ int ObLSService::online_ls()
|
||||
} else if (nullptr == ls) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_ERROR("ls is null", K(ret));
|
||||
} else if (OB_FAIL(ls->check_can_replay_clog(can_replay))) {
|
||||
LOG_WARN("failed to check ls can replay clog", K(ret), KPC(ls));
|
||||
} else if (!can_replay) {
|
||||
// ls can not enable replay
|
||||
} else if (OB_FAIL(ls->enable_replay())) {
|
||||
LOG_ERROR("fail to enable replay", K(ret));
|
||||
} else {
|
||||
ObLSLockGuard lock_ls(ls);
|
||||
if (OB_FAIL(ls->get_create_type(create_type))) {
|
||||
LOG_WARN("get ls create type failed", K(ret));
|
||||
} else if (OB_FAIL(post_create_ls_(create_type, ls))) {
|
||||
LOG_WARN("post create ls failed", K(ret));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (OB_ITER_END == ret) {
|
||||
|
@ -88,6 +88,13 @@ bool ObLSRestoreStatus::is_valid_(int32_t status) const
|
||||
}
|
||||
|
||||
#undef LS_RESTORE_STATUS_CASE_TO_TYPE
|
||||
// Tells the caller whether this restore status is one for which the LS should
// be brought online (ObLSMeta::check_ls_need_online clears its need_online
// flag when this returns false).
// Returns true when status_ lies in
// [WAIT_RESTORE_SYS_TABLETS, WAIT_RESTORE_MAJOR_DATA], or equals NONE or
// CLONE_CLOG_REPLAY; returns false for every other status.
// NOTE(review): the range test depends on the numeric ordering of the status
// enum, which is declared outside this view - confirm before reordering it.
bool ObLSRestoreStatus::need_online() const
|
||||
{
|
||||
return ((status_ >= WAIT_RESTORE_SYS_TABLETS
|
||||
&& status_ <= WAIT_RESTORE_MAJOR_DATA)
|
||||
|| status_ == NONE
|
||||
|| status_ == CLONE_CLOG_REPLAY);
|
||||
}
|
||||
|
||||
int ObLSRestoreStatus::set_status(int32_t status)
|
||||
{
|
||||
|
@ -110,7 +110,9 @@ public:
|
||||
}
|
||||
|
||||
// offline ls and enable sync and online ls restore handler in [RESTORE_START, RESTORE_SYS_TABLETS] or RESTORE_FAILED
|
||||
bool is_required_to_switch_ls_state_for_restore() const
|
||||
bool need_online() const;
|
||||
// enable sync and online ls restore handler in [RESTORE_START, RESTORE_SYS_TABLETS] or RESTORE_FAILED
|
||||
bool is_restore_first_step() const
|
||||
{
|
||||
return ((status_ >= Status::RESTORE_START && status_ <= Status::RESTORE_SYS_TABLETS) ||
|
||||
status_ == Status::RESTORE_FAILED);
|
||||
@ -122,7 +124,7 @@ public:
|
||||
{
|
||||
return status_ >= Status::CLONE_START && status_ <= Status::CLONE_COPY_ALL_TABLET_META;
|
||||
}
|
||||
bool is_required_to_switch_ls_state_for_clone() const
|
||||
bool is_clone_first_step() const
|
||||
{
|
||||
return ((status_ >= Status::CLONE_START && status_ <= Status::CLONE_COPY_LS_META) ||
|
||||
Status::CLONE_FAILED == status_);
|
||||
|
@ -594,6 +594,11 @@ bool ObMigrationStatusHelper::check_migration_status_is_fail_(const ObMigrationS
|
||||
return is_fail;
|
||||
}
|
||||
|
||||
// Returns true only when cur_status is OB_MIGRATION_STATUS_NONE; every other
// migration status yields false.
bool ObMigrationStatusHelper::need_online(const ObMigrationStatus &cur_status)
|
||||
{
|
||||
return (OB_MIGRATION_STATUS_NONE == cur_status);
|
||||
}
|
||||
|
||||
bool ObMigrationStatusHelper::check_allow_gc_abandoned_ls(const ObMigrationStatus &cur_status)
|
||||
{
|
||||
bool allow_gc = false;
|
||||
|
@ -81,6 +81,7 @@ public:
|
||||
const ObMigrationStatus &cur_status,
|
||||
bool &allow_gc);
|
||||
// check_allow_gc_abandoned_ls: check the migration status. An LS in an XXX_FAIL state is considered abandoned and can be GC'ed directly on restart.
|
||||
static bool need_online(const ObMigrationStatus &cur_status);
|
||||
static bool check_allow_gc_abandoned_ls(const ObMigrationStatus &cur_status);
|
||||
static bool check_can_migrate_out(const ObMigrationStatus &cur_status);
|
||||
static int check_can_change_status(
|
||||
|
@ -576,7 +576,7 @@ bool ObLS::is_need_gc() const
|
||||
return bool_ret;
|
||||
}
|
||||
|
||||
bool ObLS::is_required_to_switch_state_for_restore_() const
|
||||
bool ObLS::is_clone_first_step() const
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
bool bool_ret = false;
|
||||
@ -584,12 +584,12 @@ bool ObLS::is_required_to_switch_state_for_restore_() const
|
||||
if (OB_FAIL(ls_meta_.get_restore_status(restore_status))) {
|
||||
LOG_WARN("fail to get restore status", K(ret), K(ls_meta_.ls_id_));
|
||||
} else {
|
||||
bool_ret = restore_status.is_required_to_switch_ls_state_for_restore();
|
||||
bool_ret = restore_status.is_clone_first_step();
|
||||
}
|
||||
return bool_ret;
|
||||
}
|
||||
|
||||
bool ObLS::is_required_to_switch_state_for_clone_() const
|
||||
bool ObLS::is_restore_first_step() const
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
bool bool_ret = false;
|
||||
@ -597,7 +597,7 @@ bool ObLS::is_required_to_switch_state_for_clone_() const
|
||||
if (OB_FAIL(ls_meta_.get_restore_status(restore_status))) {
|
||||
LOG_WARN("fail to get restore status", K(ret), K(ls_meta_.ls_id_));
|
||||
} else {
|
||||
bool_ret = restore_status.is_required_to_switch_ls_state_for_clone();
|
||||
bool_ret = restore_status.is_restore_first_step();
|
||||
}
|
||||
return bool_ret;
|
||||
}
|
||||
@ -2096,47 +2096,16 @@ int ObLS::enable_replay()
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObLS::check_can_online(bool &can_online)
|
||||
int ObLS::check_ls_need_online(bool &need_online)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
can_online = true;
|
||||
if (is_need_gc()) {
|
||||
// this ls will be gc later, should not enable replay
|
||||
can_online = false;
|
||||
} else if (startup_transfer_info_.is_valid()) {
|
||||
// There is a tablet has_transfer_table=true in the log stream
|
||||
can_online = false;
|
||||
LOG_INFO("ls need to wait for dependency to be removed", "ls_id", get_ls_id(),
|
||||
K_(startup_transfer_info));
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObLS::check_can_replay_clog(bool &can_replay)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
share::ObLSRestoreStatus restore_status;
|
||||
ObMigrationStatus migration_status = ObMigrationStatus::OB_MIGRATION_STATUS_MAX;
|
||||
can_replay = true;
|
||||
if (is_need_gc()) {
|
||||
// this ls will be gc later, should not enable replay
|
||||
can_replay = false;
|
||||
} else if (OB_FAIL(get_migration_status(migration_status))) {
|
||||
LOG_WARN("failed to get ls migration status", K(ret));
|
||||
} else if (ObMigrationStatus::OB_MIGRATION_STATUS_REBUILD == migration_status) {
|
||||
// ls will online in rebuild process, ls online will enable clog replay
|
||||
can_replay = false;
|
||||
LOG_INFO("ls is in rebuild process, cannot replay clog", "ls_id", get_ls_id(), K(migration_status));
|
||||
} else if (OB_FAIL(get_restore_status(restore_status))) {
|
||||
LOG_WARN("fail to get ls restore status", K(ret));
|
||||
} else if (!restore_status.can_replay_log()) {
|
||||
// while downtime, if ls's restore status is in [restore_start, wait_restore_tablet_meta], clog can't replay
|
||||
can_replay = false;
|
||||
LOG_INFO("restore status not as expected, can not replay clog", "ls_id", get_ls_id(), K(restore_status));
|
||||
} else if (startup_transfer_info_.is_valid()) {
|
||||
// There is a tablet has_transfer_table=true in the log stream, clog can't replay
|
||||
can_replay = false;
|
||||
LOG_INFO("ls not enable clog replay, need to wait for dependency to be removed", "ls_id", get_ls_id(), K_(startup_transfer_info));
|
||||
need_online = true;
|
||||
if (startup_transfer_info_.is_valid()) {
|
||||
// A tablet in this log stream has has_transfer_table=true, so the ls cannot go online yet
|
||||
need_online = false;
|
||||
LOG_INFO("ls not online, need to wait dependency to be removed", "ls_id", get_ls_id(), K_(startup_transfer_info));
|
||||
} else if (OB_FAIL(ls_meta_.check_ls_need_online(need_online))) {
|
||||
LOG_WARN("fail to check ls need online", K(ret));
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
@ -310,6 +310,8 @@ public:
|
||||
bool is_create_committed() const;
|
||||
bool is_need_gc() const;
|
||||
bool is_in_gc();
|
||||
bool is_restore_first_step() const;
|
||||
bool is_clone_first_step() const;
|
||||
// for rebuild
|
||||
// remove inner tablet, the memtable and minor sstable of data tablet, disable replay
|
||||
// int prepare_rebuild();
|
||||
@ -383,7 +385,7 @@ public:
|
||||
int flush_if_need(const bool need_flush);
|
||||
int try_sync_reserved_snapshot(const int64_t new_reserved_snapshot, const bool update_flag);
|
||||
int check_can_replay_clog(bool &can_replay);
|
||||
int check_can_online(bool &can_online);
|
||||
int check_ls_need_online(bool &need_online);
|
||||
|
||||
TO_STRING_KV(K_(running_state), K_(ls_meta), K_(switch_epoch), K_(log_handler), K_(restore_handler), K_(is_inited), K_(tablet_gc_handler), K_(startup_transfer_info));
|
||||
private:
|
||||
@ -405,8 +407,6 @@ private:
|
||||
ObTabletHandle &handle);
|
||||
int offline_advance_epoch_();
|
||||
int online_advance_epoch_();
|
||||
bool is_required_to_switch_state_for_restore_() const;
|
||||
bool is_required_to_switch_state_for_clone_() const;
|
||||
public:
|
||||
// ObLSMeta interface:
|
||||
int update_ls_meta(const bool update_restore_status,
|
||||
|
@ -805,21 +805,34 @@ int ObLSMeta::get_create_type(int64_t &create_type) const
|
||||
if (!is_valid()) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("log stream meta is not valid, cannot get restore status", K(ret), K(*this));
|
||||
// before 4.3
|
||||
} else if (restore_status_.is_required_to_switch_ls_state_for_restore()) {
|
||||
create_type = ObLSCreateType::RESTORE;
|
||||
} else if (ObMigrationStatus::OB_MIGRATION_STATUS_REBUILD == migration_status_) {
|
||||
} else if (ObMigrationStatus::OB_MIGRATION_STATUS_NONE != migration_status_) {
|
||||
create_type = ObLSCreateType::MIGRATE;
|
||||
} else if (restore_status_.is_required_to_switch_ls_state_for_clone()) {
|
||||
} else if (restore_status_.is_in_clone()) {
|
||||
create_type = ObLSCreateType::CLONE;
|
||||
// before 4.3 end
|
||||
// after 4.3
|
||||
} else if (restore_status_.is_in_restore()) {
|
||||
create_type = ObLSCreateType::RESTORE;
|
||||
} else if (ls_persistent_state_.is_ha_state()) {
|
||||
create_type = ObLSCreateType::MIGRATE;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Decides, from the migration and restore status held in this meta, whether
// the LS needs to go online.
// @param need_online [out] initialised to true; set to false when
//        ObMigrationStatusHelper::need_online(migration_status_) is false, or
//        when restore_status_.need_online() is false.
// @return OB_SUCCESS on success; OB_ERR_UNEXPECTED when the meta is not valid
//         (need_online is left at its initial value of true in that case).
int ObLSMeta::check_ls_need_online(bool &need_online) const
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
need_online = true;
|
||||
if (!is_valid()) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("log stream meta is not valid", K(ret), K(*this));
|
||||
} else if (!ObMigrationStatusHelper::need_online(migration_status_)) {
|
||||
need_online = false;
|
||||
// NOTE(review): ObMigrationStatusHelper::need_online() as shown in this commit
|
||||
// returns true only for OB_MIGRATION_STATUS_NONE, so the migration-status
|
||||
// comparison below looks already implied by the previous branch - confirm.
} else if (ObMigrationStatus::OB_MIGRATION_STATUS_NONE == migration_status_ &&
|
||||
!restore_status_.need_online()) {
|
||||
need_online = false;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
ObLSMeta::ObReentrantWLockGuard::ObReentrantWLockGuard(ObLatch &lock,
|
||||
const bool try_lock,
|
||||
const int64_t warn_threshold)
|
||||
|
@ -108,6 +108,7 @@ public:
|
||||
int set_rebuild_info(const ObLSRebuildInfo &rebuild_info);
|
||||
int get_rebuild_info(ObLSRebuildInfo &rebuild_info) const;
|
||||
int get_create_type(int64_t &create_type) const;
|
||||
int check_ls_need_online(bool &need_online) const;
|
||||
|
||||
int init(
|
||||
const uint64_t tenant_id,
|
||||
|
@ -516,7 +516,7 @@ int ObTabletStartTransferOutCommonHelper::try_enable_dest_ls_clog_replay(
|
||||
ObLSHandle dest_ls_handle;
|
||||
ObLS *dest_ls = NULL;
|
||||
SCN max_decided_scn;
|
||||
bool can_online = true;
|
||||
bool need_online = true;
|
||||
ObLSTransferInfo transfer_info;
|
||||
static const int64_t SLEEP_TS = 100_ms;
|
||||
if (!scn.is_valid() || !dest_ls_id.is_valid()) {
|
||||
@ -543,9 +543,9 @@ int ObTabletStartTransferOutCommonHelper::try_enable_dest_ls_clog_replay(
|
||||
} else {
|
||||
transfer_info = dest_ls->get_ls_startup_transfer_info();
|
||||
dest_ls->get_ls_startup_transfer_info().reset();
|
||||
if (OB_FAIL(dest_ls->check_can_online(can_online))) {
|
||||
if (OB_FAIL(dest_ls->check_ls_need_online(need_online))) {
|
||||
LOG_WARN("failed to check can online", KR(ret), K(dest_ls));
|
||||
} else if (!can_online) {
|
||||
} else if (!need_online) {
|
||||
// do nothing
|
||||
} else if (CLICK_FAIL(dest_ls->online())) {
|
||||
LOG_ERROR("fail to online ls", K(ret), K(scn), K(dest_ls_id), "ls_startup_transfer_info", dest_ls->get_ls_startup_transfer_info());
|
||||
|
@ -480,47 +480,58 @@ int ObLSService::post_create_ls_(const int64_t create_type,
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
int tmp_ret = OB_SUCCESS;
|
||||
switch(create_type) {
|
||||
case ObLSCreateType::NORMAL: {
|
||||
if (OB_FAIL(ls->online_without_lock())) {
|
||||
LOG_ERROR("ls start failed", K(ret));
|
||||
} else if (OB_FAIL(ls->set_start_work_state())) {
|
||||
LOG_ERROR("ls set start work state failed", KR(ret), KPC(ls));
|
||||
} else {
|
||||
bool need_online = false;
|
||||
if (OB_FAIL(ls->check_ls_need_online(need_online))) {
|
||||
LOG_WARN("check ls need online failed", K(ret));
|
||||
} else if (need_online &&
|
||||
OB_FAIL(ls->online_without_lock())) {
|
||||
LOG_ERROR("ls start failed", K(ret));
|
||||
} else {
|
||||
switch(create_type) {
|
||||
case ObLSCreateType::NORMAL: {
|
||||
if (OB_FAIL(ls->set_start_work_state())) {
|
||||
LOG_ERROR("ls set start work state failed", KR(ret), KPC(ls));
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ObLSCreateType::RESTORE: {
|
||||
if (OB_FAIL(ls->get_log_handler()->enable_sync())) {
|
||||
LOG_WARN("failed to enable sync", K(ret));
|
||||
} else if (OB_FAIL(ls->get_ls_restore_handler()->online())) {
|
||||
LOG_WARN("failed to online restore handler", K(ret));
|
||||
} else if (OB_FAIL(ls->set_start_ha_state())) {
|
||||
LOG_ERROR("ls set start ha state failed", KR(ret), KPC(ls));
|
||||
} else {
|
||||
case ObLSCreateType::RESTORE: {
|
||||
if (!need_online && ls->is_restore_first_step()) {
|
||||
if (OB_FAIL(ls->get_log_handler()->enable_sync())) {
|
||||
LOG_WARN("failed to enable sync", K(ret));
|
||||
} else if (OB_FAIL(ls->get_ls_restore_handler()->online())) {
|
||||
LOG_WARN("failed to online restore handler", K(ret));
|
||||
}
|
||||
}
|
||||
if (OB_FAIL(ret)) {
|
||||
} else if (OB_FAIL(ls->set_start_ha_state())) {
|
||||
LOG_ERROR("ls set start ha state failed", KR(ret), KPC(ls));
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ObLSCreateType::MIGRATE: {
|
||||
if (OB_FAIL(ls->set_start_ha_state())) {
|
||||
LOG_ERROR("ls set start ha state failed", KR(ret), KPC(ls));
|
||||
case ObLSCreateType::MIGRATE: {
|
||||
if (OB_FAIL(ls->set_start_ha_state())) {
|
||||
LOG_ERROR("ls set start ha state failed", KR(ret), KPC(ls));
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ObLSCreateType::CLONE: {
|
||||
if (OB_FAIL(ls->get_log_handler()->enable_sync())) {
|
||||
LOG_WARN("failed to enable sync", K(ret));
|
||||
} else if (OB_FAIL(ls->set_start_ha_state())) {
|
||||
LOG_ERROR("ls set start ha state failed", KR(ret), KPC(ls));
|
||||
} else {
|
||||
case ObLSCreateType::CLONE: {
|
||||
if (!need_online && ls->is_clone_first_step()) {
|
||||
if (OB_FAIL(ls->get_log_handler()->enable_sync())) {
|
||||
LOG_WARN("failed to enable sync", K(ret));
|
||||
}
|
||||
}
|
||||
if (OB_FAIL(ret)) {
|
||||
} else if (OB_FAIL(ls->set_start_ha_state())) {
|
||||
LOG_ERROR("ls set start ha state failed", KR(ret), KPC(ls));
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default: {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_ERROR("should not be here.", KR(ret));
|
||||
} // default
|
||||
} // switch
|
||||
}
|
||||
default: {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_ERROR("should not be here.", KR(ret));
|
||||
} // default
|
||||
} // switch
|
||||
|
||||
if (OB_SUCCESS != (tmp_ret = ls->report_replica_info())) {
|
||||
LOG_WARN("fail to report ls", KR(tmp_ret), KPC(ls));
|
||||
@ -740,7 +751,6 @@ int ObLSService::online_ls()
|
||||
int tmp_ret = OB_SUCCESS;
|
||||
common::ObSharedGuard<ObLSIterator> ls_iter;
|
||||
ObLS *ls = nullptr;
|
||||
bool can_online = true;
|
||||
int64_t create_type = ObLSCreateType::NORMAL;
|
||||
if (OB_FAIL(get_ls_iter(ls_iter, ObLSGetMod::TXSTORAGE_MOD))) {
|
||||
LOG_WARN("failed to get ls iter", K(ret));
|
||||
@ -755,11 +765,7 @@ int ObLSService::online_ls()
|
||||
LOG_ERROR("ls is null", K(ret));
|
||||
} else {
|
||||
ObLSLockGuard lock_ls(ls);
|
||||
if (OB_FAIL(ls->check_can_online(can_online))) {
|
||||
LOG_WARN("check ls can online failed", K(ret));
|
||||
} else if (!can_online) {
|
||||
// ls can not online, do nothing
|
||||
} else if (OB_FAIL(ls->get_create_type(create_type))) {
|
||||
if (OB_FAIL(ls->get_create_type(create_type))) {
|
||||
LOG_WARN("get ls create type failed", K(ret));
|
||||
} else if (OB_FAIL(post_create_ls_(create_type, ls))) {
|
||||
LOG_WARN("post create ls failed", K(ret));
|
||||
|
Loading…
x
Reference in New Issue
Block a user