check_not_backup_tablet_create_scn mutex with rebuild
This commit is contained in:
@ -797,35 +797,60 @@ int ObService::check_not_backup_tablet_create_scn(const obrpc::ObBackupCheckTabl
|
|||||||
} else if (OB_ISNULL(ls = ls_handle.get_ls())) {
|
} else if (OB_ISNULL(ls = ls_handle.get_ls())) {
|
||||||
ret = OB_ERR_UNEXPECTED;
|
ret = OB_ERR_UNEXPECTED;
|
||||||
LOG_WARN("log stream should not be NULL", KR(ret), K(arg.tenant_id_), K(arg.ls_id_), KPC(ls));
|
LOG_WARN("log stream should not be NULL", KR(ret), K(arg.tenant_id_), K(arg.ls_id_), KPC(ls));
|
||||||
} else if (OB_ISNULL(ls_tablet_svr = ls->get_tablet_svr())) {
|
|
||||||
ret = OB_ERR_UNEXPECTED;
|
|
||||||
LOG_WARN("ls tablet service should not be NULL", KR(ret), K(arg.tenant_id_), K(arg.ls_id_), KPC(ls));
|
|
||||||
} else {
|
} else {
|
||||||
const int64_t timeout_us = ObTabletCommon::DIRECT_GET_COMMITTED_TABLET_TIMEOUT_US;
|
const int64_t rebuild_seq = ls->get_rebuild_seq();
|
||||||
ObTabletHandle tablet_handle;
|
ObMigrationStatus migration_status;
|
||||||
for (int64_t i = 0; OB_SUCC(ret) && i < tablet_ids.count(); ++i) {
|
share::ObLSRestoreStatus restore_status;
|
||||||
const ObTabletID &tablet_id = tablet_ids.at(i);
|
if (OB_FAIL(ls->is_offline())) {
|
||||||
tablet_handle.reset();
|
ret = OB_EAGAIN;
|
||||||
if (OB_FAIL(ls_tablet_svr->get_tablet(tablet_id, tablet_handle, timeout_us))) {
|
LOG_WARN("ls is offline, retry later", K(ret), KPC(ls));
|
||||||
if (OB_TABLET_NOT_EXIST == ret) {
|
} else if (OB_FAIL(ls->get_migration_status(migration_status))) {
|
||||||
LOG_INFO("tablet has been deleted, no need to check", K(tablet_id));
|
LOG_WARN("failed to get migration status", K(ret), KPC(ls));
|
||||||
ret = OB_SUCCESS;
|
} else if (storage::ObMigrationStatus::OB_MIGRATION_STATUS_NONE != migration_status) {
|
||||||
} else {
|
ret = OB_EAGAIN;
|
||||||
LOG_WARN("failed to get tablet", KR(ret), K(tablet_id), K(timeout_us));
|
LOG_WARN("ls is in migration, retry later", K(ret), KPC(ls));
|
||||||
}
|
} else if (OB_FAIL(ls->get_restore_status(restore_status))) {
|
||||||
} else if (OB_UNLIKELY(!tablet_handle.is_valid())) {
|
LOG_WARN("failed to get restore status", K(ret), KPC(ls));
|
||||||
|
} else if (share::ObLSRestoreStatus::RESTORE_NONE != restore_status) {
|
||||||
|
ret = OB_ERR_UNEXPECTED;
|
||||||
|
LOG_WARN("restore ls is unexpected", K(ret), KPC(ls));
|
||||||
|
} else {
|
||||||
|
if (OB_ISNULL(ls_tablet_svr = ls->get_tablet_svr())) {
|
||||||
ret = OB_ERR_UNEXPECTED;
|
ret = OB_ERR_UNEXPECTED;
|
||||||
LOG_WARN("unexpected error : tablet handle is invalid", KR(ret), K(tablet_handle));
|
LOG_WARN("ls tablet service should not be NULL", KR(ret), K(arg.tenant_id_), K(arg.ls_id_), KPC(ls));
|
||||||
} else {
|
} else {
|
||||||
const ObTabletMeta &tablet_meta = tablet_handle.get_obj()->get_tablet_meta();
|
const int64_t timeout_us = ObTabletCommon::DIRECT_GET_COMMITTED_TABLET_TIMEOUT_US;
|
||||||
if (OB_UNLIKELY(tablet_meta.create_scn_ <= arg.backup_scn_)) {
|
ObTabletHandle tablet_handle;
|
||||||
ret = OB_ERR_UNEXPECTED;
|
for (int64_t i = 0; OB_SUCC(ret) && i < tablet_ids.count(); ++i) {
|
||||||
LOG_WARN("unexpected error : tablet has't been backup", KR(ret),
|
const ObTabletID &tablet_id = tablet_ids.at(i);
|
||||||
K(arg.tenant_id_), K(arg.ls_id_), K(tablet_id),
|
tablet_handle.reset();
|
||||||
K(tablet_meta), "backup_scn", arg.backup_scn_);
|
if (OB_FAIL(ls_tablet_svr->get_tablet(tablet_id, tablet_handle, timeout_us))) {
|
||||||
|
if (OB_TABLET_NOT_EXIST == ret) {
|
||||||
|
LOG_INFO("tablet has been deleted, no need to check", K(tablet_id));
|
||||||
|
ret = OB_SUCCESS;
|
||||||
|
} else {
|
||||||
|
LOG_WARN("failed to get tablet", KR(ret), K(tablet_id), K(timeout_us));
|
||||||
|
}
|
||||||
|
} else if (OB_UNLIKELY(!tablet_handle.is_valid())) {
|
||||||
|
ret = OB_ERR_UNEXPECTED;
|
||||||
|
LOG_WARN("unexpected error : tablet handle is invalid", KR(ret), K(tablet_handle));
|
||||||
|
} else {
|
||||||
|
const ObTabletMeta &tablet_meta = tablet_handle.get_obj()->get_tablet_meta();
|
||||||
|
if (OB_UNLIKELY(tablet_meta.create_scn_ <= arg.backup_scn_)) {
|
||||||
|
ret = OB_ERR_UNEXPECTED;
|
||||||
|
LOG_WARN("unexpected error : tablet has't been backup", KR(ret),
|
||||||
|
K(arg.tenant_id_), K(arg.ls_id_), K(tablet_id),
|
||||||
|
K(tablet_meta), "backup_scn", arg.backup_scn_);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (OB_FAIL(ret)) {
|
||||||
|
} else if (rebuild_seq != ls->get_rebuild_seq()) {
|
||||||
|
ret = OB_EAGAIN;
|
||||||
|
LOG_WARN("ls has rebuild, retry later", K(ret), KPC(ls));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -82,10 +82,10 @@ public:
|
|||||||
bool is_wait_status() const { return is_wait_restore_sys_tablets() || is_wait_restore_tablets_meta()
|
bool is_wait_status() const { return is_wait_restore_sys_tablets() || is_wait_restore_tablets_meta()
|
||||||
|| is_wait_quick_restore() || is_wait_restore_major_data(); }
|
|| is_wait_quick_restore() || is_wait_restore_major_data(); }
|
||||||
|
|
||||||
// Don't load inner tablet while downtime and restart if restore status is in [RESTORE_START, RESTORE_SYS_TABLETS] or RESTORE_FAILED
|
// enable sync and online ls restore handler in [RESTORE_START, RESTORE_SYS_TABLETS] or RESTORE_FAILED
|
||||||
// Side-by-side rows: the left cell is the old predicate, the right cell is its replacement.
// Old is_need_load_inner_tablet(): true when status_ is NOT in [RESTORE_START, RESTORE_SYS_TABLETS]
// and is not RESTORE_FAILED.
// New is_enable_for_restore(): the same range test without the leading '!', so it is true exactly
// when the old predicate was false. Callers that switch names must invert their condition.
bool is_need_load_inner_tablet() const
|
bool is_enable_for_restore() const
|
||||||
{
|
{
|
||||||
return !((status_ >= Status::RESTORE_START && status_ <= Status::RESTORE_SYS_TABLETS) ||
|
return ((status_ >= Status::RESTORE_START && status_ <= Status::RESTORE_SYS_TABLETS) ||
|
||||||
status_ == Status::RESTORE_FAILED);
|
status_ == Status::RESTORE_FAILED);
|
||||||
}
|
}
|
||||||
// if restore status is not in [RESTORE_START, WAIT_RESTORE_TABLETS_META], log_replay_service can replay log.
|
// if restore status is not in [RESTORE_START, WAIT_RESTORE_TABLETS_META], log_replay_service can replay log.
|
||||||
|
|||||||
@ -1033,7 +1033,6 @@ int ObStartMigrationTask::deal_with_local_ls_()
|
|||||||
int64_t proposal_id = 0;
|
int64_t proposal_id = 0;
|
||||||
ObLSMeta local_ls_meta;
|
ObLSMeta local_ls_meta;
|
||||||
logservice::ObLogService *log_service = nullptr;
|
logservice::ObLogService *log_service = nullptr;
|
||||||
|
|
||||||
if (!is_inited_) {
|
if (!is_inited_) {
|
||||||
ret = OB_NOT_INIT;
|
ret = OB_NOT_INIT;
|
||||||
LOG_WARN("start migration task do not init", K(ret));
|
LOG_WARN("start migration task do not init", K(ret));
|
||||||
@ -1393,20 +1392,14 @@ int ObStartMigrationTask::deal_local_restore_ls_(bool &need_generate_dag)
|
|||||||
} else if (ls_restore_status.is_restore_failed()) {
|
} else if (ls_restore_status.is_restore_failed()) {
|
||||||
ret = OB_ERR_UNEXPECTED;
|
ret = OB_ERR_UNEXPECTED;
|
||||||
LOG_WARN("ls restore status is not expected", K(ret), KPC(ctx_), KPC(ls), K(ls_restore_status));
|
LOG_WARN("ls restore status is not expected", K(ret), KPC(ctx_), KPC(ls), K(ls_restore_status));
|
||||||
} else if (ls_restore_status.is_restore_start() || ls_restore_status.is_restore_sys_tablets()) {
|
} else if (ls_restore_status.is_restore_start()) {
|
||||||
if (OB_FAIL(ls->get_log_handler()->enable_sync())) {
|
ret = OB_SRC_DO_NOT_ALLOWED_MIGRATE;
|
||||||
LOG_WARN("failed to enable log sync", K(ret), KPC(ctx_), KPC(ls));
|
LOG_WARN("src ls is in restore start, wait later", K(ret), KPC(ls));
|
||||||
} else if (OB_FAIL(ls->get_tablet_svr()->online())) {
|
} else if (ls_restore_status.is_restore_sys_tablets()) {
|
||||||
LOG_WARN("failed to online tablet svr", K(ret), KPC(ctx_), KPC(ls));
|
need_generate_dag = false;
|
||||||
} else if (OB_FAIL(ls->get_tx_svr()->online())) {
|
if (OB_FAIL(ls->enable_for_restore())) {
|
||||||
LOG_WARN("failed to online tx svr", K(ret), KPC(ctx_), KPC(ls));
|
LOG_WARN("failed to enable for restore", K(ret));
|
||||||
} else if (OB_FAIL(ls->get_ddl_log_handler()->online())) {
|
|
||||||
LOG_WARN("failed to online ddl log handler", K(ret), KPC(ctx_), KPC(ls));
|
|
||||||
} else if (OB_FAIL(ls->get_ls_wrs_handler()->online())) {
|
|
||||||
LOG_WARN("failed to online ls wrs handler", K(ret), KPC(ctx_), KPC(ls));
|
|
||||||
} else if (OB_FALSE_IT(ls->get_checkpoint_executor()->online())) {
|
|
||||||
} else {
|
} else {
|
||||||
need_generate_dag = false;
|
|
||||||
LOG_INFO("ls restore status is in restore start or in restore sys tablets, no need generate dag",
|
LOG_INFO("ls restore status is in restore start or in restore sys tablets, no need generate dag",
|
||||||
K(ls_restore_status), "ls_id", ctx_->arg_.ls_id_);
|
K(ls_restore_status), "ls_id", ctx_->arg_.ls_id_);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -373,7 +373,7 @@ bool ObLS::is_need_gc() const
|
|||||||
return bool_ret;
|
return bool_ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ObLS::is_need_load_inner_tablet() const
|
bool ObLS::is_enable_for_restore() const
|
||||||
{
|
{
|
||||||
int ret = OB_SUCCESS;
|
int ret = OB_SUCCESS;
|
||||||
bool bool_ret = false;
|
bool bool_ret = false;
|
||||||
@ -381,7 +381,7 @@ bool ObLS::is_need_load_inner_tablet() const
|
|||||||
if (OB_FAIL(ls_meta_.get_restore_status(restore_status))) {
|
if (OB_FAIL(ls_meta_.get_restore_status(restore_status))) {
|
||||||
LOG_WARN("fail to get restore status", K(ret), K(ls_meta_.ls_id_));
|
LOG_WARN("fail to get restore status", K(ret), K(ls_meta_.ls_id_));
|
||||||
} else {
|
} else {
|
||||||
bool_ret = restore_status.is_need_load_inner_tablet();
|
bool_ret = restore_status.is_enable_for_restore();
|
||||||
}
|
}
|
||||||
return bool_ret;
|
return bool_ret;
|
||||||
}
|
}
|
||||||
@ -684,6 +684,8 @@ int ObLS::offline_()
|
|||||||
} else if (FALSE_IT(is_offlined_ = true)) {
|
} else if (FALSE_IT(is_offlined_ = true)) {
|
||||||
} else if (FALSE_IT(checkpoint_executor_.offline())) {
|
} else if (FALSE_IT(checkpoint_executor_.offline())) {
|
||||||
LOG_WARN("checkpoint executor offline failed", K(ret), K(ls_meta_));
|
LOG_WARN("checkpoint executor offline failed", K(ret), K(ls_meta_));
|
||||||
|
} else if (OB_FAIL(ls_restore_handler_.offline())) {
|
||||||
|
LOG_WARN("failed to offline ls restore handler", K(ret));
|
||||||
} else if (OB_FAIL(offline_compaction_())) {
|
} else if (OB_FAIL(offline_compaction_())) {
|
||||||
LOG_WARN("compaction offline failed", K(ret), K(ls_meta_));
|
LOG_WARN("compaction offline failed", K(ret), K(ls_meta_));
|
||||||
} else if (OB_FAIL(ls_wrs_handler_.offline())) {
|
} else if (OB_FAIL(ls_wrs_handler_.offline())) {
|
||||||
@ -735,6 +737,30 @@ int ObLS::offline()
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int ObLS::offline_without_lock()
|
||||||
|
{
|
||||||
|
int ret = OB_SUCCESS;
|
||||||
|
int64_t start_ts = ObTimeUtility::current_time();
|
||||||
|
int64_t retry_times = 0;
|
||||||
|
|
||||||
|
do {
|
||||||
|
retry_times++;
|
||||||
|
{
|
||||||
|
if (OB_FAIL(offline_())) {
|
||||||
|
LOG_WARN("ls offline failed", K(ret), K(ls_meta_));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (OB_EAGAIN == ret) {
|
||||||
|
ob_usleep(100 * 1000); // 100 ms
|
||||||
|
if (retry_times % 100 == 0) { // every 10 s
|
||||||
|
LOG_WARN("ls offline use too much time.", K(ls_meta_), K(start_ts));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} while (OB_EAGAIN == ret);
|
||||||
|
FLOG_INFO("ls offline end", KR(ret), "ls_id", get_ls_id());
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
int ObLS::online_tx_()
|
int ObLS::online_tx_()
|
||||||
{
|
{
|
||||||
int ret = OB_SUCCESS;
|
int ret = OB_SUCCESS;
|
||||||
@ -777,6 +803,8 @@ int ObLS::online()
|
|||||||
LOG_WARN("weak read handler online failed", K(ret), K(ls_meta_));
|
LOG_WARN("weak read handler online failed", K(ret), K(ls_meta_));
|
||||||
} else if (OB_FAIL(online_compaction_())) {
|
} else if (OB_FAIL(online_compaction_())) {
|
||||||
LOG_WARN("compaction online failed", K(ret), K(ls_meta_));
|
LOG_WARN("compaction online failed", K(ret), K(ls_meta_));
|
||||||
|
} else if (OB_FAIL(ls_restore_handler_.online())) {
|
||||||
|
LOG_WARN("ls restore handler online failed", K(ret));
|
||||||
} else if (FALSE_IT(checkpoint_executor_.online())) {
|
} else if (FALSE_IT(checkpoint_executor_.online())) {
|
||||||
} else if (FALSE_IT(tablet_gc_handler_.online())) {
|
} else if (FALSE_IT(tablet_gc_handler_.online())) {
|
||||||
} else {
|
} else {
|
||||||
@ -788,6 +816,23 @@ int ObLS::online()
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int ObLS::enable_for_restore()
|
||||||
|
{
|
||||||
|
int ret = OB_SUCCESS;
|
||||||
|
int64_t read_lock = 0;
|
||||||
|
int64_t write_lock = LSLOCKALL;
|
||||||
|
ObLSLockGuard lock_myself(lock_, read_lock, write_lock);
|
||||||
|
if (IS_NOT_INIT) {
|
||||||
|
ret = OB_NOT_INIT;
|
||||||
|
LOG_WARN("ls is not inited", K(ret));
|
||||||
|
} else if (OB_FAIL(log_handler_.enable_sync())) {
|
||||||
|
LOG_WARN("failed to enable sync", K(ret));
|
||||||
|
} else if (OB_FAIL(ls_restore_handler_.online())) {
|
||||||
|
LOG_WARN("failed to online restore", K(ret));
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
int ObLS::get_ls_meta_package(ObLSMetaPackage &meta_package)
|
int ObLS::get_ls_meta_package(ObLSMetaPackage &meta_package)
|
||||||
{
|
{
|
||||||
int ret = OB_SUCCESS;
|
int ret = OB_SUCCESS;
|
||||||
@ -1051,10 +1096,18 @@ int ObLS::finish_slog_replay()
|
|||||||
LOG_INFO("this ls should be gc later", KPC(this));
|
LOG_INFO("this ls should be gc later", KPC(this));
|
||||||
// ls will be gc later and tablets in the ls are not complete,
|
// ls will be gc later and tablets in the ls are not complete,
|
||||||
// so skip the following steps, otherwise load_ls_inner_tablet maybe encounter error.
|
// so skip the following steps, otherwise load_ls_inner_tablet maybe encounter error.
|
||||||
} else if (is_need_load_inner_tablet() && OB_FAIL(load_ls_inner_tablet())) {
|
|
||||||
LOG_WARN("ls load inner tablet failed", K(ret), KPC(this));
|
|
||||||
} else if (OB_FAIL(start())) {
|
} else if (OB_FAIL(start())) {
|
||||||
LOG_WARN("ls can not start to work", K(ret));
|
LOG_WARN("ls can not start to work", K(ret));
|
||||||
|
} else if (is_enable_for_restore()) {
|
||||||
|
if (OB_FAIL(offline_())) {
|
||||||
|
LOG_WARN("failed to offline", K(ret), KPC(this));
|
||||||
|
} else if (OB_FAIL(log_handler_.enable_sync())) {
|
||||||
|
LOG_WARN("failed to enable sync", K(ret), KPC(this));
|
||||||
|
} else if (OB_FAIL(ls_restore_handler_.online())) {
|
||||||
|
LOG_WARN("failed to online ls restore handler", K(ret), KPC(this));
|
||||||
|
}
|
||||||
|
} else if (OB_FAIL(load_ls_inner_tablet())) {
|
||||||
|
LOG_WARN("ls load inner tablet failed", K(ret), KPC(this));
|
||||||
} else {
|
} else {
|
||||||
// do nothing
|
// do nothing
|
||||||
}
|
}
|
||||||
|
|||||||
@ -154,6 +154,8 @@ public:
|
|||||||
void destroy();
|
void destroy();
|
||||||
int offline();
|
int offline();
|
||||||
int online();
|
int online();
|
||||||
|
int offline_without_lock();
|
||||||
|
int enable_for_restore();
|
||||||
bool is_offline() const { return is_offlined_; } // mock function, TODO(@yanyuan)
|
bool is_offline() const { return is_offlined_; } // mock function, TODO(@yanyuan)
|
||||||
|
|
||||||
ObLSTxService *get_tx_svr() { return &ls_tx_svr_; }
|
ObLSTxService *get_tx_svr() { return &ls_tx_svr_; }
|
||||||
@ -200,7 +202,7 @@ public:
|
|||||||
void set_create_state(const ObInnerLSStatus new_status);
|
void set_create_state(const ObInnerLSStatus new_status);
|
||||||
ObInnerLSStatus get_create_state() const;
|
ObInnerLSStatus get_create_state() const;
|
||||||
bool is_need_gc() const;
|
bool is_need_gc() const;
|
||||||
bool is_need_load_inner_tablet() const;
|
bool is_enable_for_restore() const;
|
||||||
// for rebuild
|
// for rebuild
|
||||||
// remove inner tablet, the memtable and minor sstable of data tablet, disable replay
|
// remove inner tablet, the memtable and minor sstable of data tablet, disable replay
|
||||||
// int prepare_rebuild();
|
// int prepare_rebuild();
|
||||||
|
|||||||
@ -42,12 +42,13 @@ using namespace logservice;
|
|||||||
// Default constructor, shown side by side (left cell = old, right cell = new).
// The new initializer list adds is_online_(true) and moves rebuild_seq_(0) from the end of the
// list to just after is_stop_; allocator_() therefore becomes the last initializer.
// NOTE(review): the list order presumably mirrors the member declaration order in the header,
// which is not visible here - confirm.
ObLSRestoreHandler::ObLSRestoreHandler()
|
ObLSRestoreHandler::ObLSRestoreHandler()
|
||||||
: is_inited_(false),
|
: is_inited_(false),
|
||||||
is_stop_(false),
|
is_stop_(false),
|
||||||
|
is_online_(true),
|
||||||
|
rebuild_seq_(0),
|
||||||
result_mgr_(),
|
result_mgr_(),
|
||||||
ls_(nullptr),
|
ls_(nullptr),
|
||||||
ls_restore_arg_(),
|
ls_restore_arg_(),
|
||||||
state_handler_(nullptr),
|
state_handler_(nullptr),
|
||||||
allocator_(),
|
allocator_()
|
||||||
rebuild_seq_(0)
|
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -88,6 +89,74 @@ void ObLSRestoreHandler::destroy()
|
|||||||
ls_ = nullptr;
|
ls_ = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int ObLSRestoreHandler::offline()
|
||||||
|
{
|
||||||
|
int ret = OB_SUCCESS;
|
||||||
|
if (IS_NOT_INIT) {
|
||||||
|
ret = OB_NOT_INIT;
|
||||||
|
LOG_WARN("not init", K(ret));
|
||||||
|
} else {
|
||||||
|
lib::ObMutexGuard guard(mtx_);
|
||||||
|
if (OB_FAIL(cancel_task_())) {
|
||||||
|
LOG_WARN("failed to cancel task", K(ret), KPC(ls_));
|
||||||
|
} else {
|
||||||
|
is_online_ = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
int ObLSRestoreHandler::online()
|
||||||
|
{
|
||||||
|
int ret = OB_SUCCESS;
|
||||||
|
share::ObLSRestoreStatus new_status;
|
||||||
|
ObILSRestoreState *new_state_handler = nullptr;
|
||||||
|
|
||||||
|
if (IS_NOT_INIT) {
|
||||||
|
ret = OB_NOT_INIT;
|
||||||
|
LOG_WARN("not init", K(ret));
|
||||||
|
} else if (is_online_) {
|
||||||
|
// do nothing
|
||||||
|
LOG_INFO("ls is online", KPC(ls_));
|
||||||
|
} else if (OB_FAIL(ls_->get_restore_status(new_status))) {
|
||||||
|
LOG_WARN("fail to get_restore_status", K(ret), KPC(ls_));
|
||||||
|
} else if (new_status.is_restore_none()) {
|
||||||
|
is_online_ = true;
|
||||||
|
} else {
|
||||||
|
lib::ObMutexGuard guard(mtx_);
|
||||||
|
// online after rebuild or migrate. the restore status may changed.
|
||||||
|
// so, we refresh the restore state handler according to the new ls restore status.
|
||||||
|
if (OB_FAIL(fill_restore_arg_())) {
|
||||||
|
LOG_WARN("fail to fill restore arg", K(ret));
|
||||||
|
} else if (OB_FAIL(get_restore_state_handler_(new_status, new_state_handler))) {
|
||||||
|
LOG_WARN("fail to get restore state handler", K(ret), K(new_status));
|
||||||
|
} else {
|
||||||
|
if (nullptr != state_handler_) {
|
||||||
|
// when online, the old task should be cancel.
|
||||||
|
if (OB_FAIL(state_handler_->get_tablet_mgr().cancel_task())) {
|
||||||
|
LOG_WARN("failed to cancel task", K(ret));
|
||||||
|
} else {
|
||||||
|
state_handler_->~ObILSRestoreState();
|
||||||
|
allocator_.free(state_handler_);
|
||||||
|
state_handler_ = nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (OB_SUCC(ret)) {
|
||||||
|
state_handler_ = new_state_handler;
|
||||||
|
is_online_ = true;
|
||||||
|
new_state_handler = nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (OB_FAIL(ret) && nullptr != new_state_handler) {
|
||||||
|
new_state_handler->~ObILSRestoreState();
|
||||||
|
allocator_.free(new_state_handler);
|
||||||
|
new_state_handler = nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
int ObLSRestoreHandler::record_clog_failed_info(
|
int ObLSRestoreHandler::record_clog_failed_info(
|
||||||
const share::ObTaskId &trace_id, const share::ObLSID &ls_id, const int &result)
|
const share::ObTaskId &trace_id, const share::ObLSID &ls_id, const int &result)
|
||||||
{
|
{
|
||||||
@ -128,7 +197,10 @@ int ObLSRestoreHandler::handle_execute_over(
|
|||||||
status = state_handler_->get_restore_status();
|
status = state_handler_->get_restore_status();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (status.is_restore_sys_tablets()) {
|
if (OB_CANCELED == result) {
|
||||||
|
//do nothing
|
||||||
|
LOG_WARN("task has been canceled", KPC(ls_), K(task_id));
|
||||||
|
} else if (status.is_restore_sys_tablets()) {
|
||||||
state_handler_->set_retry_flag();
|
state_handler_->set_retry_flag();
|
||||||
result_mgr_.set_result(result, task_id, ObLSRestoreResultMgr::RestoreFailedType::DATA_RESTORE_FAILED_TYPE);
|
result_mgr_.set_result(result, task_id, ObLSRestoreResultMgr::RestoreFailedType::DATA_RESTORE_FAILED_TYPE);
|
||||||
LOG_WARN("restore sys tablets dag failed, need retry", K(ret));
|
LOG_WARN("restore sys tablets dag failed, need retry", K(ret));
|
||||||
@ -157,9 +229,11 @@ int ObLSRestoreHandler::handle_pull_tablet(
|
|||||||
if (IS_NOT_INIT) {
|
if (IS_NOT_INIT) {
|
||||||
ret = OB_NOT_INIT;
|
ret = OB_NOT_INIT;
|
||||||
LOG_WARN("not init", K(ret));
|
LOG_WARN("not init", K(ret));
|
||||||
|
} else if (is_stop_ || !is_online_) {
|
||||||
|
LOG_WARN("ls stopped or disabled", KPC(ls_));
|
||||||
} else if (OB_ISNULL(state_handler_)) {
|
} else if (OB_ISNULL(state_handler_)) {
|
||||||
// server may downtime and restart, but it has't inited state handler, so state_handler_ may be null.
|
// server may downtime and restart, but it has't inited state handler, so state_handler_ may be null.
|
||||||
LOG_WARN("need restart, wait later");
|
LOG_WARN("need restart, wait later", KPC(ls_));
|
||||||
} else if (OB_FAIL(state_handler_->handle_pull_tablet(tablet_ids, leader_restore_status))) {
|
} else if (OB_FAIL(state_handler_->handle_pull_tablet(tablet_ids, leader_restore_status))) {
|
||||||
LOG_WARN("fail to handl pull tablet", K(ret), K(leader_restore_status));
|
LOG_WARN("fail to handl pull tablet", K(ret), K(leader_restore_status));
|
||||||
}
|
}
|
||||||
@ -188,7 +262,10 @@ int ObLSRestoreHandler::process()
|
|||||||
// it tasks a period of time for the ls leader is ready after the shutdown and restart of observer usually,
|
// it tasks a period of time for the ls leader is ready after the shutdown and restart of observer usually,
|
||||||
// and an ls leader not exist error will be returned before leader is ready.
|
// and an ls leader not exist error will be returned before leader is ready.
|
||||||
// so in order to improve availability, we need control the retry frequency and the default retry time interval is 10s.
|
// so in order to improve availability, we need control the retry frequency and the default retry time interval is 10s.
|
||||||
if (OB_FAIL(state_handler_->do_restore())) {
|
lib::ObMutexGuard guard(mtx_);
|
||||||
|
if (is_stop_ || !is_online_) {
|
||||||
|
LOG_INFO("ls stopped or disabled", KPC(ls_));
|
||||||
|
} else if (OB_FAIL(state_handler_->do_restore())) {
|
||||||
ObTaskId trace_id(*ObCurTraceId::get_trace_id());
|
ObTaskId trace_id(*ObCurTraceId::get_trace_id());
|
||||||
result_mgr_.set_result(ret, trace_id, ObLSRestoreResultMgr::RestoreFailedType::DATA_RESTORE_FAILED_TYPE);
|
result_mgr_.set_result(ret, trace_id, ObLSRestoreResultMgr::RestoreFailedType::DATA_RESTORE_FAILED_TYPE);
|
||||||
LOG_WARN("fail to do restore", K(ret), KPC(state_handler_));
|
LOG_WARN("fail to do restore", K(ret), KPC(state_handler_));
|
||||||
@ -489,30 +566,36 @@ void ObLSRestoreHandler::wakeup()
|
|||||||
// safe_to_destroy(), shown side by side (left cell = old, right cell = new).
// Both versions start with is_safe = false, return OB_NOT_INIT when not initialized, and do the
// rest under mtx_.
// Old: is_safe becomes true when state_handler_ is null, or when the tablet manager's
//      check_all_task_done() reports done. start_ts and OB_WAIT_LS_RESTORE_STOP_MS are declared
//      only on this side.
// New: calls cancel_task_(); on success sets is_safe = true and is_stop_ = true, on failure
//      logs and leaves is_safe false.
// NOTE(review): the new side no longer calls check_all_task_done(); whether cancel_task()
// waits for running tasks cannot be seen from here - confirm.
int ObLSRestoreHandler::safe_to_destroy(bool &is_safe)
|
int ObLSRestoreHandler::safe_to_destroy(bool &is_safe)
|
||||||
{
|
{
|
||||||
int ret = OB_SUCCESS;
|
int ret = OB_SUCCESS;
|
||||||
const int64_t start_ts = ObTimeUtil::current_time();
|
|
||||||
const int64_t OB_WAIT_LS_RESTORE_STOP_MS = 200 * 1000; // 200ms
|
|
||||||
is_safe = false;
|
is_safe = false;
|
||||||
if (IS_NOT_INIT) {
|
if (IS_NOT_INIT) {
|
||||||
ret = OB_NOT_INIT;
|
ret = OB_NOT_INIT;
|
||||||
LOG_WARN("ls restore handler do not init", K(ret));
|
LOG_WARN("ls restore handler do not init", K(ret));
|
||||||
} else {
|
} else {
|
||||||
lib::ObMutexGuard guard(mtx_);
|
lib::ObMutexGuard guard(mtx_);
|
||||||
if (OB_ISNULL(state_handler_)) {
|
if (OB_FAIL(cancel_task_())) {
|
||||||
is_safe = true;
|
LOG_WARN("failed to cancel tasks", K(ret), KPC(ls_));
|
||||||
} else {
|
} else {
|
||||||
ObLSRestoreTaskMgr &restore_tablet_mgr = state_handler_->get_tablet_mgr();
|
is_safe = true;
|
||||||
bool is_done = false;
|
is_stop_ = true;
|
||||||
if (OB_FAIL(restore_tablet_mgr.check_all_task_done(is_done))) {
|
|
||||||
LOG_WARN("fail to check all task done", K(ret));
|
|
||||||
} else if (is_done) {
|
|
||||||
is_safe = true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
LOG_INFO("wait ls restore stop", K(ret), K(is_safe), KPC(ls_));
|
LOG_INFO("wait ls restore stop", K(ret), K(is_safe), KPC(ls_));
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int ObLSRestoreHandler::cancel_task_()
|
||||||
|
{
|
||||||
|
int ret = OB_SUCCESS;
|
||||||
|
if (OB_ISNULL(state_handler_)) {
|
||||||
|
} else {
|
||||||
|
ObLSRestoreTaskMgr &restore_tablet_mgr = state_handler_->get_tablet_mgr();
|
||||||
|
if (OB_FAIL(restore_tablet_mgr.cancel_task())) {
|
||||||
|
LOG_WARN("fail to check all task done", K(ret));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
int ObLSRestoreHandler::update_rebuild_seq()
|
int ObLSRestoreHandler::update_rebuild_seq()
|
||||||
{
|
{
|
||||||
int ret = OB_SUCCESS;
|
int ret = OB_SUCCESS;
|
||||||
@ -520,16 +603,14 @@ int ObLSRestoreHandler::update_rebuild_seq()
|
|||||||
ret = OB_NOT_INIT;
|
ret = OB_NOT_INIT;
|
||||||
LOG_WARN("ls restore handler do not init", K(ret));
|
LOG_WARN("ls restore handler do not init", K(ret));
|
||||||
} else {
|
} else {
|
||||||
lib::ObMutexGuard guard(mtx_);
|
ATOMIC_STORE(&rebuild_seq_, ls_->get_rebuild_seq());
|
||||||
rebuild_seq_ = ls_->get_rebuild_seq();
|
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
// get_rebuild_seq(), shown side by side (left cell = old, right cell = new).
// Old: takes mtx_ and returns rebuild_seq_.
// New: returns ATOMIC_LOAD(&rebuild_seq_) without touching mtx_.
int64_t ObLSRestoreHandler::get_rebuild_seq()
|
int64_t ObLSRestoreHandler::get_rebuild_seq()
|
||||||
{
|
{
|
||||||
lib::ObMutexGuard guard(mtx_);
|
return ATOMIC_LOAD(&rebuild_seq_);
|
||||||
return rebuild_seq_;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//================================ObILSRestoreState=======================================
|
//================================ObILSRestoreState=======================================
|
||||||
@ -1170,23 +1251,15 @@ int ObILSRestoreState::check_restore_concurrency_limit_()
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Side-by-side rows: the old helper enable_replay_() (left) is replaced by online_() (right).
// Old: calls ls_->enable_replay() and logs "enable ls replay failed" on error.
// New: calls ls_->online() and logs "online ls failed" on error.
// Both return the callee's result unchanged.
int ObILSRestoreState::enable_replay_()
|
int ObILSRestoreState::online_()
|
||||||
{
|
{
|
||||||
int ret = OB_SUCCESS;
|
int ret = OB_SUCCESS;
|
||||||
if (OB_FAIL(ls_->enable_replay())) {
|
if (OB_FAIL(ls_->online())) {
|
||||||
LOG_WARN("enable ls replay failed", K(ret), KPC(ls_));
|
LOG_WARN("online ls failed", K(ret), KPC(ls_));
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Old-side only: every right-hand cell is empty, i.e. this helper is removed by the change.
// It called ls_->disable_replay() and only logged a failure (tmp_ret is never returned).
void ObILSRestoreState::disable_replay_()
|
|
||||||
{
|
|
||||||
int tmp_ret = OB_SUCCESS;
|
|
||||||
if (OB_SUCCESS != (tmp_ret = ls_->disable_replay())) {
|
|
||||||
LOG_WARN("fail to disable replay", K(tmp_ret), KPC(ls_));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int ObILSRestoreState::schedule_tablet_group_restore_(
|
int ObILSRestoreState::schedule_tablet_group_restore_(
|
||||||
const ObTabletGroupRestoreArg &arg,
|
const ObTabletGroupRestoreArg &arg,
|
||||||
const share::ObTaskId &task_id)
|
const share::ObTaskId &task_id)
|
||||||
@ -1390,15 +1463,12 @@ int ObLSRestoreStartState::do_with_no_ls_meta_()
|
|||||||
// this ls doesn't have ls meta and tablet in backup, it only needs to replay clog.
|
// this ls doesn't have ls meta and tablet in backup, it only needs to replay clog.
|
||||||
// so just advance to qucik restore and start replay clog.
|
// so just advance to qucik restore and start replay clog.
|
||||||
ObLSRestoreStatus next_status(ObLSRestoreStatus::Status::QUICK_RESTORE);
|
ObLSRestoreStatus next_status(ObLSRestoreStatus::Status::QUICK_RESTORE);
|
||||||
if (OB_FAIL(enable_replay_())) {
|
if (OB_FAIL(online_())) {
|
||||||
LOG_WARN("fail to enable log", K(ret));
|
LOG_WARN("fail to enable log", K(ret));
|
||||||
} else if (OB_FAIL(advance_status_(*ls_, next_status))) {
|
} else if (OB_FAIL(advance_status_(*ls_, next_status))) {
|
||||||
LOG_WARN("fail to advance status", K(ret), K(*ls_), K(next_status));
|
LOG_WARN("fail to advance status", K(ret), K(*ls_), K(next_status));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (OB_FAIL(ret)) {
|
|
||||||
disable_replay_();
|
|
||||||
}
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1415,7 +1485,7 @@ int ObLSRestoreStartState::do_with_uncreated_ls_()
|
|||||||
LOG_WARN("fail to check ls created", K(ret), KPC(ls_));
|
LOG_WARN("fail to check ls created", K(ret), KPC(ls_));
|
||||||
} else if (is_created) {
|
} else if (is_created) {
|
||||||
// creating ls finished after sys ls restored. cur ls need to do restore.
|
// creating ls finished after sys ls restored. cur ls need to do restore.
|
||||||
} else if (OB_FAIL(enable_replay_())) {
|
} else if (OB_FAIL(online_())) {
|
||||||
LOG_WARN("fail to enable log", K(ret));
|
LOG_WARN("fail to enable log", K(ret));
|
||||||
} else if (OB_FAIL(advance_status_(*ls_, next_status))) {
|
} else if (OB_FAIL(advance_status_(*ls_, next_status))) {
|
||||||
LOG_WARN("fail to advance status", K(ret), KPC(ls_), K(next_status));
|
LOG_WARN("fail to advance status", K(ret), KPC(ls_), K(next_status));
|
||||||
@ -1424,9 +1494,6 @@ int ObLSRestoreStartState::do_with_uncreated_ls_()
|
|||||||
LOG_INFO("no need to restore when sys ls has been restored and the ls doesn't created.", KPC(ls_));
|
LOG_INFO("no need to restore when sys ls has been restored and the ls doesn't created.", KPC(ls_));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (OB_FAIL(ret)) {
|
|
||||||
disable_replay_();
|
|
||||||
}
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1630,7 +1697,7 @@ int ObLSRestoreSysTabletState::leader_restore_sys_tablet_()
|
|||||||
} else if (!tablet_mgr_.is_restore_completed()) {// TODO: check restore finish, should read from extern. fix later
|
} else if (!tablet_mgr_.is_restore_completed()) {// TODO: check restore finish, should read from extern. fix later
|
||||||
} else if (is_need_retry_()) {
|
} else if (is_need_retry_()) {
|
||||||
// next term to retry
|
// next term to retry
|
||||||
} else if (OB_FAIL(ls_->load_ls_inner_tablet())) {
|
} else if (OB_FAIL(online_())) {
|
||||||
LOG_WARN("fail to load ls inner tablet", K(ret));
|
LOG_WARN("fail to load ls inner tablet", K(ret));
|
||||||
} else if (OB_FAIL(ls_->get_ls_restore_handler()->update_rebuild_seq())) {
|
} else if (OB_FAIL(ls_->get_ls_restore_handler()->update_rebuild_seq())) {
|
||||||
LOG_WARN("failed to update rebuild seq", K(ret), KPC(ls_));
|
LOG_WARN("failed to update rebuild seq", K(ret), KPC(ls_));
|
||||||
@ -1639,6 +1706,7 @@ int ObLSRestoreSysTabletState::leader_restore_sys_tablet_()
|
|||||||
} else {
|
} else {
|
||||||
LOG_INFO("leader succ to restore sys tablet", KPC(ls_));
|
LOG_INFO("leader succ to restore sys tablet", KPC(ls_));
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1661,7 +1729,7 @@ int ObLSRestoreSysTabletState::follower_restore_sys_tablet_()
|
|||||||
} else if (!tablet_mgr_.is_restore_completed()) {
|
} else if (!tablet_mgr_.is_restore_completed()) {
|
||||||
} else if (is_need_retry_()) {
|
} else if (is_need_retry_()) {
|
||||||
// next term to retry
|
// next term to retry
|
||||||
} else if (OB_FAIL(ls_->load_ls_inner_tablet())) {
|
} else if (OB_FAIL(online_())) {
|
||||||
LOG_WARN("fail to load ls inner tablet", K(ret));
|
LOG_WARN("fail to load ls inner tablet", K(ret));
|
||||||
} else if (OB_FAIL(ls_->get_ls_restore_handler()->update_rebuild_seq())) {
|
} else if (OB_FAIL(ls_->get_ls_restore_handler()->update_rebuild_seq())) {
|
||||||
LOG_WARN("failed to update rebuild seq", K(ret), KPC(ls_));
|
LOG_WARN("failed to update rebuild seq", K(ret), KPC(ls_));
|
||||||
@ -1670,6 +1738,7 @@ int ObLSRestoreSysTabletState::follower_restore_sys_tablet_()
|
|||||||
} else {
|
} else {
|
||||||
LOG_INFO("follower succ to restore sys tablet", KPC(ls_));
|
LOG_INFO("follower succ to restore sys tablet", KPC(ls_));
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1791,17 +1860,12 @@ int ObLSRestoreCreateUserTabletState::leader_create_user_tablet_()
|
|||||||
} else if (tablet_need_restore.empty()) {
|
} else if (tablet_need_restore.empty()) {
|
||||||
ObLSRestoreStatus next_status(ObLSRestoreStatus::Status::WAIT_RESTORE_TABLETS_META);
|
ObLSRestoreStatus next_status(ObLSRestoreStatus::Status::WAIT_RESTORE_TABLETS_META);
|
||||||
if (!tablet_mgr_.is_restore_completed()) {
|
if (!tablet_mgr_.is_restore_completed()) {
|
||||||
} else if (OB_FAIL(enable_replay_())) {
|
|
||||||
LOG_WARN("fail to enable log", K(ret), KPC(ls_));
|
|
||||||
} else if (OB_FAIL(advance_status_(*ls_, next_status))) {
|
} else if (OB_FAIL(advance_status_(*ls_, next_status))) {
|
||||||
LOG_WARN("fail to advance status", K(ret), KPC(ls_), K(next_status));
|
LOG_WARN("fail to advance status", K(ret), KPC(ls_), K(next_status));
|
||||||
} else {
|
} else {
|
||||||
LOG_INFO("success create leader user tablets", KPC(ls_));
|
LOG_INFO("success create leader user tablets", KPC(ls_));
|
||||||
tablet_mgr_.reuse_set();
|
tablet_mgr_.reuse_set();
|
||||||
}
|
}
|
||||||
if (OB_FAIL(ret)) {
|
|
||||||
disable_replay_();
|
|
||||||
}
|
|
||||||
} else if (OB_FAIL(do_create_user_tablet_(tablet_need_restore))) {
|
} else if (OB_FAIL(do_create_user_tablet_(tablet_need_restore))) {
|
||||||
LOG_WARN("fail to do quick restore", K(ret), K(tablet_need_restore), KPC(ls_));
|
LOG_WARN("fail to do quick restore", K(ret), K(tablet_need_restore), KPC(ls_));
|
||||||
}
|
}
|
||||||
@ -1836,17 +1900,12 @@ int ObLSRestoreCreateUserTabletState::follower_create_user_tablet_()
|
|||||||
if (OB_FAIL(reload_miss_tablet_(all_finish))) {
|
if (OB_FAIL(reload_miss_tablet_(all_finish))) {
|
||||||
LOG_WARN("fail to check follower restore tablet all finish", K(ret), KPC(ls_));
|
LOG_WARN("fail to check follower restore tablet all finish", K(ret), KPC(ls_));
|
||||||
} else if (all_finish) {
|
} else if (all_finish) {
|
||||||
if (OB_FAIL(enable_replay_())) {
|
if (OB_FAIL(advance_status_(*ls_, next_status))) {
|
||||||
LOG_WARN("fail to enable log", K(ret), KPC(ls_));
|
|
||||||
} else if (OB_FAIL(advance_status_(*ls_, next_status))) {
|
|
||||||
LOG_WARN("fail to advance status", K(ret), KPC(ls_), K(next_status));
|
LOG_WARN("fail to advance status", K(ret), KPC(ls_), K(next_status));
|
||||||
} else {
|
} else {
|
||||||
LOG_INFO("success create follower user tablets", KPC(ls_));
|
LOG_INFO("success create follower user tablets", KPC(ls_));
|
||||||
tablet_mgr_.reuse_set();
|
tablet_mgr_.reuse_set();
|
||||||
}
|
}
|
||||||
if (OB_FAIL(ret)) {
|
|
||||||
disable_replay_();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (OB_FAIL(do_create_user_tablet_(tablet_need_restore))) {
|
} else if (OB_FAIL(do_create_user_tablet_(tablet_need_restore))) {
|
||||||
|
|||||||
@ -87,10 +87,13 @@ public:
|
|||||||
void wakeup();
|
void wakeup();
|
||||||
void stop() { ATOMIC_STORE(&is_stop_, true); } // when remove ls, set this
|
void stop() { ATOMIC_STORE(&is_stop_, true); } // when remove ls, set this
|
||||||
int safe_to_destroy(bool &is_safe);
|
int safe_to_destroy(bool &is_safe);
|
||||||
|
int offline();
|
||||||
|
int online();
|
||||||
bool is_stop() { return is_stop_; }
|
bool is_stop() { return is_stop_; }
|
||||||
int update_rebuild_seq();
|
int update_rebuild_seq();
|
||||||
int64_t get_rebuild_seq();
|
int64_t get_rebuild_seq();
|
||||||
private:
|
private:
|
||||||
|
int cancel_task_();
|
||||||
int check_before_do_restore_(bool &can_do_restore);
|
int check_before_do_restore_(bool &can_do_restore);
|
||||||
int update_state_handle_();
|
int update_state_handle_();
|
||||||
int check_meta_tenant_normal_(bool &is_normal);
|
int check_meta_tenant_normal_(bool &is_normal);
|
||||||
@ -101,14 +104,15 @@ private:
|
|||||||
int fill_restore_arg_();
|
int fill_restore_arg_();
|
||||||
private:
|
private:
|
||||||
bool is_inited_;
|
bool is_inited_;
|
||||||
bool is_stop_;
|
bool is_stop_; // used by ls destroy
|
||||||
|
bool is_online_; // used by ls online/offline
|
||||||
|
int64_t rebuild_seq_; // update by rebuild
|
||||||
lib::ObMutex mtx_;
|
lib::ObMutex mtx_;
|
||||||
ObLSRestoreResultMgr result_mgr_;
|
ObLSRestoreResultMgr result_mgr_;
|
||||||
storage::ObLS *ls_;
|
storage::ObLS *ls_;
|
||||||
ObTenantRestoreCtx ls_restore_arg_;
|
ObTenantRestoreCtx ls_restore_arg_;
|
||||||
ObILSRestoreState *state_handler_;
|
ObILSRestoreState *state_handler_;
|
||||||
common::ObFIFOAllocator allocator_;
|
common::ObFIFOAllocator allocator_;
|
||||||
int64_t rebuild_seq_;
|
|
||||||
DISALLOW_COPY_AND_ASSIGN(ObLSRestoreHandler);
|
DISALLOW_COPY_AND_ASSIGN(ObLSRestoreHandler);
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -171,8 +175,8 @@ protected:
|
|||||||
int report_ls_restore_progress_(storage::ObLS &ls, const share::ObLSRestoreStatus &status,
|
int report_ls_restore_progress_(storage::ObLS &ls, const share::ObLSRestoreStatus &status,
|
||||||
const share::ObTaskId &trace_id, const int result = OB_SUCCESS, const char *comment = "");
|
const share::ObTaskId &trace_id, const int result = OB_SUCCESS, const char *comment = "");
|
||||||
|
|
||||||
int enable_replay_();
|
int online_();
|
||||||
void disable_replay_();
|
void offline_();
|
||||||
int update_restore_status_(
|
int update_restore_status_(
|
||||||
storage::ObLS &ls,
|
storage::ObLS &ls,
|
||||||
const share::ObLSRestoreStatus &next_status);
|
const share::ObLSRestoreStatus &next_status);
|
||||||
|
|||||||
@ -209,10 +209,9 @@ int ObLSRestoreTaskMgr::schedule_tablet(const ObTaskId &task_id, const ObSArray<
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
int ObLSRestoreTaskMgr::check_all_task_done(bool &is_all_done)
|
int ObLSRestoreTaskMgr::cancel_task()
|
||||||
{
|
{
|
||||||
int ret = OB_SUCCESS;
|
int ret = OB_SUCCESS;
|
||||||
is_all_done = true;
|
|
||||||
bool is_exist = false;
|
bool is_exist = false;
|
||||||
if (IS_NOT_INIT) {
|
if (IS_NOT_INIT) {
|
||||||
ret = OB_NOT_INIT;
|
ret = OB_NOT_INIT;
|
||||||
@ -221,12 +220,37 @@ int ObLSRestoreTaskMgr::check_all_task_done(bool &is_all_done)
|
|||||||
lib::ObMutexGuard guard(mtx_);
|
lib::ObMutexGuard guard(mtx_);
|
||||||
TaskMap::iterator iter = tablet_map_.begin();
|
TaskMap::iterator iter = tablet_map_.begin();
|
||||||
for (; OB_SUCC(ret) && iter != tablet_map_.end(); ++iter) {
|
for (; OB_SUCC(ret) && iter != tablet_map_.end(); ++iter) {
|
||||||
|
is_exist = false;
|
||||||
if (OB_FAIL(check_task_exist_(iter->first, is_exist))) {
|
if (OB_FAIL(check_task_exist_(iter->first, is_exist))) {
|
||||||
LOG_WARN("fail to check task exist", K(ret), "taks_id", iter->first);
|
LOG_WARN("fail to check task exist", K(ret), "taks_id", iter->first);
|
||||||
} else if (is_exist) {
|
} else if (is_exist) {
|
||||||
is_all_done = false;
|
ObTenantDagScheduler *scheduler = nullptr;
|
||||||
|
if (OB_ISNULL(scheduler = MTL(ObTenantDagScheduler*))) {
|
||||||
|
ret = OB_ERR_UNEXPECTED;
|
||||||
|
LOG_WARN("failed to get ObTenantDagScheduler from MTL", K(ret), KP(scheduler));
|
||||||
|
} else if (OB_FAIL(scheduler->cancel_dag_net(iter->first))) {
|
||||||
|
LOG_WARN("failed to check dag net exist", K(ret), K(iter->first));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int64_t start_ts = ObTimeUtil::current_time();
|
||||||
|
for (; OB_SUCC(ret) && iter != tablet_map_.end(); ++iter) {
|
||||||
|
is_exist = true;
|
||||||
|
do {
|
||||||
|
if (OB_FAIL(check_task_exist_(iter->first, is_exist))) {
|
||||||
|
LOG_WARN("fail to check task exist", K(ret), "taks_id", iter->first);
|
||||||
|
} else if (is_exist && REACH_TIME_INTERVAL(60 * 1000 * 1000)) {
|
||||||
|
LOG_WARN("cancel dag next task cost too much time", K(ret), "task_id", iter->first,
|
||||||
|
"cost_time", ObTimeUtil::current_time() - start_ts);
|
||||||
|
}
|
||||||
|
} while (is_exist && OB_SUCC(ret));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (OB_SUCC(ret)) {
|
||||||
|
reuse_set();
|
||||||
|
tablet_map_.reuse();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -41,7 +41,7 @@ public:
|
|||||||
int schedule_tablet(const share::ObTaskId &task_id, const ObSArray<common::ObTabletID> &tablet_need_restore, bool &reach_dag_limit);
|
int schedule_tablet(const share::ObTaskId &task_id, const ObSArray<common::ObTabletID> &tablet_need_restore, bool &reach_dag_limit);
|
||||||
int pop_need_restore_tablets(ObIArray<common::ObTabletID> &need_restore_tablets);
|
int pop_need_restore_tablets(ObIArray<common::ObTabletID> &need_restore_tablets);
|
||||||
int pop_restored_tablets(storage::ObLS &ls, ObIArray<common::ObTabletID> &tablet_send_to_follower);
|
int pop_restored_tablets(storage::ObLS &ls, ObIArray<common::ObTabletID> &tablet_send_to_follower);
|
||||||
int check_all_task_done(bool &is_all_done);
|
int cancel_task();
|
||||||
|
|
||||||
void reuse_set() { schedule_tablet_set_.reuse(); wait_tablet_set_.reuse(); }
|
void reuse_set() { schedule_tablet_set_.reuse(); wait_tablet_set_.reuse(); }
|
||||||
void reuse_wait_set() { wait_tablet_set_.reuse(); }
|
void reuse_wait_set() { wait_tablet_set_.reuse(); }
|
||||||
|
|||||||
@ -460,14 +460,22 @@ int ObLSService::create_ls(const obrpc::ObCreateLSArg &arg)
|
|||||||
} else {
|
} else {
|
||||||
state = ObLSCreateState::CREATE_STATE_FINISH;
|
state = ObLSCreateState::CREATE_STATE_FINISH;
|
||||||
ls->finish_create(is_commit);
|
ls->finish_create(is_commit);
|
||||||
if (OB_SUCCESS != (tmp_ret = ls->start())) {
|
if (OB_FAIL(ls->start())) {
|
||||||
LOG_ERROR("ls start failed", K(tmp_ret), K(arg));
|
LOG_ERROR("ls start failed", K(ret), K(arg));
|
||||||
} else {
|
} else if (is_ls_to_restore_(arg)) {
|
||||||
FLOG_INFO("add ls to ls service succ", K(ls->get_ls_id()), K(arg));
|
if (OB_FAIL(ls->offline_without_lock())) {
|
||||||
if (OB_SUCCESS != (tmp_ret = ls->report_replica_info())) {
|
LOG_WARN("failed to offline", K(ret), K(arg));
|
||||||
LOG_WARN("fail to report ls", KR(tmp_ret), K(arg));
|
} else if (OB_FAIL(ls->get_log_handler()->enable_sync())) {
|
||||||
|
LOG_WARN("failed to enable sync", K(ret), K(arg));
|
||||||
|
} else if (OB_FAIL(ls->get_ls_restore_handler()->online())) {
|
||||||
|
LOG_WARN("failed to online restore handler", K(ret), K(arg));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
FLOG_INFO("add ls to ls service succ", K(ls->get_ls_id()), K(arg));
|
||||||
|
if (OB_SUCCESS != (tmp_ret = ls->report_replica_info())) {
|
||||||
|
LOG_WARN("fail to report ls", KR(tmp_ret), K(arg));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (OB_FAIL(ret)) {
|
if (OB_FAIL(ret)) {
|
||||||
do {
|
do {
|
||||||
|
|||||||
Reference in New Issue
Block a user