Fix a bug where a transfer on a standby database could get stuck in the doing state
This commit is contained in:
@ -1009,6 +1009,8 @@ int ObStartCompleteMigrationTask::wait_log_replay_sync_()
|
||||
//TODO(muwei.ym) make this time param a hidden configuration item
|
||||
bool need_wait = false;
|
||||
bool is_done = false;
|
||||
const bool is_primay_tenant = MTL_IS_PRIMARY_TENANT();
|
||||
share::SCN readable_scn;
|
||||
|
||||
if (!is_inited_) {
|
||||
ret = OB_NOT_INIT;
|
||||
@ -1029,6 +1031,8 @@ int ObStartCompleteMigrationTask::wait_log_replay_sync_()
|
||||
LOG_WARN("failed to check need wait log replay", K(ret), KPC(ctx_));
|
||||
} else if (!need_wait) {
|
||||
FLOG_INFO("no need wait replay log sync", KPC(ctx_));
|
||||
} else if (!is_primay_tenant && OB_FAIL(ObStorageHAUtils::get_readable_scn_with_retry(readable_scn))) {
|
||||
LOG_WARN("failed to get readable scn", K(ret), KPC(ctx_));
|
||||
} else {
|
||||
#ifdef ERRSIM
|
||||
SERVER_EVENT_SYNC_ADD("storage_ha", "wait_log_replay_sync",
|
||||
@ -1060,7 +1064,14 @@ int ObStartCompleteMigrationTask::wait_log_replay_sync_()
|
||||
LOG_INFO("wait replay log ts ns success, stop wait", "arg", ctx_->arg_, K(cost_ts));
|
||||
} else if (OB_FAIL(ls->get_max_decided_scn(current_replay_scn))) {
|
||||
LOG_WARN("failed to get current replay log ts", K(ret), KPC(ctx_));
|
||||
} else {
|
||||
} else if (!is_primay_tenant && current_replay_scn >= readable_scn) {
|
||||
wait_log_replay_success = true;
|
||||
const int64_t cost_ts = ObTimeUtility::current_time() - wait_replay_start_ts;
|
||||
LOG_INFO("wait replay log ts ns success, stop wait", "arg", ctx_->arg_, K(cost_ts),
|
||||
K(is_primay_tenant), K(current_replay_scn), K(readable_scn));
|
||||
}
|
||||
|
||||
if (OB_SUCC(ret) && !wait_log_replay_success) {
|
||||
current_ts = ObTimeUtility::current_time();
|
||||
bool is_timeout = false;
|
||||
if (REACH_TENANT_TIME_INTERVAL(60 * 1000 * 1000)) {
|
||||
|
||||
@ -246,25 +246,69 @@ int ObStorageHAUtils::check_transfer_ls_can_rebuild(
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
SCN readable_scn = SCN::base_scn();
|
||||
rootserver::ObTenantInfoLoader *info = MTL(rootserver::ObTenantInfoLoader*);
|
||||
need_rebuild = false;
|
||||
if (!replay_scn.is_valid()) {
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
LOG_WARN("argument invalid", K(ret), K(replay_scn));
|
||||
} else if (OB_ISNULL(info)) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("tenant info is null", K(ret), K(replay_scn));
|
||||
} else if (MTL_IS_PRIMARY_TENANT()) {
|
||||
need_rebuild = true;
|
||||
} else if (OB_FAIL(get_readable_scn_(readable_scn))) {
|
||||
LOG_WARN("failed to get readable scn", K(ret), K(replay_scn));
|
||||
} else if (readable_scn >= replay_scn) {
|
||||
need_rebuild = true;
|
||||
} else {
|
||||
need_rebuild = false;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Fetch the readable SCN, retrying while get_readable_scn_() reports OB_EAGAIN.
//
// @param [out] readable_scn  reset to the base SCN on entry; holds the value
//                            produced by get_readable_scn_() on success.
// @return OB_SUCCESS         a readable SCN was obtained;
//         OB_ERR_UNEXPECTED  the tenant info loader is not available;
//         OB_TIMEOUT         OB_EAGAIN was still being returned once the retry
//                            budget (9s) had been used up;
//         any other error returned by get_readable_scn_() is passed through
//         without retrying.
int ObStorageHAUtils::get_readable_scn_with_retry(share::SCN &readable_scn)
{
  int ret = OB_SUCCESS;
  readable_scn.set_base();
  rootserver::ObTenantInfoLoader *info = MTL(rootserver::ObTenantInfoLoader*);
  const int64_t RETRY_SLEEP_US = 100 * 1000; // 100ms between attempts
  const int64_t RETRY_BUDGET_US = 9 * 1000 * 1000; // 9s overall

  if (OB_ISNULL(info)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("tenant info is null", K(ret), KP(info));
  } else {
    const int64_t begin_ts = ObTimeUtility::current_time();
    bool got_scn = false;
    // The loop ends either when a value has been fetched or when ret carries
    // an error that must be reported to the caller.
    while (OB_SUCC(ret) && !got_scn) {
      if (OB_SUCC(get_readable_scn_(readable_scn))) {
        got_scn = true;
      } else {
        LOG_WARN("failed to get readable scn", K(ret));
        if (OB_EAGAIN != ret) {
          // Not retryable: keep ret as is, the loop condition stops the loop.
        } else if (ObTimeUtil::current_time() - begin_ts >= RETRY_BUDGET_US) {
          // Retry budget used up: replace OB_EAGAIN with OB_TIMEOUT.
          ret = OB_TIMEOUT;
          LOG_WARN("get valid readable scn timeout", K(ret), K(readable_scn));
        } else {
          // Clear OB_EAGAIN so the loop goes around again after a short sleep.
          ret = OB_SUCCESS;
          ob_usleep(RETRY_SLEEP_US);
        }
      }
    }
  }
  return ret;
}
|
||||
|
||||
// Read the readable SCN from the tenant info loader and validate it.
//
// This helper only fetches the value. It takes neither a replay SCN nor a
// need_rebuild flag, so the rebuild decision is left to
// check_transfer_ls_can_rebuild(), which compares the value returned here
// with its own replay_scn.
//
// @param [out] readable_scn  reset to the base SCN on entry; holds the value
//                            reported by ObTenantInfoLoader on success.
// @return OB_SUCCESS         readable_scn holds a valid SCN;
//         OB_ERR_UNEXPECTED  the tenant info loader is not available;
//         OB_EAGAIN          the loader returned an SCN that is not valid
//                            (get_readable_scn_with_retry() retries on this);
//         otherwise the error returned by ObTenantInfoLoader::get_readable_scn().
int ObStorageHAUtils::get_readable_scn_(share::SCN &readable_scn)
{
  int ret = OB_SUCCESS;
  readable_scn.set_base();
  rootserver::ObTenantInfoLoader *info = MTL(rootserver::ObTenantInfoLoader*);
  if (OB_ISNULL(info)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("tenant info is null", K(ret), KP(info));
  } else if (OB_FAIL(info->get_readable_scn(readable_scn))) {
    LOG_WARN("failed to get readable scn", K(ret), K(readable_scn));
  } else if (!readable_scn.is_valid()) {
    // The call succeeded but produced no usable value; report it as retryable.
    ret = OB_EAGAIN;
    LOG_WARN("readable_scn not valid", K(ret), K(readable_scn));
  }
  return ret;
}
|
||||
|
||||
@ -42,6 +42,7 @@ public:
|
||||
static int check_transfer_ls_can_rebuild(
|
||||
const share::SCN replay_scn,
|
||||
bool &need_rebuild);
|
||||
static int get_readable_scn_with_retry(share::SCN &readable_scn);
|
||||
|
||||
private:
|
||||
static int check_merge_error_(const uint64_t tenant_id, common::ObISQLClient &sql_client);
|
||||
@ -50,6 +51,7 @@ private:
|
||||
share::SCN &compaction_scn);
|
||||
static int check_tablet_replica_checksum_(const uint64_t tenant_id, const common::ObTabletID &tablet_id,
|
||||
const share::ObLSID &ls_id, const share::SCN &compaction_scn, common::ObISQLClient &sql_client);
|
||||
static int get_readable_scn_(share::SCN &readable_scn);
|
||||
};
|
||||
|
||||
struct ObTransferUtils
|
||||
|
||||
Reference in New Issue
Block a user