Fix a bug where the standby database transfer gets stuck in the doing state
This commit is contained in:
@ -1009,6 +1009,8 @@ int ObStartCompleteMigrationTask::wait_log_replay_sync_()
|
|||||||
//TODO(muwei.ym) MAKE THIS TIME PARAM as hide configuration iterms
|
//TODO(muwei.ym) MAKE THIS TIME PARAM as hide configuration iterms
|
||||||
bool need_wait = false;
|
bool need_wait = false;
|
||||||
bool is_done = false;
|
bool is_done = false;
|
||||||
|
const bool is_primay_tenant = MTL_IS_PRIMARY_TENANT();
|
||||||
|
share::SCN readable_scn;
|
||||||
|
|
||||||
if (!is_inited_) {
|
if (!is_inited_) {
|
||||||
ret = OB_NOT_INIT;
|
ret = OB_NOT_INIT;
|
||||||
@ -1029,6 +1031,8 @@ int ObStartCompleteMigrationTask::wait_log_replay_sync_()
|
|||||||
LOG_WARN("failed to check need wait log replay", K(ret), KPC(ctx_));
|
LOG_WARN("failed to check need wait log replay", K(ret), KPC(ctx_));
|
||||||
} else if (!need_wait) {
|
} else if (!need_wait) {
|
||||||
FLOG_INFO("no need wait replay log sync", KPC(ctx_));
|
FLOG_INFO("no need wait replay log sync", KPC(ctx_));
|
||||||
|
} else if (!is_primay_tenant && OB_FAIL(ObStorageHAUtils::get_readable_scn_with_retry(readable_scn))) {
|
||||||
|
LOG_WARN("failed to get readable scn", K(ret), KPC(ctx_));
|
||||||
} else {
|
} else {
|
||||||
#ifdef ERRSIM
|
#ifdef ERRSIM
|
||||||
SERVER_EVENT_SYNC_ADD("storage_ha", "wait_log_replay_sync",
|
SERVER_EVENT_SYNC_ADD("storage_ha", "wait_log_replay_sync",
|
||||||
@ -1060,7 +1064,14 @@ int ObStartCompleteMigrationTask::wait_log_replay_sync_()
|
|||||||
LOG_INFO("wait replay log ts ns success, stop wait", "arg", ctx_->arg_, K(cost_ts));
|
LOG_INFO("wait replay log ts ns success, stop wait", "arg", ctx_->arg_, K(cost_ts));
|
||||||
} else if (OB_FAIL(ls->get_max_decided_scn(current_replay_scn))) {
|
} else if (OB_FAIL(ls->get_max_decided_scn(current_replay_scn))) {
|
||||||
LOG_WARN("failed to get current replay log ts", K(ret), KPC(ctx_));
|
LOG_WARN("failed to get current replay log ts", K(ret), KPC(ctx_));
|
||||||
} else {
|
} else if (!is_primay_tenant && current_replay_scn >= readable_scn) {
|
||||||
|
wait_log_replay_success = true;
|
||||||
|
const int64_t cost_ts = ObTimeUtility::current_time() - wait_replay_start_ts;
|
||||||
|
LOG_INFO("wait replay log ts ns success, stop wait", "arg", ctx_->arg_, K(cost_ts),
|
||||||
|
K(is_primay_tenant), K(current_replay_scn), K(readable_scn));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (OB_SUCC(ret) && !wait_log_replay_success) {
|
||||||
current_ts = ObTimeUtility::current_time();
|
current_ts = ObTimeUtility::current_time();
|
||||||
bool is_timeout = false;
|
bool is_timeout = false;
|
||||||
if (REACH_TENANT_TIME_INTERVAL(60 * 1000 * 1000)) {
|
if (REACH_TENANT_TIME_INTERVAL(60 * 1000 * 1000)) {
|
||||||
|
|||||||
@ -246,25 +246,69 @@ int ObStorageHAUtils::check_transfer_ls_can_rebuild(
|
|||||||
{
|
{
|
||||||
int ret = OB_SUCCESS;
|
int ret = OB_SUCCESS;
|
||||||
SCN readable_scn = SCN::base_scn();
|
SCN readable_scn = SCN::base_scn();
|
||||||
rootserver::ObTenantInfoLoader *info = MTL(rootserver::ObTenantInfoLoader*);
|
|
||||||
need_rebuild = false;
|
need_rebuild = false;
|
||||||
if (!replay_scn.is_valid()) {
|
if (!replay_scn.is_valid()) {
|
||||||
ret = OB_INVALID_ARGUMENT;
|
ret = OB_INVALID_ARGUMENT;
|
||||||
LOG_WARN("argument invalid", K(ret), K(replay_scn));
|
LOG_WARN("argument invalid", K(ret), K(replay_scn));
|
||||||
} else if (OB_ISNULL(info)) {
|
|
||||||
ret = OB_ERR_UNEXPECTED;
|
|
||||||
LOG_WARN("tenant info is null", K(ret), K(replay_scn));
|
|
||||||
} else if (MTL_IS_PRIMARY_TENANT()) {
|
} else if (MTL_IS_PRIMARY_TENANT()) {
|
||||||
need_rebuild = true;
|
need_rebuild = true;
|
||||||
|
} else if (OB_FAIL(get_readable_scn_(readable_scn))) {
|
||||||
|
LOG_WARN("failed to get readable scn", K(ret), K(replay_scn));
|
||||||
|
} else if (readable_scn >= replay_scn) {
|
||||||
|
need_rebuild = true;
|
||||||
|
} else {
|
||||||
|
need_rebuild = false;
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fetches the readable SCN through get_readable_scn_(), retrying for as long
// as that helper keeps returning OB_EAGAIN.
//
// @param readable_scn [out] set_base() is called on it first; it is then
//                     filled in by get_readable_scn_().
// @return OB_SUCCESS       as soon as get_readable_scn_() succeeds;
//         OB_ERR_UNEXPECTED when the tenant info loader pointer is null;
//         OB_TIMEOUT        when OB_EAGAIN is still returned once
//                           RETRY_TIMEOUT_US has elapsed since the first attempt;
//         otherwise the error code returned by get_readable_scn_().
int ObStorageHAUtils::get_readable_scn_with_retry(share::SCN &readable_scn)
{
  int ret = OB_SUCCESS;
  const int64_t RETRY_INTERVAL_US = 100 * 1000;      // 100ms pause between attempts
  const int64_t RETRY_TIMEOUT_US = 9 * 1000 * 1000;  // 9s overall retry budget
  readable_scn.set_base();
  rootserver::ObTenantInfoLoader *info = MTL(rootserver::ObTenantInfoLoader*);

  if (OB_ISNULL(info)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("tenant info is null", K(ret), KP(info));
  } else {
    const int64_t begin_ts = ObTimeUtility::current_time();
    bool fetched = false;
    while (OB_SUCC(ret) && !fetched) {
      if (OB_FAIL(get_readable_scn_(readable_scn))) {
        LOG_WARN("failed to get readable scn", K(ret));
        if (OB_EAGAIN != ret) {
          // Not retryable: keep ret as is so the loop condition ends the loop.
        } else if (ObTimeUtil::current_time() - begin_ts >= RETRY_TIMEOUT_US) {
          // Retry budget used up: report a timeout instead of OB_EAGAIN.
          ret = OB_TIMEOUT;
          LOG_WARN("get valid readable scn timeout", K(ret), K(readable_scn));
        } else {
          // Still within budget: clear the error, pause, and try again.
          ret = OB_SUCCESS;
          ob_usleep(RETRY_INTERVAL_US);
        }
      } else {
        fetched = true;
      }
    }
  }
  return ret;
}
|
||||||
|
|
||||||
|
int ObStorageHAUtils::get_readable_scn_(share::SCN &readable_scn)
|
||||||
|
{
|
||||||
|
int ret = OB_SUCCESS;
|
||||||
|
readable_scn.set_base();
|
||||||
|
rootserver::ObTenantInfoLoader *info = MTL(rootserver::ObTenantInfoLoader*);
|
||||||
|
if (OB_ISNULL(info)) {
|
||||||
|
ret = OB_ERR_UNEXPECTED;
|
||||||
|
LOG_WARN("tenant info is null", K(ret), KP(info));
|
||||||
} else if (OB_FAIL(info->get_readable_scn(readable_scn))) {
|
} else if (OB_FAIL(info->get_readable_scn(readable_scn))) {
|
||||||
LOG_WARN("failed to get readable scn", K(ret), K(readable_scn));
|
LOG_WARN("failed to get readable scn", K(ret), K(readable_scn));
|
||||||
} else if (!readable_scn.is_valid()) {
|
} else if (!readable_scn.is_valid()) {
|
||||||
ret = OB_EAGAIN;
|
ret = OB_EAGAIN;
|
||||||
LOG_WARN("readable_scn not valid", K(ret), K(readable_scn));
|
LOG_WARN("readable_scn not valid", K(ret), K(readable_scn));
|
||||||
} else if (readable_scn >= replay_scn) {
|
|
||||||
need_rebuild = true;
|
|
||||||
} else {
|
|
||||||
need_rebuild = false;
|
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -42,6 +42,7 @@ public:
|
|||||||
static int check_transfer_ls_can_rebuild(
|
static int check_transfer_ls_can_rebuild(
|
||||||
const share::SCN replay_scn,
|
const share::SCN replay_scn,
|
||||||
bool &need_rebuild);
|
bool &need_rebuild);
|
||||||
|
static int get_readable_scn_with_retry(share::SCN &readable_scn);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static int check_merge_error_(const uint64_t tenant_id, common::ObISQLClient &sql_client);
|
static int check_merge_error_(const uint64_t tenant_id, common::ObISQLClient &sql_client);
|
||||||
@ -50,6 +51,7 @@ private:
|
|||||||
share::SCN &compaction_scn);
|
share::SCN &compaction_scn);
|
||||||
static int check_tablet_replica_checksum_(const uint64_t tenant_id, const common::ObTabletID &tablet_id,
|
static int check_tablet_replica_checksum_(const uint64_t tenant_id, const common::ObTabletID &tablet_id,
|
||||||
const share::ObLSID &ls_id, const share::SCN &compaction_scn, common::ObISQLClient &sql_client);
|
const share::ObLSID &ls_id, const share::SCN &compaction_scn, common::ObISQLClient &sql_client);
|
||||||
|
static int get_readable_scn_(share::SCN &readable_scn);
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ObTransferUtils
|
struct ObTransferUtils
|
||||||
|
|||||||
Reference in New Issue
Block a user