[4.2] fix paxos replica number problem
This commit is contained in:
@ -919,6 +919,7 @@ int PalfHandleImpl::replace_learners(const common::ObMemberList &added_learners,
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ERRSIM_POINT_DEF(ERRSIM_REPLACE_MEMBER_NOT_REMOVE_ERROR);
|
||||||
int PalfHandleImpl::replace_member_with_learner(const common::ObMember &added_member,
|
int PalfHandleImpl::replace_member_with_learner(const common::ObMember &added_member,
|
||||||
const common::ObMember &removed_member,
|
const common::ObMember &removed_member,
|
||||||
const palf::LogConfigVersion &config_version,
|
const palf::LogConfigVersion &config_version,
|
||||||
@ -944,6 +945,8 @@ int PalfHandleImpl::replace_member_with_learner(const common::ObMember &added_me
|
|||||||
PALF_LOG(WARN, "get_curr_member_list failed", KR(ret), KPC(this));
|
PALF_LOG(WARN, "get_curr_member_list failed", KR(ret), KPC(this));
|
||||||
} else if (OB_FAIL(one_stage_config_change_(args, timeout_us))) {
|
} else if (OB_FAIL(one_stage_config_change_(args, timeout_us))) {
|
||||||
PALF_LOG(WARN, "add_member in replace_member_with_learner failed", KR(ret), KPC(this), K(args));
|
PALF_LOG(WARN, "add_member in replace_member_with_learner failed", KR(ret), KPC(this), K(args));
|
||||||
|
} else if (OB_UNLIKELY(ERRSIM_REPLACE_MEMBER_NOT_REMOVE_ERROR)) {
|
||||||
|
// do nothing
|
||||||
} else if (FALSE_IT(args.server_ = removed_member)) {
|
} else if (FALSE_IT(args.server_ = removed_member)) {
|
||||||
} else if (FALSE_IT(args.type_ = REMOVE_MEMBER_AND_NUM)) {
|
} else if (FALSE_IT(args.type_ = REMOVE_MEMBER_AND_NUM)) {
|
||||||
} else if (OB_FAIL(one_stage_config_change_(args, timeout_us + begin_time_us - common::ObTimeUtility::current_time()))) {
|
} else if (OB_FAIL(one_stage_config_change_(args, timeout_us + begin_time_us - common::ObTimeUtility::current_time()))) {
|
||||||
|
|||||||
@ -102,6 +102,7 @@ static const char* disaster_recovery_task_ret_comment_strs[] = {
|
|||||||
"[rs] task can not execute because server is not alive",
|
"[rs] task can not execute because server is not alive",
|
||||||
"[rs] task can not execute because fail to check paxos replica number",
|
"[rs] task can not execute because fail to check paxos replica number",
|
||||||
"[rs] task can not execute because replica is not in service",
|
"[rs] task can not execute because replica is not in service",
|
||||||
|
"[rs] task can not execute because server is permanent offline",
|
||||||
""/*default max*/
|
""/*default max*/
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -678,13 +679,13 @@ int ObMigrateLSReplicaTask::check_paxos_number(
|
|||||||
int ret = OB_SUCCESS;
|
int ret = OB_SUCCESS;
|
||||||
const ObLSReplica *leader = nullptr;
|
const ObLSReplica *leader = nullptr;
|
||||||
if (OB_FAIL(ls_info.find_leader(leader))) {
|
if (OB_FAIL(ls_info.find_leader(leader))) {
|
||||||
LOG_WARN("fail to get leader", K(ret));
|
LOG_WARN("fail to get leader", KR(ret), K(ls_info));
|
||||||
} else if (OB_UNLIKELY(nullptr == leader)) {
|
} else if (OB_ISNULL(leader)) {
|
||||||
ret = OB_ERR_UNEXPECTED;
|
ret = OB_ERR_UNEXPECTED;
|
||||||
LOG_WARN("leader replica is null", KR(ret));
|
LOG_WARN("leader replica is null", KR(ret));
|
||||||
} else if (leader->get_paxos_replica_number() <= 0) {
|
} else if (leader->get_paxos_replica_number() <= 0) {
|
||||||
ret = OB_REBALANCE_TASK_CANT_EXEC;
|
ret = OB_REBALANCE_TASK_CANT_EXEC;
|
||||||
LOG_WARN("paxos replica number not report", K(ret), KPC(leader));
|
LOG_WARN("paxos replica number not report", KR(ret), KPC(leader));
|
||||||
} else if (leader->get_paxos_replica_number() != paxos_replica_number_) {
|
} else if (leader->get_paxos_replica_number() != paxos_replica_number_) {
|
||||||
ret = OB_REBALANCE_TASK_CANT_EXEC;
|
ret = OB_REBALANCE_TASK_CANT_EXEC;
|
||||||
LOG_WARN("paxos replica number not match", KR(ret),
|
LOG_WARN("paxos replica number not match", KR(ret),
|
||||||
|
|||||||
@ -68,6 +68,7 @@ enum ObDRTaskRetComment
|
|||||||
CANNOT_EXECUTE_DUE_TO_SERVER_NOT_ALIVE = 6,
|
CANNOT_EXECUTE_DUE_TO_SERVER_NOT_ALIVE = 6,
|
||||||
CANNOT_EXECUTE_DUE_TO_PAXOS_REPLICA_NUMBER = 7,
|
CANNOT_EXECUTE_DUE_TO_PAXOS_REPLICA_NUMBER = 7,
|
||||||
CANNOT_EXECUTE_DUE_TO_REPLICA_NOT_INSERVICE = 8,
|
CANNOT_EXECUTE_DUE_TO_REPLICA_NOT_INSERVICE = 8,
|
||||||
|
CANNOT_EXECUTE_DUE_TO_SERVER_PERMANENT_OFFLINE = 9,
|
||||||
MAX
|
MAX
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@ -718,12 +718,30 @@ void ObDRTaskMgr::run3()
|
|||||||
if (OB_FAIL(try_pop_task(allocator, task))) {
|
if (OB_FAIL(try_pop_task(allocator, task))) {
|
||||||
LOG_WARN("fail to try pop task", KR(ret));
|
LOG_WARN("fail to try pop task", KR(ret));
|
||||||
} else if (OB_NOT_NULL(task)) {
|
} else if (OB_NOT_NULL(task)) {
|
||||||
tmp_ret = task->log_execute_start();
|
const ObAddr &dst_server = task->get_dst_server();
|
||||||
if (OB_SUCCESS != tmp_ret) {
|
ObServerInfoInTable server_info;
|
||||||
LOG_WARN("fail to log task start", KR(tmp_ret), KPC(task));
|
if (OB_FAIL(SVR_TRACER.get_server_info(dst_server, server_info))) {
|
||||||
}
|
LOG_WARN("fail to get server_info", KR(ret), K(dst_server));
|
||||||
if (OB_FAIL(execute_task(*task))) {
|
} else if (server_info.is_permanent_offline()) {
|
||||||
LOG_WARN("fail to send", KR(ret), KPC(task));
|
// dest server permanent offline, do not execute this task, just clean it
|
||||||
|
LOG_INFO("[DRTASK_NOTICE] dest server is permanent offline, task can not execute", K(dst_server), K(server_info));
|
||||||
|
ObThreadCondGuard guard(cond_);
|
||||||
|
if (OB_SUCCESS != (tmp_ret = async_add_cleaning_task_to_updater(
|
||||||
|
task->get_task_id(),
|
||||||
|
task->get_task_key(),
|
||||||
|
OB_REBALANCE_TASK_CANT_EXEC,
|
||||||
|
false/*need_record_event*/,
|
||||||
|
ObDRTaskRetComment::CANNOT_EXECUTE_DUE_TO_SERVER_PERMANENT_OFFLINE,
|
||||||
|
false/*reach_data_copy_concurrency*/))) {
|
||||||
|
LOG_WARN("fail to do execute over", KR(tmp_ret), KPC(task));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (OB_SUCCESS != (tmp_ret = task->log_execute_start())) {
|
||||||
|
LOG_WARN("fail to log task start", KR(tmp_ret), KPC(task));
|
||||||
|
}
|
||||||
|
if (OB_FAIL(execute_task(*task))) {
|
||||||
|
LOG_WARN("fail to send", KR(ret), KPC(task));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
free_task_(allocator, task);
|
free_task_(allocator, task);
|
||||||
} else {
|
} else {
|
||||||
@ -734,7 +752,7 @@ void ObDRTaskMgr::run3()
|
|||||||
LOG_WARN("fail to try dump statistic", KR(tmp_ret), K(last_dump_ts));
|
LOG_WARN("fail to try dump statistic", KR(tmp_ret), K(last_dump_ts));
|
||||||
}
|
}
|
||||||
if (OB_SUCCESS != (tmp_ret = try_clean_not_in_schedule_task_in_schedule_list_(
|
if (OB_SUCCESS != (tmp_ret = try_clean_not_in_schedule_task_in_schedule_list_(
|
||||||
last_check_task_in_progress_ts))) {
|
last_check_task_in_progress_ts))) {
|
||||||
LOG_WARN("fail to try check task in progress", KR(tmp_ret), K(last_check_task_in_progress_ts));
|
LOG_WARN("fail to try check task in progress", KR(tmp_ret), K(last_check_task_in_progress_ts));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -4656,7 +4656,10 @@ int ObDRWorker::generate_disaster_recovery_paxos_replica_number(
|
|||||||
if (locality_paxos_replica_number >= member_list_cnt_after) {
|
if (locality_paxos_replica_number >= member_list_cnt_after) {
|
||||||
new_paxos_replica_number = curr_paxos_replica_number;
|
new_paxos_replica_number = curr_paxos_replica_number;
|
||||||
found = true;
|
found = true;
|
||||||
} else {} // new member cnt greater than paxos_replica_number, not good
|
} else if (locality_paxos_replica_number + 1 == member_list_cnt_after) {
|
||||||
|
new_paxos_replica_number = curr_paxos_replica_number + 1;
|
||||||
|
found = true;
|
||||||
|
}
|
||||||
} else if (curr_paxos_replica_number > locality_paxos_replica_number) {
|
} else if (curr_paxos_replica_number > locality_paxos_replica_number) {
|
||||||
if (curr_paxos_replica_number >= member_list_cnt_after) {
|
if (curr_paxos_replica_number >= member_list_cnt_after) {
|
||||||
new_paxos_replica_number = curr_paxos_replica_number;
|
new_paxos_replica_number = curr_paxos_replica_number;
|
||||||
|
|||||||
Reference in New Issue
Block a user