fix misuse of async rpc while create ls

This commit is contained in:
maosy
2023-01-06 08:38:08 +00:00
committed by ob-robot
parent cb953e3049
commit 4dfd67d8fb
5 changed files with 48 additions and 26 deletions

View File

@ -654,11 +654,16 @@ int ObAdminRefreshSchema::call_server(const ObAddr &server)
arg.schema_info_ = schema_info_; arg.schema_info_ = schema_info_;
ObArray<int> return_code_array; ObArray<int> return_code_array;
ObSwitchSchemaProxy proxy(*GCTX.srv_rpc_proxy_, &ObSrvRpcProxy::switch_schema); ObSwitchSchemaProxy proxy(*GCTX.srv_rpc_proxy_, &ObSrvRpcProxy::switch_schema);
int tmp_ret = OB_SUCCESS;
const int64_t timeout_ts = ctx.get_timeout(0); const int64_t timeout_ts = ctx.get_timeout(0);
if (OB_FAIL(proxy.call(server, timeout_ts, arg))) { if (OB_FAIL(proxy.call(server, timeout_ts, arg))) {
LOG_WARN("notify switch schema failed", KR(ret), K(server), K_(schema_version), K_(schema_info)); LOG_WARN("notify switch schema failed", KR(ret), K(server), K_(schema_version), K_(schema_info));
} else if (OB_FAIL(proxy.wait_all(return_code_array))) { }
LOG_WARN("fail to wait all", KR(ret), K(server));
if (OB_TMP_FAIL(proxy.wait_all(return_code_array))) {
ret = OB_SUCC(ret) ? tmp_ret : ret;
LOG_WARN("fail to wait all", KR(ret), KR(tmp_ret), K(server));
} else if (OB_FAIL(ret)) {
} else if (OB_UNLIKELY(return_code_array.empty())) { } else if (OB_UNLIKELY(return_code_array.empty())) {
ret = OB_ERR_UNEXPECTED; ret = OB_ERR_UNEXPECTED;
LOG_WARN("return_code_array is empty", KR(ret), K(server)); LOG_WARN("return_code_array is empty", KR(ret), K(server));

View File

@ -420,10 +420,10 @@ int ObTenantRoleTransitionService::get_ls_access_mode_(ObIArray<LSAccessModeInfo
} else if (OB_FAIL(GCTX.location_service_->get_leader( } else if (OB_FAIL(GCTX.location_service_->get_leader(
GCONF.cluster_id, tenant_id_, info.ls_id_, false, leader))) { GCONF.cluster_id, tenant_id_, info.ls_id_, false, leader))) {
LOG_WARN("failed to get leader", KR(ret), K(tenant_id_), K(info)); LOG_WARN("failed to get leader", KR(ret), K(tenant_id_), K(info));
} else if (OB_FAIL(arg.init(tenant_id_, info.ls_id_))) { } else if (OB_FAIL(arg.init(tenant_id_, info.ls_id_))) {
LOG_WARN("failed to init arg", KR(ret), K(tenant_id_), K(info)); LOG_WARN("failed to init arg", KR(ret), K(tenant_id_), K(info));
} else if (OB_FAIL(proxy.call(leader, timeout, GCONF.cluster_id, tenant_id_, arg))) { } else if (OB_FAIL(proxy.call(leader, timeout, GCONF.cluster_id, tenant_id_, arg))) {
//can not ignore error of each ls
LOG_WARN("failed to send rpc", KR(ret), K(leader), K(timeout), K(tenant_id_), K(arg)); LOG_WARN("failed to send rpc", KR(ret), K(leader), K(timeout), K(tenant_id_), K(arg));
} else { } else {
rpc_count++; rpc_count++;
@ -514,14 +514,18 @@ int ObTenantRoleTransitionService::do_change_ls_access_mode_(const ObIArray<LSAc
if (OB_FAIL(arg.init(tenant_id_, info.ls_id_, info.mode_version_, target_access_mode, ref_scn))) { if (OB_FAIL(arg.init(tenant_id_, info.ls_id_, info.mode_version_, target_access_mode, ref_scn))) {
LOG_WARN("failed to init arg", KR(ret), K(info), K(target_access_mode), K(ref_scn)); LOG_WARN("failed to init arg", KR(ret), K(info), K(target_access_mode), K(ref_scn));
} else if (OB_FAIL(proxy.call(info.leader_addr_, timeout, GCONF.cluster_id, tenant_id_, arg))) { } else if (OB_FAIL(proxy.call(info.leader_addr_, timeout, GCONF.cluster_id, tenant_id_, arg))) {
//can not ignore of each ls
LOG_WARN("failed to send rpc", KR(ret), K(info), K(timeout), K(tenant_id_), K(arg)); LOG_WARN("failed to send rpc", KR(ret), K(info), K(timeout), K(tenant_id_), K(arg));
} }
}//end for }//end for
//result //result
ObArray<int> return_code_array; ObArray<int> return_code_array;
int tmp_ret = OB_SUCCESS;
const int64_t rpc_count = ls_access_info.count(); const int64_t rpc_count = ls_access_info.count();
if (FAILEDx(proxy.wait_all(return_code_array))) { if (OB_TMP_FAIL(proxy.wait_all(return_code_array))) {
LOG_WARN("wait all batch result failed", KR(ret)); ret = OB_SUCC(ret) ? tmp_ret : ret;
LOG_WARN("wait all batch result failed", KR(ret), KR(tmp_ret));
} else if (OB_FAIL(ret)) {
} else if (rpc_count != return_code_array.count() || } else if (rpc_count != return_code_array.count() ||
rpc_count != proxy.get_args().count() || rpc_count != proxy.get_args().count() ||
rpc_count != proxy.get_results().count()) { rpc_count != proxy.get_results().count()) {

View File

@ -366,6 +366,8 @@ int ObLSCreator::create_ls_(const ObILSAddr &addrs,
} else { } else {
obrpc::ObCreateLSArg arg; obrpc::ObCreateLSArg arg;
int64_t rpc_count = 0; int64_t rpc_count = 0;
int tmp_ret = OB_SUCCESS;
ObArray<int> return_code_array;
lib::Worker::CompatMode new_compat_mode = compat_mode == ORACLE_MODE ? lib::Worker::CompatMode new_compat_mode = compat_mode == ORACLE_MODE ?
lib::Worker::CompatMode::ORACLE : lib::Worker::CompatMode::ORACLE :
lib::Worker::CompatMode::MYSQL; lib::Worker::CompatMode::MYSQL;
@ -378,16 +380,21 @@ int ObLSCreator::create_ls_(const ObILSAddr &addrs,
create_with_palf, palf_base_info))) { create_with_palf, palf_base_info))) {
LOG_WARN("failed to init create log stream arg", KR(ret), K(addr), K(create_with_palf), LOG_WARN("failed to init create log stream arg", KR(ret), K(addr), K(create_with_palf),
K_(id), K_(tenant_id), K(tenant_info), K(create_scn), K(new_compat_mode), K(palf_base_info)); K_(id), K_(tenant_id), K(tenant_info), K(create_scn), K(new_compat_mode), K(palf_base_info));
} else if (OB_FAIL(create_ls_proxy_.call(addr.addr_, ctx.get_timeout(), } else if (OB_TMP_FAIL(create_ls_proxy_.call(addr.addr_, ctx.get_timeout(),
GCONF.cluster_id, tenant_id_, arg))) { GCONF.cluster_id, tenant_id_, arg))) {
LOG_WARN("failed to all async rpc", KR(ret), K(addr), K(ctx.get_timeout()), LOG_WARN("failed to all async rpc", KR(tmp_ret), K(addr), K(ctx.get_timeout()),
K(arg), K(tenant_id_)); K(arg), K(tenant_id_));
} else { } else {
rpc_count++; rpc_count++;
} }
} }
if (FAILEDx(check_create_ls_result_(rpc_count, paxos_replica_num, member_list))) { //wait all
LOG_WARN("failed to check ls result", KR(ret), K(rpc_count), K(paxos_replica_num)); if (OB_TMP_FAIL(create_ls_proxy_.wait_all(return_code_array))) {
ret = OB_SUCC(ret) ? tmp_ret : ret;
LOG_WARN("failed to wait all async rpc", KR(ret), KR(tmp_ret), K(rpc_count));
}
if (FAILEDx(check_create_ls_result_(rpc_count, paxos_replica_num, return_code_array, member_list))) {
LOG_WARN("failed to check ls result", KR(ret), K(rpc_count), K(paxos_replica_num), K(return_code_array));
} }
} }
} }
@ -396,18 +403,15 @@ int ObLSCreator::create_ls_(const ObILSAddr &addrs,
int ObLSCreator::check_create_ls_result_(const int64_t rpc_count, int ObLSCreator::check_create_ls_result_(const int64_t rpc_count,
const int64_t paxos_replica_num, const int64_t paxos_replica_num,
const ObIArray<int> &return_code_array,
common::ObMemberList &member_list) common::ObMemberList &member_list)
{ {
int ret = OB_SUCCESS; int ret = OB_SUCCESS;
ObArray<int> return_code_array;
member_list.reset(); member_list.reset();
if (OB_UNLIKELY(!is_valid())) { if (OB_UNLIKELY(!is_valid())) {
ret = OB_INVALID_ARGUMENT; ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", KR(ret)); LOG_WARN("invalid argument", KR(ret));
} else if (OB_FAIL(create_ls_proxy_.wait_all(return_code_array))) {
LOG_WARN("wait all batch result failed", KR(ret));
} else if (rpc_count != return_code_array.count() } else if (rpc_count != return_code_array.count()
|| rpc_count != create_ls_proxy_.get_args().count()
|| rpc_count != create_ls_proxy_.get_results().count()) { || rpc_count != create_ls_proxy_.get_results().count()) {
ret = OB_ERR_UNEXPECTED; ret = OB_ERR_UNEXPECTED;
LOG_WARN("rpc count not equal to result count", KR(ret), K(rpc_count), LOG_WARN("rpc count not equal to result count", KR(ret), K(rpc_count),
@ -425,16 +429,15 @@ int ObLSCreator::check_create_ls_result_(const int64_t rpc_count,
} else if (OB_SUCCESS != result->get_result()) { } else if (OB_SUCCESS != result->get_result()) {
LOG_WARN("rpc is failed", KR(ret), K(*result), K(i)); LOG_WARN("rpc is failed", KR(ret), K(*result), K(i));
} else { } else {
const obrpc::ObCreateLSArg &arg = create_ls_proxy_.get_args().at(i);
const ObAddr &addr = create_ls_proxy_.get_dests().at(i); const ObAddr &addr = create_ls_proxy_.get_dests().at(i);
if (common::ObReplicaTypeCheck::is_paxos_replica_V2(arg.get_replica_type())) { //TODO other replica type
//can not get replica type from arg, arg and result is not match
if (OB_FAIL(member_list.add_member(ObMember(addr, timestamp)))) { if (OB_FAIL(member_list.add_member(ObMember(addr, timestamp)))) {
LOG_WARN("failed to add member", KR(ret), K(addr)); LOG_WARN("failed to add member", KR(ret), K(addr));
} }
} }
} }
} }
}
if (rootserver::majority(paxos_replica_num) > member_list.get_member_number()) { if (rootserver::majority(paxos_replica_num) > member_list.get_member_number()) {
ret = OB_REPLICA_NUM_NOT_ENOUGH; ret = OB_REPLICA_NUM_NOT_ENOUGH;
LOG_WARN("success count less than majority", KR(ret), K(paxos_replica_num), LOG_WARN("success count less than majority", KR(ret), K(paxos_replica_num),
@ -470,6 +473,7 @@ int ObLSCreator::set_member_list_(const common::ObMemberList &member_list,
const int64_t paxos_replica_num) const int64_t paxos_replica_num)
{ {
int ret = OB_SUCCESS; int ret = OB_SUCCESS;
DEBUG_SYNC(BEFORE_SET_LS_MEMBER_LIST);
if (OB_UNLIKELY(!is_valid())) { if (OB_UNLIKELY(!is_valid())) {
ret = OB_INVALID_ARGUMENT; ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", KR(ret)); LOG_WARN("invalid argument", KR(ret));
@ -484,6 +488,8 @@ int ObLSCreator::set_member_list_(const common::ObMemberList &member_list,
} else { } else {
ObSetMemberListArgV2 arg; ObSetMemberListArgV2 arg;
int64_t rpc_count = 0; int64_t rpc_count = 0;
int tmp_ret = OB_SUCCESS;
ObArray<int> return_code_array;
if (OB_FAIL(arg.init(tenant_id_, id_, paxos_replica_num, member_list))) { if (OB_FAIL(arg.init(tenant_id_, id_, paxos_replica_num, member_list))) {
LOG_WARN("failed to init set member list arg", KR(ret), K_(id), K_(tenant_id), LOG_WARN("failed to init set member list arg", KR(ret), K_(id), K_(tenant_id),
K(paxos_replica_num), K(member_list)); K(paxos_replica_num), K(member_list));
@ -492,16 +498,23 @@ int ObLSCreator::set_member_list_(const common::ObMemberList &member_list,
ObAddr addr; ObAddr addr;
if (OB_FAIL(member_list.get_server_by_index(i, addr))) { if (OB_FAIL(member_list.get_server_by_index(i, addr))) {
LOG_WARN("failed to get member by index", KR(ret), K(i), K(member_list)); LOG_WARN("failed to get member by index", KR(ret), K(i), K(member_list));
} else if (OB_FAIL(set_member_list_proxy_.call(addr, ctx.get_timeout(), } else if (OB_TMP_FAIL(set_member_list_proxy_.call(addr, ctx.get_timeout(),
GCONF.cluster_id, tenant_id_, arg))) { GCONF.cluster_id, tenant_id_, arg))) {
LOG_WARN("failed to set member list", KR(ret), K(ctx.get_timeout()), K(arg), LOG_WARN("failed to set member list", KR(tmp_ret), K(ctx.get_timeout()), K(arg),
K(tenant_id_)); K(tenant_id_));
} else { } else {
rpc_count++; rpc_count++;
} }
} }
if (FAILEDx(check_set_memberlist_result_(rpc_count, paxos_replica_num))) {
LOG_WARN("failed to check set member liset result", KR(ret), K(rpc_count), K(paxos_replica_num)); if (OB_TMP_FAIL(set_member_list_proxy_.wait_all(return_code_array))) {
ret = OB_SUCC(ret) ? tmp_ret : ret;
LOG_WARN("failed to wait all async rpc", KR(ret), KR(tmp_ret), K(rpc_count));
}
if (FAILEDx(check_set_memberlist_result_(rpc_count, return_code_array, paxos_replica_num))) {
LOG_WARN("failed to check set member liset result", KR(ret), K(rpc_count),
K(paxos_replica_num), K(return_code_array));
} }
} }
} }
@ -510,17 +523,14 @@ int ObLSCreator::set_member_list_(const common::ObMemberList &member_list,
} }
int ObLSCreator::check_set_memberlist_result_(const int64_t rpc_count, int ObLSCreator::check_set_memberlist_result_(const int64_t rpc_count,
const ObIArray<int> &return_code_array,
const int64_t paxos_replica_num) const int64_t paxos_replica_num)
{ {
int ret = OB_SUCCESS; int ret = OB_SUCCESS;
ObArray<int> return_code_array;
if (OB_UNLIKELY(!is_valid())) { if (OB_UNLIKELY(!is_valid())) {
ret = OB_INVALID_ARGUMENT; ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", KR(ret)); LOG_WARN("invalid argument", KR(ret));
} else if (OB_FAIL(set_member_list_proxy_.wait_all(return_code_array))) {
LOG_WARN("wait all batch result failed", KR(ret));
} else if (rpc_count != return_code_array.count() } else if (rpc_count != return_code_array.count()
|| rpc_count != set_member_list_proxy_.get_args().count()
|| rpc_count != set_member_list_proxy_.get_results().count()) { || rpc_count != set_member_list_proxy_.get_results().count()) {
ret = OB_ERR_UNEXPECTED; ret = OB_ERR_UNEXPECTED;
LOG_WARN("rpc count not equal to result count", KR(ret), K(rpc_count), LOG_WARN("rpc count not equal to result count", KR(ret), K(rpc_count),

View File

@ -149,8 +149,10 @@ private:
ObLSReplicaAddr &ls_replica_addr); ObLSReplicaAddr &ls_replica_addr);
int check_create_ls_result_(const int64_t rpc_count, int check_create_ls_result_(const int64_t rpc_count,
const int64_t paxos_replica_num, const int64_t paxos_replica_num,
const ObIArray<int> &return_code_array,
common::ObMemberList &member_list); common::ObMemberList &member_list);
int check_set_memberlist_result_(const int64_t rpc_count, int check_set_memberlist_result_(const int64_t rpc_count,
const ObIArray<int> &return_code_array,
const int64_t paxos_replica_num); const int64_t paxos_replica_num);
private: private:

View File

@ -430,6 +430,7 @@ class ObString;
ACT(DDL_CHECK_TABLET_MERGE_STATUS,)\ ACT(DDL_CHECK_TABLET_MERGE_STATUS,)\
ACT(MODIFY_HIDDEN_TABLE_NOT_NULL_COLUMN_STATE_BEFORE_PUBLISH_SCHEMA,)\ ACT(MODIFY_HIDDEN_TABLE_NOT_NULL_COLUMN_STATE_BEFORE_PUBLISH_SCHEMA,)\
ACT(AFTER_MIGRATION_FETCH_TABLET_INFO,)\ ACT(AFTER_MIGRATION_FETCH_TABLET_INFO,)\
ACT(BEFORE_SET_LS_MEMBER_LIST,)\
ACT(MAX_DEBUG_SYNC_POINT,) ACT(MAX_DEBUG_SYNC_POINT,)
DECLARE_ENUM(ObDebugSyncPoint, debug_sync_point, OB_DEBUG_SYNC_POINT_DEF); DECLARE_ENUM(ObDebugSyncPoint, debug_sync_point, OB_DEBUG_SYNC_POINT_DEF);