Fix two bugs: followers cannot get the full sys tenant LS info, and the LS meta checker reports too often
This commit is contained in:
@ -1841,11 +1841,8 @@ int ObService::detect_master_rs_ls(
|
|||||||
} else if (replica.is_strong_leader()) {
|
} else if (replica.is_strong_leader()) {
|
||||||
// case 2 : replica is leader, do not use in_service to check whether it is leader or not
|
// case 2 : replica is leader, do not use in_service to check whether it is leader or not
|
||||||
// use in_service could lead to bad case: https://yuque.antfin.com/ob/rootservice/pbw2qw
|
// use in_service could lead to bad case: https://yuque.antfin.com/ob/rootservice/pbw2qw
|
||||||
const ObLSReplica *leader = NULL;
|
if (OB_FAIL(generate_master_rs_ls_info_(replica, ls_info))) {
|
||||||
|
LOG_WARN("generate master rs ls info failed", KR(ret), K(replica), K(ls_info));
|
||||||
// FIXME: Need Use in memory table operator to fill log stream info
|
|
||||||
if (OB_FAIL(ls_info.init_by_replica(replica))) {
|
|
||||||
LOG_WARN("init by replica failed", KR(ret), K(replica));
|
|
||||||
} else if (OB_FAIL(result.init(ObRole::LEADER, master_rs, replica, ls_info))) {
|
} else if (OB_FAIL(result.init(ObRole::LEADER, master_rs, replica, ls_info))) {
|
||||||
LOG_WARN("fail to init result", KR(ret), K(master_rs), K(replica), K(ls_info));
|
LOG_WARN("fail to init result", KR(ret), K(master_rs), K(replica), K(ls_info));
|
||||||
}
|
}
|
||||||
@ -1867,6 +1864,68 @@ int ObService::detect_master_rs_ls(
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Use the local leader replica as ls_info by default, and try to get the full ls info from the in-memory ls table.
|
||||||
|
// If the in-memory leader and cur_leader have the same proposal_id and server, ls_info = cur_leader + in-memory non-leader replicas.
|
||||||
|
// Fills ls_info for the sys tenant's SYS_LS, starting from the local leader replica.
// @param [in]  cur_leader  must be valid, a strong leader, and located on gctx_.self_addr()
// @param [out] ls_info     reset first, then initialized from cur_leader; non-leader replicas
//                          from the in-memory ls table are appended only when the in-memory
//                          leader has the same proposal_id and server as cur_leader
// @return OB_SUCCESS, OB_NOT_INIT, OB_INVALID_ARGUMENT, OB_ERR_UNEXPECTED,
//         or an error code propagated from a callee
int ObService::generate_master_rs_ls_info_(
|
||||||
|
const share::ObLSReplica &cur_leader,
|
||||||
|
share::ObLSInfo &ls_info)
|
||||||
|
{
|
||||||
|
int ret = OB_SUCCESS;
|
||||||
|
// discard whatever the caller passed in before any validation happens
ls_info.reset();
|
||||||
|
ObInMemoryLSTable *inmemory_ls_table = NULL;
|
||||||
|
const ObLSReplica *inmemory_leader = NULL;
|
||||||
|
ObLSInfo inmemory_ls_info;
|
||||||
|
if (OB_UNLIKELY(!inited_) || OB_ISNULL(gctx_.lst_operator_)) {
|
||||||
|
ret = OB_NOT_INIT;
|
||||||
|
LOG_WARN("not init", KR(ret));
|
||||||
|
} else if (OB_UNLIKELY(!cur_leader.is_valid()
|
||||||
|
|| !cur_leader.is_strong_leader()
|
||||||
|
|| cur_leader.get_server() != gctx_.self_addr())) {
|
||||||
|
ret = OB_INVALID_ARGUMENT;
|
||||||
|
LOG_WARN("invalid current leader", KR(ret), K(cur_leader), "self_addr", gctx_.self_addr());
|
||||||
|
// baseline result: ls_info contains only cur_leader from here on
} else if (OB_FAIL(ls_info.init_by_replica(cur_leader))) {
|
||||||
|
LOG_WARN("init by replica failed", KR(ret), K(cur_leader));
|
||||||
|
} else if (OB_ISNULL(inmemory_ls_table = gctx_.lst_operator_->get_inmemory_ls())) {
|
||||||
|
ret = OB_ERR_UNEXPECTED;
|
||||||
|
LOG_WARN("inmemory ls_table is null", KR(ret), KP(inmemory_ls_table));
|
||||||
|
} else if (OB_UNLIKELY(!inmemory_ls_table->is_inited())) {
|
||||||
|
// if RS is not started, inmemory_ls_table may be uninitialized
// empty branch: ret is left unchanged and ls_info keeps only cur_leader
|
||||||
|
} else if (OB_FAIL(inmemory_ls_table->get(
|
||||||
|
GCONF.cluster_id,
|
||||||
|
OB_SYS_TENANT_ID,
|
||||||
|
SYS_LS,
|
||||||
|
share::ObLSTable::DEFAULT_MODE,
|
||||||
|
inmemory_ls_info))) {
|
||||||
|
LOG_WARN("failed to get in memory sys tenant ls info", KR(ret), K(inmemory_ls_info));
|
||||||
|
} else if (OB_FAIL(inmemory_ls_info.find_leader(inmemory_leader))) {
|
||||||
|
if (OB_ENTRY_NOT_EXIST == ret) { // ls replica hasn't been reported to memory
|
||||||
|
// not an error: return success with the leader-only ls_info
ret = OB_SUCCESS;
|
||||||
|
} else {
|
||||||
|
LOG_WARN("fail to find leader in inmemory_ls_info", KR(ret), K(inmemory_ls_info));
|
||||||
|
}
|
||||||
|
} else if (OB_ISNULL(inmemory_leader)) {
|
||||||
|
ret = OB_ERR_UNEXPECTED;
|
||||||
|
LOG_WARN("leader replica can not be null", KR(ret), K(inmemory_ls_info));
|
||||||
|
} else if (inmemory_leader->get_proposal_id() != cur_leader.get_proposal_id()
|
||||||
|
|| inmemory_leader->get_server() != cur_leader.get_server()) {
|
||||||
|
// do not use unreliable inmemory ls info
|
||||||
|
} else {
|
||||||
|
// in-memory leader matches cur_leader: merge the other in-memory replicas into ls_info
ARRAY_FOREACH(inmemory_ls_info.get_replicas(), idx) {
|
||||||
|
const ObLSReplica &replica = inmemory_ls_info.get_replicas().at(idx);
|
||||||
|
// strong-leader entries are skipped; cur_leader was already added via init_by_replica
if (!replica.is_strong_leader()) {
|
||||||
|
if (OB_FAIL(ls_info.add_replica(replica))) {
|
||||||
|
LOG_WARN("add replica failed", KR(ret), K(replica), K(ls_info), K(inmemory_ls_info));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// NOTE(review): FAILEDx presumably evaluates its argument only while ret is still OB_SUCCESS — confirm against the macro definition
if (FAILEDx(ls_info.update_replica_status())) {
|
||||||
|
LOG_WARN("update replica status failed", KR(ret),
|
||||||
|
K(ls_info), K(cur_leader), K(inmemory_ls_info));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
int ObService::get_root_server_status(ObGetRootserverRoleResult &get_role_result)
|
int ObService::get_root_server_status(ObGetRootserverRoleResult &get_role_result)
|
||||||
{
|
{
|
||||||
int ret = OB_SUCCESS;
|
int ret = OB_SUCCESS;
|
||||||
|
|||||||
@ -248,6 +248,9 @@ private:
|
|||||||
int handle_ls_freeze_req_(const obrpc::ObMinorFreezeArg &arg);
|
int handle_ls_freeze_req_(const obrpc::ObMinorFreezeArg &arg);
|
||||||
int tenant_freeze_(const uint64_t tenant_id);
|
int tenant_freeze_(const uint64_t tenant_id);
|
||||||
int ls_freeze_(const uint64_t tenant_id, const share::ObLSID &ls_id, const common::ObTabletID &tablet_id);
|
int ls_freeze_(const uint64_t tenant_id, const share::ObLSID &ls_id, const common::ObTabletID &tablet_id);
|
||||||
|
int generate_master_rs_ls_info_(
|
||||||
|
const share::ObLSReplica &cur_leader,
|
||||||
|
share::ObLSInfo &ls_info);
|
||||||
private:
|
private:
|
||||||
bool inited_;
|
bool inited_;
|
||||||
bool in_register_process_;
|
bool in_register_process_;
|
||||||
|
|||||||
@ -228,7 +228,6 @@ bool ObLSReplica::is_equal_for_report(const ObLSReplica &other) const
|
|||||||
&& role_ == other.role_
|
&& role_ == other.role_
|
||||||
&& member_list_is_equal(member_list_, other.member_list_)
|
&& member_list_is_equal(member_list_, other.member_list_)
|
||||||
&& replica_type_ == other.replica_type_
|
&& replica_type_ == other.replica_type_
|
||||||
&& proposal_id_ == other.proposal_id_
|
|
||||||
&& replica_status_ == other.replica_status_
|
&& replica_status_ == other.replica_status_
|
||||||
&& restore_status_ == other.restore_status_
|
&& restore_status_ == other.restore_status_
|
||||||
&& property_ == other.property_
|
&& property_ == other.property_
|
||||||
@ -237,6 +236,11 @@ bool ObLSReplica::is_equal_for_report(const ObLSReplica &other) const
|
|||||||
&& paxos_replica_number_ == other.paxos_replica_number_) {
|
&& paxos_replica_number_ == other.paxos_replica_number_) {
|
||||||
is_equal = true;
|
is_equal = true;
|
||||||
}
|
}
|
||||||
|
// only the proposal_id of a leader is meaningful
|
||||||
|
// the proposal_id of a follower is set to 0 during the reporting process
|
||||||
|
if (is_equal && ObRole::LEADER == role_) {
|
||||||
|
is_equal = (proposal_id_ == other.proposal_id_);
|
||||||
|
}
|
||||||
return is_equal;
|
return is_equal;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -503,7 +503,7 @@ int ObPersistentLSTable::set_role_(
|
|||||||
LOG_WARN("convert leader_server ip to string failed", KR(ret), K(leader_server));
|
LOG_WARN("convert leader_server ip to string failed", KR(ret), K(leader_server));
|
||||||
} else if (OB_FAIL(sql.assign_fmt(
|
} else if (OB_FAIL(sql.assign_fmt(
|
||||||
"UPDATE %s "
|
"UPDATE %s "
|
||||||
"SET gmt_modified = now(6), role = ("
|
"SET role = ("
|
||||||
"CASE WHEN svr_ip = '%s' AND svr_port = %d THEN %d "
|
"CASE WHEN svr_ip = '%s' AND svr_port = %d THEN %d "
|
||||||
"ELSE %d end), proposal_id = ("
|
"ELSE %d end), proposal_id = ("
|
||||||
"CASE WHEN svr_ip = '%s' AND svr_port = %d THEN proposal_id "
|
"CASE WHEN svr_ip = '%s' AND svr_port = %d THEN proposal_id "
|
||||||
@ -536,13 +536,10 @@ int ObPersistentLSTable::update_replica_(
|
|||||||
LOG_WARN("fill dml splicer failed", KR(ret), K(replica));
|
LOG_WARN("fill dml splicer failed", KR(ret), K(replica));
|
||||||
} else {
|
} else {
|
||||||
ObDMLExecHelper exec(sql_client, sql_tenant_id);
|
ObDMLExecHelper exec(sql_client, sql_tenant_id);
|
||||||
if (OB_FAIL(dml.add_gmt_modified())) {
|
if (OB_FAIL(exec.exec_insert_update(table_name, dml, affected_rows))) {
|
||||||
LOG_WARN("add gmt modified to dml sql failed", KR(ret));
|
|
||||||
} else if (OB_FAIL(exec.exec_insert_update(table_name, dml, affected_rows))) {
|
|
||||||
//insert_update means if row exist update, if not exist insert
|
//insert_update means if row exist update, if not exist insert
|
||||||
LOG_WARN("execute update failed", KR(ret), K(replica));
|
LOG_WARN("execute update failed", KR(ret), K(replica));
|
||||||
} else if (is_zero_row(affected_rows) || affected_rows > 2) {
|
} else if (OB_UNLIKELY(affected_rows < 0 || affected_rows > 2)) {
|
||||||
// only insert on duplicate key update (insert_update) check affected single row
|
|
||||||
ret = OB_ERR_UNEXPECTED;
|
ret = OB_ERR_UNEXPECTED;
|
||||||
LOG_WARN("unexpected affected_rows", KR(ret), K(affected_rows));
|
LOG_WARN("unexpected affected_rows", KR(ret), K(affected_rows));
|
||||||
}
|
}
|
||||||
|
|||||||
@ -240,7 +240,7 @@ int ObRpcLSTable::do_detect_master_rs_ls_(
|
|||||||
&& start_idx <= end_idx
|
&& start_idx <= end_idx
|
||||||
&& end_idx < server_list.count()
|
&& end_idx < server_list.count()
|
||||||
&& OB_ISNULL(leader)) {
|
&& OB_ISNULL(leader)) {
|
||||||
LOG_INFO("[RPC_LS] do detect master rs", K(cluster_id), K(start_idx), K(end_idx), K(server_list));
|
LOG_TRACE("[RPC_LS] do detect master rs", K(cluster_id), K(start_idx), K(end_idx), K(server_list));
|
||||||
if (OB_FAIL(do_detect_master_rs_ls_(cluster_id, start_idx, end_idx,
|
if (OB_FAIL(do_detect_master_rs_ls_(cluster_id, start_idx, end_idx,
|
||||||
server_list, ls_info))) {
|
server_list, ls_info))) {
|
||||||
LOG_WARN("fail to detect master rs", KR(ret), K(cluster_id),
|
LOG_WARN("fail to detect master rs", KR(ret), K(cluster_id),
|
||||||
@ -248,7 +248,7 @@ int ObRpcLSTable::do_detect_master_rs_ls_(
|
|||||||
} else {
|
} else {
|
||||||
int tmp_ret = ls_info.find_leader(leader);
|
int tmp_ret = ls_info.find_leader(leader);
|
||||||
if (OB_SUCCESS == tmp_ret && OB_NOT_NULL(leader)) {
|
if (OB_SUCCESS == tmp_ret && OB_NOT_NULL(leader)) {
|
||||||
LOG_INFO("[RPC_LS] get master rs", KR(ret), K(cluster_id), "addr", leader->get_server());
|
LOG_TRACE("[RPC_LS] get master rs", KR(ret), K(cluster_id), "addr", leader->get_server());
|
||||||
}
|
}
|
||||||
start_idx = end_idx + 1;
|
start_idx = end_idx + 1;
|
||||||
end_idx = server_list.count() - 1;
|
end_idx = server_list.count() - 1;
|
||||||
|
|||||||
Reference in New Issue
Block a user