Fix two bugs: followers cannot get the full sys tenant ls info, and the ls meta checker reports too often

This commit is contained in:
obdev
2023-02-07 10:57:15 +08:00
committed by ob-robot
parent 13bcd47b9a
commit bc4490783f
5 changed files with 77 additions and 14 deletions

View File

@ -1841,11 +1841,8 @@ int ObService::detect_master_rs_ls(
} else if (replica.is_strong_leader()) { } else if (replica.is_strong_leader()) {
// case 2 : replica is leader, do not use in_service to check whether it is leader or not // case 2 : replica is leader, do not use in_service to check whether it is leader or not
// use in_service could lead to bad case: https://yuque.antfin.com/ob/rootservice/pbw2qw // use in_service could lead to bad case: https://yuque.antfin.com/ob/rootservice/pbw2qw
const ObLSReplica *leader = NULL; if (OB_FAIL(generate_master_rs_ls_info_(replica, ls_info))) {
LOG_WARN("generate master rs ls info failed", KR(ret), K(replica), K(ls_info));
// FIXME: Need Use in memory table operator to fill log stream info
if (OB_FAIL(ls_info.init_by_replica(replica))) {
LOG_WARN("init by replica failed", KR(ret), K(replica));
} else if (OB_FAIL(result.init(ObRole::LEADER, master_rs, replica, ls_info))) { } else if (OB_FAIL(result.init(ObRole::LEADER, master_rs, replica, ls_info))) {
LOG_WARN("fail to init result", KR(ret), K(master_rs), K(replica), K(ls_info)); LOG_WARN("fail to init result", KR(ret), K(master_rs), K(replica), K(ls_info));
} }
@ -1867,6 +1864,68 @@ int ObService::detect_master_rs_ls(
return ret; return ret;
} }
// Build the sys tenant ls_info for the local strong leader replica.
// ls_info is always seeded with cur_leader. Follower replicas recorded in the inmemory ls table
// are appended only when the inmemory leader has the same proposal_id and server as cur_leader;
// in every other non-error case ls_info keeps cur_leader alone.
//
// @param [in] cur_leader  local replica; must be valid, a strong leader and located on this server
// @param [out] ls_info    reset first, then filled with cur_leader (+ inmemory followers)
// @return OB_SUCCESS, OB_NOT_INIT, OB_INVALID_ARGUMENT, OB_ERR_UNEXPECTED,
//         or the error code of a failed callee
int ObService::generate_master_rs_ls_info_(
    const share::ObLSReplica &cur_leader,
    share::ObLSInfo &ls_info)
{
  int ret = OB_SUCCESS;
  ls_info.reset();
  ObInMemoryLSTable *inmemory_ls_table = NULL;
  const ObLSReplica *mem_leader = NULL;
  ObLSInfo inmemory_ls_info;
  // becomes true only after the inmemory leader is confirmed to match cur_leader
  bool can_merge_followers = false;

  // Phase 1: validate input, seed ls_info with cur_leader and decide whether
  // the inmemory ls info is trustworthy enough to take followers from.
  if (OB_UNLIKELY(!inited_) || OB_ISNULL(gctx_.lst_operator_)) {
    ret = OB_NOT_INIT;
    LOG_WARN("not init", KR(ret));
  } else if (OB_UNLIKELY(!cur_leader.is_valid()
                         || !cur_leader.is_strong_leader()
                         || cur_leader.get_server() != gctx_.self_addr())) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("invalid current leader", KR(ret), K(cur_leader), "self_addr", gctx_.self_addr());
  } else if (OB_FAIL(ls_info.init_by_replica(cur_leader))) {
    LOG_WARN("init by replica failed", KR(ret), K(cur_leader));
  } else if (OB_ISNULL(inmemory_ls_table = gctx_.lst_operator_->get_inmemory_ls())) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("inmemory ls_table is null", KR(ret), KP(inmemory_ls_table));
  } else if (OB_UNLIKELY(!inmemory_ls_table->is_inited())) {
    // the inmemory table may still be uninitialized while RS has not started;
    // the leader-only ls_info is returned as is
  } else if (OB_FAIL(inmemory_ls_table->get(
      GCONF.cluster_id,
      OB_SYS_TENANT_ID,
      SYS_LS,
      share::ObLSTable::DEFAULT_MODE,
      inmemory_ls_info))) {
    LOG_WARN("failed to get in memory sys tenant ls info", KR(ret), K(inmemory_ls_info));
  } else if (OB_FAIL(inmemory_ls_info.find_leader(mem_leader))) {
    if (OB_ENTRY_NOT_EXIST != ret) {
      LOG_WARN("fail to find leader in inmemory_ls_info", KR(ret), K(inmemory_ls_info));
    } else {
      // no leader replica has been reported to the inmemory table yet: not an error
      ret = OB_SUCCESS;
    }
  } else if (OB_ISNULL(mem_leader)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("leader replica can not be null", KR(ret), K(inmemory_ls_info));
  } else {
    // inmemory ls info is unreliable when its leader differs from the current one
    const bool leader_mismatch =
        mem_leader->get_proposal_id() != cur_leader.get_proposal_id()
        || mem_leader->get_server() != cur_leader.get_server();
    can_merge_followers = !leader_mismatch;
  }

  // Phase 2: ls_info = cur_leader + inmemory followers.
  if (can_merge_followers) {
    const int64_t replica_cnt = inmemory_ls_info.get_replicas().count();
    for (int64_t idx = 0; OB_SUCC(ret) && idx < replica_cnt; ++idx) {
      const ObLSReplica &replica = inmemory_ls_info.get_replicas().at(idx);
      if (replica.is_strong_leader()) {
        // the leader is already represented by cur_leader
      } else if (OB_FAIL(ls_info.add_replica(replica))) {
        LOG_WARN("add replica failed", KR(ret), K(replica), K(ls_info), K(inmemory_ls_info));
      }
    }
    if (OB_FAIL(ret)) {
      // keep the error reported by add_replica
    } else if (OB_FAIL(ls_info.update_replica_status())) {
      LOG_WARN("update replica status failed", KR(ret),
               K(ls_info), K(cur_leader), K(inmemory_ls_info));
    }
  }
  return ret;
}
int ObService::get_root_server_status(ObGetRootserverRoleResult &get_role_result) int ObService::get_root_server_status(ObGetRootserverRoleResult &get_role_result)
{ {
int ret = OB_SUCCESS; int ret = OB_SUCCESS;

View File

@ -248,6 +248,9 @@ private:
int handle_ls_freeze_req_(const obrpc::ObMinorFreezeArg &arg); int handle_ls_freeze_req_(const obrpc::ObMinorFreezeArg &arg);
int tenant_freeze_(const uint64_t tenant_id); int tenant_freeze_(const uint64_t tenant_id);
int ls_freeze_(const uint64_t tenant_id, const share::ObLSID &ls_id, const common::ObTabletID &tablet_id); int ls_freeze_(const uint64_t tenant_id, const share::ObLSID &ls_id, const common::ObTabletID &tablet_id);
// Fill ls_info with cur_leader, plus the follower replicas from the inmemory ls table
// when the inmemory leader has the same proposal_id and server as cur_leader.
int generate_master_rs_ls_info_(
const share::ObLSReplica &cur_leader,
share::ObLSInfo &ls_info);
private: private:
bool inited_; bool inited_;
bool in_register_process_; bool in_register_process_;

View File

@ -228,7 +228,6 @@ bool ObLSReplica::is_equal_for_report(const ObLSReplica &other) const
&& role_ == other.role_ && role_ == other.role_
&& member_list_is_equal(member_list_, other.member_list_) && member_list_is_equal(member_list_, other.member_list_)
&& replica_type_ == other.replica_type_ && replica_type_ == other.replica_type_
&& proposal_id_ == other.proposal_id_
&& replica_status_ == other.replica_status_ && replica_status_ == other.replica_status_
&& restore_status_ == other.restore_status_ && restore_status_ == other.restore_status_
&& property_ == other.property_ && property_ == other.property_
@ -237,6 +236,11 @@ bool ObLSReplica::is_equal_for_report(const ObLSReplica &other) const
&& paxos_replica_number_ == other.paxos_replica_number_) { && paxos_replica_number_ == other.paxos_replica_number_) {
is_equal = true; is_equal = true;
} }
// only proposal_id of leader is meaningful
// proposal_id of follower will be set to 0 in reporting process
if (is_equal && ObRole::LEADER == role_) {
is_equal = (proposal_id_ == other.proposal_id_);
}
return is_equal; return is_equal;
} }

View File

@ -503,7 +503,7 @@ int ObPersistentLSTable::set_role_(
LOG_WARN("convert leader_server ip to string failed", KR(ret), K(leader_server)); LOG_WARN("convert leader_server ip to string failed", KR(ret), K(leader_server));
} else if (OB_FAIL(sql.assign_fmt( } else if (OB_FAIL(sql.assign_fmt(
"UPDATE %s " "UPDATE %s "
"SET gmt_modified = now(6), role = (" "SET role = ("
"CASE WHEN svr_ip = '%s' AND svr_port = %d THEN %d " "CASE WHEN svr_ip = '%s' AND svr_port = %d THEN %d "
"ELSE %d end), proposal_id = (" "ELSE %d end), proposal_id = ("
"CASE WHEN svr_ip = '%s' AND svr_port = %d THEN proposal_id " "CASE WHEN svr_ip = '%s' AND svr_port = %d THEN proposal_id "
@ -536,13 +536,10 @@ int ObPersistentLSTable::update_replica_(
LOG_WARN("fill dml splicer failed", KR(ret), K(replica)); LOG_WARN("fill dml splicer failed", KR(ret), K(replica));
} else { } else {
ObDMLExecHelper exec(sql_client, sql_tenant_id); ObDMLExecHelper exec(sql_client, sql_tenant_id);
if (OB_FAIL(dml.add_gmt_modified())) { if (OB_FAIL(exec.exec_insert_update(table_name, dml, affected_rows))) {
LOG_WARN("add gmt modified to dml sql failed", KR(ret));
} else if (OB_FAIL(exec.exec_insert_update(table_name, dml, affected_rows))) {
//insert_update means if row exist update, if not exist insert //insert_update means if row exist update, if not exist insert
LOG_WARN("execute update failed", KR(ret), K(replica)); LOG_WARN("execute update failed", KR(ret), K(replica));
} else if (is_zero_row(affected_rows) || affected_rows > 2) { } else if (OB_UNLIKELY(affected_rows < 0 || affected_rows > 2)) {
// only insert on duplicate key update (insert_update) check affected single row
ret = OB_ERR_UNEXPECTED; ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected affected_rows", KR(ret), K(affected_rows)); LOG_WARN("unexpected affected_rows", KR(ret), K(affected_rows));
} }

View File

@ -240,7 +240,7 @@ int ObRpcLSTable::do_detect_master_rs_ls_(
&& start_idx <= end_idx && start_idx <= end_idx
&& end_idx < server_list.count() && end_idx < server_list.count()
&& OB_ISNULL(leader)) { && OB_ISNULL(leader)) {
LOG_INFO("[RPC_LS] do detect master rs", K(cluster_id), K(start_idx), K(end_idx), K(server_list)); LOG_TRACE("[RPC_LS] do detect master rs", K(cluster_id), K(start_idx), K(end_idx), K(server_list));
if (OB_FAIL(do_detect_master_rs_ls_(cluster_id, start_idx, end_idx, if (OB_FAIL(do_detect_master_rs_ls_(cluster_id, start_idx, end_idx,
server_list, ls_info))) { server_list, ls_info))) {
LOG_WARN("fail to detect master rs", KR(ret), K(cluster_id), LOG_WARN("fail to detect master rs", KR(ret), K(cluster_id),
@ -248,7 +248,7 @@ int ObRpcLSTable::do_detect_master_rs_ls_(
} else { } else {
int tmp_ret = ls_info.find_leader(leader); int tmp_ret = ls_info.find_leader(leader);
if (OB_SUCCESS == tmp_ret && OB_NOT_NULL(leader)) { if (OB_SUCCESS == tmp_ret && OB_NOT_NULL(leader)) {
LOG_INFO("[RPC_LS] get master rs", KR(ret), K(cluster_id), "addr", leader->get_server()); LOG_TRACE("[RPC_LS] get master rs", KR(ret), K(cluster_id), "addr", leader->get_server());
} }
start_idx = end_idx + 1; start_idx = end_idx + 1;
end_idx = server_list.count() - 1; end_idx = server_list.count() - 1;