Fix two bugs: follower cannot get full sys tenant ls info and ls meta checker reports too often
@@ -1841,11 +1841,8 @@ int ObService::detect_master_rs_ls(
   } else if (replica.is_strong_leader()) {
     // case 2 : replica is leader, do not use in_service to check whether it is leader or not
     // use in_service could lead to bad case: https://yuque.antfin.com/ob/rootservice/pbw2qw
-    const ObLSReplica *leader = NULL;
-
-    // FIXME: Need Use in memory table operator to fill log stream info
-    if (OB_FAIL(ls_info.init_by_replica(replica))) {
-      LOG_WARN("init by replica failed", KR(ret), K(replica));
+    if (OB_FAIL(generate_master_rs_ls_info_(replica, ls_info))) {
+      LOG_WARN("generate master rs ls info failed", KR(ret), K(replica), K(ls_info));
     } else if (OB_FAIL(result.init(ObRole::LEADER, master_rs, replica, ls_info))) {
       LOG_WARN("fail to init result", KR(ret), K(master_rs), K(replica), K(ls_info));
     }

@@ -1867,6 +1864,68 @@ int ObService::detect_master_rs_ls(
   return ret;
 }
 
+// Use the local leader replica as ls_info by default, while trying to get full ls info from inmemory ls table.
+// If proposal_id and server of inmemory leader and cur_leader are same, ls_info = cur_leader + inmemory followers.
+int ObService::generate_master_rs_ls_info_(
+    const share::ObLSReplica &cur_leader,
+    share::ObLSInfo &ls_info)
+{
+  int ret = OB_SUCCESS;
+  ls_info.reset();
+  ObInMemoryLSTable *inmemory_ls_table = NULL;
+  const ObLSReplica *inmemory_leader = NULL;
+  ObLSInfo inmemory_ls_info;
+  if (OB_UNLIKELY(!inited_) || OB_ISNULL(gctx_.lst_operator_)) {
+    ret = OB_NOT_INIT;
+    LOG_WARN("not init", KR(ret));
+  } else if (OB_UNLIKELY(!cur_leader.is_valid()
+      || !cur_leader.is_strong_leader()
+      || cur_leader.get_server() != gctx_.self_addr())) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid current leader", KR(ret), K(cur_leader), "self_addr", gctx_.self_addr());
+  } else if (OB_FAIL(ls_info.init_by_replica(cur_leader))) {
+    LOG_WARN("init by replica failed", KR(ret), K(cur_leader));
+  } else if (OB_ISNULL(inmemory_ls_table = gctx_.lst_operator_->get_inmemory_ls())) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("inmemory ls_table is null", KR(ret), KP(inmemory_ls_table));
+  } else if (OB_UNLIKELY(!inmemory_ls_table->is_inited())) {
+    // if RS is not started, inmemory_ls_table may be uninitialized
+  } else if (OB_FAIL(inmemory_ls_table->get(
+      GCONF.cluster_id,
+      OB_SYS_TENANT_ID,
+      SYS_LS,
+      share::ObLSTable::DEFAULT_MODE,
+      inmemory_ls_info))) {
+    LOG_WARN("failed to get in memory sys tenant ls info", KR(ret), K(inmemory_ls_info));
+  } else if (OB_FAIL(inmemory_ls_info.find_leader(inmemory_leader))) {
+    if (OB_ENTRY_NOT_EXIST == ret) { // ls replica hasn't been reported to memory
+      ret = OB_SUCCESS;
+    } else {
+      LOG_WARN("fail to find leader in inmemory_ls_info", KR(ret), K(inmemory_ls_info));
+    }
+  } else if (OB_ISNULL(inmemory_leader)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("leader replica can not be null", KR(ret), K(inmemory_ls_info));
+  } else if (inmemory_leader->get_proposal_id() != cur_leader.get_proposal_id()
+      || inmemory_leader->get_server() != cur_leader.get_server()) {
+    // do not use unreliable inmemory ls info
+  } else {
+    ARRAY_FOREACH(inmemory_ls_info.get_replicas(), idx) {
+      const ObLSReplica &replica = inmemory_ls_info.get_replicas().at(idx);
+      if (!replica.is_strong_leader()) {
+        if (OB_FAIL(ls_info.add_replica(replica))) {
+          LOG_WARN("add replica failed", KR(ret), K(replica), K(ls_info), K(inmemory_ls_info));
+        }
+      }
+    }
+    if (FAILEDx(ls_info.update_replica_status())) {
+      LOG_WARN("update replica status failed", KR(ret),
+          K(ls_info), K(cur_leader), K(inmemory_ls_info));
+    }
+  }
+  return ret;
+}
+
 int ObService::get_root_server_status(ObGetRootserverRoleResult &get_role_result)
 {
   int ret = OB_SUCCESS;
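
Note on the hunk above: before this commit, detect_master_rs_ls() built ls_info from the local leader replica alone (init_by_replica), so a follower probing the master RS could never obtain the full sys tenant LS member view. The new helper keeps the local leader as the authoritative entry and appends followers from the in-memory LS table only when that view is provably current. Below is a standalone, compilable sketch of the merge rule; Replica, merge_ls_info and the addresses are illustrative stand-ins, not the real ObLSReplica/ObLSInfo API.

    #include <cstdint>
    #include <iostream>
    #include <string>
    #include <vector>

    struct Replica {
      std::string server;
      int64_t proposal_id = 0;
      bool is_leader = false;
    };

    // Keep the locally known leader, then append in-memory followers only when
    // the in-memory leader agrees with the local leader on server and
    // proposal_id; otherwise the in-memory view may be stale and is ignored.
    std::vector<Replica> merge_ls_info(const Replica &cur_leader,
                                       const std::vector<Replica> &inmemory) {
      std::vector<Replica> ls_info{cur_leader};
      const Replica *inmemory_leader = nullptr;
      for (const Replica &r : inmemory) {
        if (r.is_leader) inmemory_leader = &r;
      }
      if (inmemory_leader != nullptr
          && inmemory_leader->server == cur_leader.server
          && inmemory_leader->proposal_id == cur_leader.proposal_id) {
        for (const Replica &r : inmemory) {
          if (!r.is_leader) ls_info.push_back(r);
        }
      }
      return ls_info;
    }

    int main() {
      Replica leader{"10.0.0.1:2882", 7, true};
      std::vector<Replica> inmemory = {{"10.0.0.1:2882", 7, true},
                                       {"10.0.0.2:2882", 0, false},
                                       {"10.0.0.3:2882", 0, false}};
      std::cout << merge_ls_info(leader, inmemory).size() << "\n";  // 3: leader + 2 followers
      inmemory[0].proposal_id = 6;  // in-memory view predates a leader switch
      std::cout << merge_ls_info(leader, inmemory).size() << "\n";  // 1: leader only
      return 0;
    }

If the in-memory leader disagrees on server or proposal_id, the cached view may predate a leader switch, so the reply falls back to the single trusted leader replica rather than risk returning stale membership.
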
@@ -248,6 +248,9 @@ private:
   int handle_ls_freeze_req_(const obrpc::ObMinorFreezeArg &arg);
   int tenant_freeze_(const uint64_t tenant_id);
   int ls_freeze_(const uint64_t tenant_id, const share::ObLSID &ls_id, const common::ObTabletID &tablet_id);
+  int generate_master_rs_ls_info_(
+      const share::ObLSReplica &cur_leader,
+      share::ObLSInfo &ls_info);
 private:
   bool inited_;
   bool in_register_process_;

@@ -228,7 +228,6 @@ bool ObLSReplica::is_equal_for_report(const ObLSReplica &other) const
       && role_ == other.role_
       && member_list_is_equal(member_list_, other.member_list_)
       && replica_type_ == other.replica_type_
-      && proposal_id_ == other.proposal_id_
       && replica_status_ == other.replica_status_
       && restore_status_ == other.restore_status_
       && property_ == other.property_

@@ -237,6 +236,11 @@ bool ObLSReplica::is_equal_for_report(const ObLSReplica &other) const
       && paxos_replica_number_ == other.paxos_replica_number_) {
     is_equal = true;
   }
+  // only proposal_id of leader is meaningful
+  // proposal_id of follower will be set to 0 in reporting process
+  if (is_equal && ObRole::LEADER == role_) {
+    is_equal = (proposal_id_ == other.proposal_id_);
+  }
   return is_equal;
 }
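
This is the report-storm fix named in the commit title. The meta table checker decides whether a replica must be re-reported by comparing the in-memory replica with the row read back from the LS meta table via is_equal_for_report(). Because a follower's proposal_id is zeroed in the reporting process, comparing proposal_id unconditionally made every live follower (which knows the real proposal_id) look different from its own persisted row, triggering a re-report on every pass. A standalone sketch of the corrected rule, with simplified stand-in types rather than the real ObLSReplica:

    #include <cstdint>
    #include <iostream>

    enum class Role { LEADER, FOLLOWER };

    struct Replica {
      Role role;
      int64_t proposal_id;   // zeroed by followers in the reporting process
      int64_t other_fields;  // stand-in for every other compared member
    };

    bool is_equal_for_report(const Replica &lhs, const Replica &rhs) {
      bool is_equal = lhs.role == rhs.role && lhs.other_fields == rhs.other_fields;
      // only proposal_id of a leader is meaningful
      if (is_equal && Role::LEADER == lhs.role) {
        is_equal = (lhs.proposal_id == rhs.proposal_id);
      }
      return is_equal;
    }

    int main() {
      Replica in_memory{Role::FOLLOWER, 7, 42};  // live follower knows epoch 7
      Replica in_table{Role::FOLLOWER, 0, 42};   // persisted row stores 0
      // The old rule compared proposal_id for everyone, so this pair was
      // unequal (7 != 0) and the follower was re-reported on every check.
      std::cout << std::boolalpha
                << is_equal_for_report(in_memory, in_table) << "\n";  // true
      return 0;
    }
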
@@ -503,7 +503,7 @@ int ObPersistentLSTable::set_role_(
     LOG_WARN("convert leader_server ip to string failed", KR(ret), K(leader_server));
   } else if (OB_FAIL(sql.assign_fmt(
       "UPDATE %s "
-      "SET gmt_modified = now(6), role = ("
+      "SET role = ("
       "CASE WHEN svr_ip = '%s' AND svr_port = %d THEN %d "
      "ELSE %d end), proposal_id = ("
       "CASE WHEN svr_ip = '%s' AND svr_port = %d THEN proposal_id "

@@ -536,13 +536,10 @@ int ObPersistentLSTable::update_replica_(
     LOG_WARN("fill dml splicer failed", KR(ret), K(replica));
   } else {
     ObDMLExecHelper exec(sql_client, sql_tenant_id);
-    if (OB_FAIL(dml.add_gmt_modified())) {
-      LOG_WARN("add gmt modified to dml sql failed", KR(ret));
-    } else if (OB_FAIL(exec.exec_insert_update(table_name, dml, affected_rows))) {
+    if (OB_FAIL(exec.exec_insert_update(table_name, dml, affected_rows))) {
       //insert_update means if row exist update, if not exist insert
       LOG_WARN("execute update failed", KR(ret), K(replica));
-    } else if (is_zero_row(affected_rows) || affected_rows > 2) {
-      // only insert on duplicate key update (insert_update) check affected single row
+    } else if (OB_UNLIKELY(affected_rows < 0 || affected_rows > 2)) {
       ret = OB_ERR_UNEXPECTED;
       LOG_WARN("unexpected affected_rows", KR(ret), K(affected_rows));
     }
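
The gmt_modified changes in the two hunks above work together: set_role_() and update_replica_() no longer bump gmt_modified on every write, so re-reporting an unchanged replica now writes a row identical to the existing one. exec_insert_update() issues an INSERT ... ON DUPLICATE KEY UPDATE, for which MySQL reports affected_rows = 1 for a fresh insert, 2 for an update that changes an existing row, and 0 when the row is rewritten with identical values. That last case is presumably why the old is_zero_row() check had to go: a no-op re-report is now legal rather than an error. A minimal standalone sketch of the new acceptance rule (illustrative names, not the real code):

    #include <cassert>
    #include <cstdint>

    // Accept 0 (row unchanged), 1 (row inserted) or 2 (row updated) from an
    // INSERT ... ON DUPLICATE KEY UPDATE; anything else is unexpected.
    bool affected_rows_ok(int64_t affected_rows) {
      return !(affected_rows < 0 || affected_rows > 2);
    }

    int main() {
      assert(affected_rows_ok(0));   // identical re-report; the old check errored here
      assert(affected_rows_ok(1));   // new row inserted
      assert(affected_rows_ok(2));   // existing row updated
      assert(!affected_rows_ok(3));  // still flagged as unexpected
      return 0;
    }
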
@@ -240,7 +240,7 @@ int ObRpcLSTable::do_detect_master_rs_ls_(
       && start_idx <= end_idx
       && end_idx < server_list.count()
       && OB_ISNULL(leader)) {
-    LOG_INFO("[RPC_LS] do detect master rs", K(cluster_id), K(start_idx), K(end_idx), K(server_list));
+    LOG_TRACE("[RPC_LS] do detect master rs", K(cluster_id), K(start_idx), K(end_idx), K(server_list));
     if (OB_FAIL(do_detect_master_rs_ls_(cluster_id, start_idx, end_idx,
         server_list, ls_info))) {
       LOG_WARN("fail to detect master rs", KR(ret), K(cluster_id),

@@ -248,7 +248,7 @@ int ObRpcLSTable::do_detect_master_rs_ls_(
   } else {
     int tmp_ret = ls_info.find_leader(leader);
     if (OB_SUCCESS == tmp_ret && OB_NOT_NULL(leader)) {
-      LOG_INFO("[RPC_LS] get master rs", KR(ret), K(cluster_id), "addr", leader->get_server());
+      LOG_TRACE("[RPC_LS] get master rs", KR(ret), K(cluster_id), "addr", leader->get_server());
     }
     start_idx = end_idx + 1;
     end_idx = server_list.count() - 1;