Fix wrong leader selection during statement-level retry

This commit is contained in:
pe-99y 2024-11-20 15:15:05 +00:00 committed by ob-robot
parent 3aecb7da09
commit c2d837645f
5 changed files with 21 additions and 16 deletions

View File

@ -506,7 +506,7 @@ TEST_F(TestTrans, tablet_to_ls_cache)
ASSERT_EQ(true, is_local);
// tablet not exist
const ObTabletID &tablet_id_2 = tablet_ids_2.at(i);
ASSERT_EQ(OB_ENTRY_NOT_EXIST, tx_service->check_and_get_ls_info(tablet_id_2, ls_id, is_local));
ASSERT_EQ(OB_SUCCESS, tx_service->check_and_get_ls_info(tablet_id_2, ls_id, is_local));
}
ASSERT_EQ(TABLET_NUM + base_size, tx_service->tablet_to_ls_cache_.size());
@ -521,7 +521,7 @@ TEST_F(TestTrans, tablet_to_ls_cache)
ASSERT_EQ(ls_id_1, ls_id);
ASSERT_EQ(true, is_local);
tx_service->remove_tablet(tablet_id, ls_id);
ASSERT_EQ(OB_ENTRY_NOT_EXIST, tx_service->check_and_get_ls_info(tablet_id, ls_id, is_local));
ASSERT_EQ(OB_SUCCESS, tx_service->check_and_get_ls_info(tablet_id, ls_id, is_local));
}
ASSERT_EQ(0, tx_service->tablet_to_ls_cache_.size());
ASSERT_EQ(base_ref, ls_tx_ctx_mgr->get_ref());

View File

@ -999,26 +999,27 @@ int ObDASLocationRouter::nonblock_get_leader(const uint64_t tenant_id,
bool is_local_leader = false;
if (OB_FAIL(all_tablet_list_.push_back(tablet_id))) {
LOG_WARN("store access tablet id failed", K(ret), K(tablet_id));
} else if (get_total_retry_cnt() > 0 || OB_FAIL(trans_service->check_and_get_ls_info(tablet_id, tablet_loc.ls_id_, is_local_leader))) {
ret = OB_SUCCESS;
if (OB_FAIL(GCTX.location_service_->nonblock_get(tenant_id,
tablet_id,
tablet_loc.ls_id_))) {
LOG_WARN("nonblock get ls id failed", K(ret), K(tablet_id));
} else if (OB_FAIL(GCTX.location_service_->nonblock_get_leader(GCONF.cluster_id,
tenant_id,
tablet_loc.ls_id_,
tablet_loc.server_))) {
LOG_WARN("nonblock get ls location failed", K(ret), K(tablet_loc));
}
} else if (is_local_leader) {
} else if (get_total_retry_cnt() == 0
&& OB_SUCC(trans_service->check_and_get_ls_info(tablet_id, tablet_loc.ls_id_, is_local_leader))
&& is_local_leader) {
// when not in retry, try local leader optimization
tablet_loc.server_ = GCTX.self_addr();
} else if (OB_FAIL(GCTX.location_service_->nonblock_get(tenant_id,
tablet_id,
tablet_loc.ls_id_))) {
LOG_WARN("nonblock get ls id failed", K(ret), K(tablet_id));
} else if (OB_FAIL(GCTX.location_service_->nonblock_get_leader(GCONF.cluster_id,
tenant_id,
tablet_loc.ls_id_,
tablet_loc.server_))) {
LOG_WARN("nonblock get ls location failed", K(ret), K(tablet_loc));
}
if (OB_SUCC(ret) && get_total_retry_cnt() > 0 && last_errno_ == OB_NOT_MASTER) {
// flush ls cache when OB_NOT_MASTER
if (OB_FAIL(trans_service->remove_tablet(tablet_id, tablet_loc.ls_id_))) {
LOG_WARN("failed to remove tablet cache", K(ret), K(tablet_id));
}
}
if (is_partition_change_error(ret)) {
/*During the execution phase, if nonblock location interface is used to obtain the location
* and an exception occurs, retries are necessary.

View File

@ -557,6 +557,7 @@ public:
// Returns a mutable reference to the auto_dop_map_ member.
AutoDopHashMap& get_auto_dop_map()
{
  return auto_dop_map_;
}
// Turns on the force_local_plan_ flag; this class offers no way to clear it
// through this setter.
void set_force_gen_local_plan()
{
  force_local_plan_ = true;
}
// Reports the current value of the force_local_plan_ flag.
bool is_force_gen_local_plan() const
{
  return force_local_plan_;
}
// Forwards the given retry-info pointer to the location router obtained from
// das_ctx_. The pointer is passed through as-is; no null check is done here.
void set_retry_info(const ObQueryRetryInfo *retry_info)
{
  das_ctx_.get_location_router().set_retry_info(retry_info);
}
private:
int build_temp_expr_ctx(const ObTempExpr &temp_expr, ObTempExprCtx *&temp_expr_ctx);

View File

@ -4070,6 +4070,7 @@ OB_INLINE int ObSql::init_exec_context(const ObSqlCtx &context, ObExecContext &e
context.session_info_->get_query_start_time() + query_timeout);
exec_ctx.get_physical_plan_ctx()->set_rich_format(
context.session_info_->use_rich_format());
exec_ctx.set_retry_info(&context.session_info_->get_retry_info());
}
}
return ret;

View File

@ -205,7 +205,9 @@ int ObTabletToLSCache::check_and_get_ls_info(const common::ObTabletID &tablet_id
ret = OB_INVALID_ARGUMENT;
TRANS_LOG(WARN, "invalid argument", KR(ret), K(tablet_id));
} else if (OB_FAIL(map_.get(tablet_id, ls_cache))) {
if (OB_ENTRY_NOT_EXIST != ret) {
if (OB_ENTRY_NOT_EXIST == ret) {
ret = OB_SUCCESS;
} else {
TRANS_LOG(WARN, "get ls cache fail", KR(ret), K(tablet_id));
}
} else {