From c2d837645fe2ca6f2499dd779e524c8f1c5b66d4 Mon Sep 17 00:00:00 2001 From: pe-99y <315053752@qq.com> Date: Wed, 20 Nov 2024 15:15:05 +0000 Subject: [PATCH] fix wrong leader selection during statement-level retry --- mittest/mtlenv/storage/test_trans.cpp | 4 ++-- src/sql/das/ob_das_location_router.cpp | 27 ++++++++++++------------ src/sql/engine/ob_exec_context.h | 1 + src/sql/ob_sql.cpp | 1 + src/storage/tx/ob_tablet_to_ls_cache.cpp | 4 +++- 5 files changed, 21 insertions(+), 16 deletions(-) diff --git a/mittest/mtlenv/storage/test_trans.cpp b/mittest/mtlenv/storage/test_trans.cpp index 8085f1ac9..8821c605f 100644 --- a/mittest/mtlenv/storage/test_trans.cpp +++ b/mittest/mtlenv/storage/test_trans.cpp @@ -506,7 +506,7 @@ TEST_F(TestTrans, tablet_to_ls_cache) ASSERT_EQ(true, is_local); // tablet not exist const ObTabletID &tablet_id_2 = tablet_ids_2.at(i); - ASSERT_EQ(OB_ENTRY_NOT_EXIST, tx_service->check_and_get_ls_info(tablet_id_2, ls_id, is_local)); + ASSERT_EQ(OB_SUCCESS, tx_service->check_and_get_ls_info(tablet_id_2, ls_id, is_local)); } ASSERT_EQ(TABLET_NUM + base_size, tx_service->tablet_to_ls_cache_.size()); @@ -521,7 +521,7 @@ TEST_F(TestTrans, tablet_to_ls_cache) ASSERT_EQ(ls_id_1, ls_id); ASSERT_EQ(true, is_local); tx_service->remove_tablet(tablet_id, ls_id); - ASSERT_EQ(OB_ENTRY_NOT_EXIST, tx_service->check_and_get_ls_info(tablet_id, ls_id, is_local)); + ASSERT_EQ(OB_SUCCESS, tx_service->check_and_get_ls_info(tablet_id, ls_id, is_local)); } ASSERT_EQ(0, tx_service->tablet_to_ls_cache_.size()); ASSERT_EQ(base_ref, ls_tx_ctx_mgr->get_ref()); diff --git a/src/sql/das/ob_das_location_router.cpp b/src/sql/das/ob_das_location_router.cpp index b9d2509e6..82d7c9fe4 100755 --- a/src/sql/das/ob_das_location_router.cpp +++ b/src/sql/das/ob_das_location_router.cpp @@ -999,26 +999,27 @@ int ObDASLocationRouter::nonblock_get_leader(const uint64_t tenant_id, bool is_local_leader = false; if (OB_FAIL(all_tablet_list_.push_back(tablet_id))) { LOG_WARN("store access tablet 
id failed", K(ret), K(tablet_id)); - } else if (get_total_retry_cnt() > 0 || OB_FAIL(trans_service->check_and_get_ls_info(tablet_id, tablet_loc.ls_id_, is_local_leader))) { - ret = OB_SUCCESS; - if (OB_FAIL(GCTX.location_service_->nonblock_get(tenant_id, - tablet_id, - tablet_loc.ls_id_))) { - LOG_WARN("nonblock get ls id failed", K(ret), K(tablet_id)); - } else if (OB_FAIL(GCTX.location_service_->nonblock_get_leader(GCONF.cluster_id, - tenant_id, - tablet_loc.ls_id_, - tablet_loc.server_))) { - LOG_WARN("nonblock get ls location failed", K(ret), K(tablet_loc)); - } - } else if (is_local_leader) { + } else if (get_total_retry_cnt() == 0 + && OB_SUCC(trans_service->check_and_get_ls_info(tablet_id, tablet_loc.ls_id_, is_local_leader)) + && is_local_leader) { + // when not in retry, try local leader optimization tablet_loc.server_ = GCTX.self_addr(); + } else if (OB_FAIL(GCTX.location_service_->nonblock_get(tenant_id, + tablet_id, + tablet_loc.ls_id_))) { + LOG_WARN("nonblock get ls id failed", K(ret), K(tablet_id)); } else if (OB_FAIL(GCTX.location_service_->nonblock_get_leader(GCONF.cluster_id, tenant_id, tablet_loc.ls_id_, tablet_loc.server_))) { LOG_WARN("nonblock get ls location failed", K(ret), K(tablet_loc)); } + if (OB_SUCC(ret) && get_total_retry_cnt() > 0 && last_errno_ == OB_NOT_MASTER) { + // flush ls cache when OB_NOT_MASTER + if (OB_FAIL(trans_service->remove_tablet(tablet_id, tablet_loc.ls_id_))) { + LOG_WARN("failed to remove tablet cache", K(ret), K(tablet_id)); + } + } if (is_partition_change_error(ret)) { /*During the execution phase, if nonblock location interface is used to obtain the location * and an exception occurs, retries are necessary. 
diff --git a/src/sql/engine/ob_exec_context.h b/src/sql/engine/ob_exec_context.h index af545d0bf..61f2932d6 100644 --- a/src/sql/engine/ob_exec_context.h +++ b/src/sql/engine/ob_exec_context.h @@ -557,6 +557,7 @@ public: AutoDopHashMap& get_auto_dop_map() { return auto_dop_map_; } void set_force_gen_local_plan() { force_local_plan_ = true; } bool is_force_gen_local_plan() const { return force_local_plan_; } + void set_retry_info(const ObQueryRetryInfo *retry_info) { das_ctx_.get_location_router().set_retry_info(retry_info); } private: int build_temp_expr_ctx(const ObTempExpr &temp_expr, ObTempExprCtx *&temp_expr_ctx); diff --git a/src/sql/ob_sql.cpp b/src/sql/ob_sql.cpp index 3ea8a64ee..b3428eff8 100644 --- a/src/sql/ob_sql.cpp +++ b/src/sql/ob_sql.cpp @@ -4070,6 +4070,7 @@ OB_INLINE int ObSql::init_exec_context(const ObSqlCtx &context, ObExecContext &e context.session_info_->get_query_start_time() + query_timeout); exec_ctx.get_physical_plan_ctx()->set_rich_format( context.session_info_->use_rich_format()); + exec_ctx.set_retry_info(&context.session_info_->get_retry_info()); } } return ret; diff --git a/src/storage/tx/ob_tablet_to_ls_cache.cpp b/src/storage/tx/ob_tablet_to_ls_cache.cpp index 73bbb35e2..351a3eead 100644 --- a/src/storage/tx/ob_tablet_to_ls_cache.cpp +++ b/src/storage/tx/ob_tablet_to_ls_cache.cpp @@ -205,7 +205,9 @@ int ObTabletToLSCache::check_and_get_ls_info(const common::ObTabletID &tablet_id ret = OB_INVALID_ARGUMENT; TRANS_LOG(WARN, "invalid argument", KR(ret), K(tablet_id)); } else if (OB_FAIL(map_.get(tablet_id, ls_cache))) { - if (OB_ENTRY_NOT_EXIST != ret) { + if (OB_ENTRY_NOT_EXIST == ret) { + ret = OB_SUCCESS; + } else { TRANS_LOG(WARN, "get ls cache fail", KR(ret), K(tablet_id)); } } else {