From 752e8135efa978f695b7ec926298a7ff0eb55ae7 Mon Sep 17 00:00:00 2001 From: ChangerR Date: Wed, 16 Oct 2024 07:13:46 +0000 Subject: [PATCH] [CP] fix dynamic sampling retry 4723 cause sql query timeout --- src/share/stat/ob_basic_stats_estimator.cpp | 11 ++++++++-- src/share/stat/ob_basic_stats_estimator.h | 1 + src/sql/ob_sql_utils.cpp | 14 +++++++++++++ src/sql/ob_sql_utils.h | 3 +++ .../optimizer/ob_access_path_estimation.cpp | 2 +- src/sql/optimizer/ob_dynamic_sampling.cpp | 21 +++++++++++++++++-- src/sql/optimizer/ob_opt_selectivity.cpp | 2 +- 7 files changed, 48 insertions(+), 6 deletions(-) diff --git a/src/share/stat/ob_basic_stats_estimator.cpp b/src/share/stat/ob_basic_stats_estimator.cpp index 7e231f941a..be34a7e3e2 100644 --- a/src/share/stat/ob_basic_stats_estimator.cpp +++ b/src/share/stat/ob_basic_stats_estimator.cpp @@ -335,7 +335,8 @@ int ObBasicStatsEstimator::do_estimate_block_count(ObExecContext &ctx, if (OB_FAIL(THIS_WORKER.check_status())) { LOG_WARN("failed to check status", K(ret)); retry_cnt = MAX_RETRY_CNT; - } else if (OB_FAIL(do_estimate_block_count_and_row_count(ctx, tenant_id, table_id, tablet_ids, + } else if (OB_FAIL(do_estimate_block_count_and_row_count(ctx, tenant_id, table_id, + false, tablet_ids, partition_ids, column_group_ids, estimate_res))) { LOG_WARN("failed to do estimate block count and row count", K(ret)); if (DAS_CTX(ctx).get_location_router().is_refresh_location_error(ret)) { @@ -353,6 +354,7 @@ int ObBasicStatsEstimator::do_estimate_block_count(ObExecContext &ctx, int ObBasicStatsEstimator::do_estimate_block_count_and_row_count(ObExecContext &ctx, const uint64_t tenant_id, const uint64_t table_id, + bool force_leader, const ObIArray &tablet_ids, const ObIArray &partition_ids, const ObIArray &column_group_ids, @@ -374,12 +376,17 @@ int ObBasicStatsEstimator::do_estimate_block_count_and_row_count(ObExecContext & ObSEArray all_selected_addr; for (int64_t i = 0; OB_SUCC(ret) && i < candi_tablet_locs.count(); ++i) { ObAddr selected_addr; - if (OB_FAIL(ObSQLUtils::choose_best_partition_replica_addr(ctx.get_addr(), + if (!force_leader && + OB_FAIL(ObSQLUtils::choose_best_partition_replica_addr(ctx.get_addr(), candi_tablet_locs.at(i), true, selected_addr))) { LOG_WARN("failed to get best partition replica addr", K(ret), K(candi_tablet_locs), K(i), K(ctx.get_addr())); + } else if (force_leader && + OB_FAIL(ObSQLUtils::get_strong_partition_replica_addr(candi_tablet_locs.at(i), + selected_addr))) { + LOG_WARN("failed to get strong partition replicate addr", K(ret)); } else if (OB_FAIL(all_selected_addr.push_back(selected_addr))) { LOG_WARN("failed to push back", K(ret)); } else {/*do nothing*/} diff --git a/src/share/stat/ob_basic_stats_estimator.h b/src/share/stat/ob_basic_stats_estimator.h index 7ce0c2323f..31a361e8e5 100644 --- a/src/share/stat/ob_basic_stats_estimator.h +++ b/src/share/stat/ob_basic_stats_estimator.h @@ -116,6 +116,7 @@ public: static int do_estimate_block_count_and_row_count(ObExecContext &ctx, const uint64_t tenant_id, const uint64_t table_id, + bool force_leader, const ObIArray &tablet_ids, const ObIArray &partition_ids, const ObIArray &column_group_ids, diff --git a/src/sql/ob_sql_utils.cpp b/src/sql/ob_sql_utils.cpp index f1a04cfa5f..f6831fd70b 100644 --- a/src/sql/ob_sql_utils.cpp +++ b/src/sql/ob_sql_utils.cpp @@ -6331,3 +6331,17 @@ bool ObSQLUtils::is_data_version_ge_424_or_433(uint64_t data_version) { return ((MOCK_DATA_VERSION_4_2_4_0 <= data_version && data_version < DATA_VERSION_4_3_0_0) || data_version >= DATA_VERSION_4_3_3_0); } + +int ObSQLUtils::get_strong_partition_replica_addr(const ObCandiTabletLoc &phy_part_loc_info, + ObAddr &selected_addr) +{ + int ret = OB_SUCCESS; + const ObOptTabletLoc &loc = phy_part_loc_info.get_partition_location(); + share::ObLSReplicaLocation replica_location; + if (OB_FAIL(loc.get_strong_leader(replica_location))) { + LOG_WARN("failed to get strong leader", K(ret)); + } else { + selected_addr = replica_location.get_server(); + } + return ret; +} diff --git a/src/sql/ob_sql_utils.h b/src/sql/ob_sql_utils.h index d97b3d2532..d841597b15 100644 --- a/src/sql/ob_sql_utils.h +++ b/src/sql/ob_sql_utils.h @@ -766,6 +766,9 @@ public: const ObProxyInfo &proxied_info, ObIArray &new_role_id_array, ObIArray &new_role_id_option_array); + + static int get_strong_partition_replica_addr(const ObCandiTabletLoc &phy_part_loc_info, + ObAddr &selected_addr); private: static bool check_mysql50_prefix(common::ObString &db_name); static bool part_expr_has_virtual_column(const ObExpr *part_expr); diff --git a/src/sql/optimizer/ob_access_path_estimation.cpp b/src/sql/optimizer/ob_access_path_estimation.cpp index 6ac64e49ec..8664724864 100644 --- a/src/sql/optimizer/ob_access_path_estimation.cpp +++ b/src/sql/optimizer/ob_access_path_estimation.cpp @@ -2075,7 +2075,7 @@ int ObAccessPathEstimation::process_dynamic_sampling_estimation(ObOptimizerConte int64_t start_time = ObTimeUtility::current_time(); bool throw_ds_error = false; if (OB_FAIL(dynamic_sampling.estimate_table_rowcount(ds_table_param, ds_result_items, throw_ds_error))) { - if (!throw_ds_error && !is_retry_ret(ret)) { + if (!throw_ds_error) { LOG_WARN("failed to estimate table rowcount caused by some reason, please check!!!", K(ret), K(start_time), K(ObTimeUtility::current_time() - start_time), K(ds_table_param), K(ctx.get_session_info()->get_current_query_string())); diff --git a/src/sql/optimizer/ob_dynamic_sampling.cpp b/src/sql/optimizer/ob_dynamic_sampling.cpp index fc77287fd2..c66ed1129d 100644 --- a/src/sql/optimizer/ob_dynamic_sampling.cpp +++ b/src/sql/optimizer/ob_dynamic_sampling.cpp @@ -25,6 +25,7 @@ #include "sql/optimizer/ob_optimizer_context.h" #include "sql/optimizer/ob_opt_selectivity.h" #include "sql/optimizer/ob_log_plan.h" +#include "sql/optimizer/ob_access_path_estimation.h" using namespace oceanbase::common; using namespace oceanbase::sql; namespace oceanbase { @@ -872,12 +873,28 @@ int ObDynamicSampling::estimate_table_block_count_and_row_count(const ObDSTableP } else if (OB_FAIL(ObBasicStatsEstimator::do_estimate_block_count_and_row_count(*ctx_->get_exec_ctx(), ctx_->get_session_info()->get_effective_tenant_id(), param.table_id_, + false, tablet_ids, partition_ids, column_group_ids, estimate_result))) { - LOG_WARN("failed to do estimate block count and row count", K(ret)); - } else { + LOG_WARN("failed to do estimate block count and row count use best replication", K(ret)); + if (!ObAccessPathEstimation::is_retry_ret(ret)) { + // do nothing + } else if (OB_FALSE_IT(ret = OB_SUCCESS)) { + } else if (OB_FAIL(ObBasicStatsEstimator::do_estimate_block_count_and_row_count(*ctx_->get_exec_ctx(), + ctx_->get_session_info()->get_effective_tenant_id(), + param.table_id_, + true, + tablet_ids, + partition_ids, + column_group_ids, + estimate_result))) { + LOG_WARN("failed to do estimate block count and row count use leader replication", K(ret)); + } + } + + if (OB_SUCC(ret)) { for (int64_t i = 0; i < estimate_result.count(); ++i) { macro_block_num_ += estimate_result.at(i).macro_block_count_; micro_block_num_ += estimate_result.at(i).micro_block_count_; diff --git a/src/sql/optimizer/ob_opt_selectivity.cpp b/src/sql/optimizer/ob_opt_selectivity.cpp index 85272b0509..76037d2150 100644 --- a/src/sql/optimizer/ob_opt_selectivity.cpp +++ b/src/sql/optimizer/ob_opt_selectivity.cpp @@ -1047,7 +1047,7 @@ int ObOptSelectivity::calc_selectivity_by_dynamic_sampling(const OptSelectivityC int64_t start_time = ObTimeUtility::current_time(); bool throw_ds_error = false; if (OB_FAIL(dynamic_sampling.estimate_table_rowcount(ds_table_param, ds_result_items, throw_ds_error))) { - if (!throw_ds_error && !ObAccessPathEstimation::is_retry_ret(ret)) { + if (!throw_ds_error) { LOG_WARN("failed to estimate filter rowcount caused by some reason, please check!!!", K(ret), K(start_time), K(ObTimeUtility::current_time() - start_time), K(ds_table_param), K(ctx.get_session_info()->get_current_query_string()));