From 0e9c4cda2bf0f25718b8bfb90e2f318323ae8b29 Mon Sep 17 00:00:00 2001 From: xianyu-w <707512433@qq.com> Date: Thu, 11 Jul 2024 13:17:10 +0000 Subject: [PATCH] [CP] Fix limit group by cardinality estimation bug --- src/sql/optimizer/ob_log_distinct.cpp | 42 ++++++++++++++++++++++++++- src/sql/optimizer/ob_log_distinct.h | 5 +++- src/sql/optimizer/ob_log_group_by.cpp | 2 +- 3 files changed, 46 insertions(+), 3 deletions(-) diff --git a/src/sql/optimizer/ob_log_distinct.cpp b/src/sql/optimizer/ob_log_distinct.cpp index ad3eb51bb..9910816d1 100644 --- a/src/sql/optimizer/ob_log_distinct.cpp +++ b/src/sql/optimizer/ob_log_distinct.cpp @@ -169,7 +169,15 @@ int ObLogDistinct::do_re_est_cost(EstimateCostInfo &param, double &card, double param.need_row_count_ < child_card && param.need_row_count_ < total_ndv_) { child_ndv = param.need_row_count_; - param.need_row_count_ = child_card * (1 - std::pow((1 - child_ndv / total_ndv_), total_ndv_ / child_card)); + if (input_sorted_) { + if (param.need_row_count_ > 1.0) { + param.need_row_count_ = child_card * (param.need_row_count_ - 1.0) / total_ndv_ + 1.0; + } else { + // do nothing + } + } else { + param.need_row_count_ = child_card * (1 - std::pow((1 - child_ndv / total_ndv_), total_ndv_ / child_card)); + } } else { param.need_row_count_ = -1; need_scale_ndv = true; @@ -395,5 +403,37 @@ int ObLogDistinct::check_use_child_ordering(bool &used, int64_t &inherit_child_o return ret; } +int ObLogDistinct::compute_property() +{ + int ret = OB_SUCCESS; + ObLogicalOperator *top = get_child(first_child); + bool need_sort = false; + int64_t prefix_pos = 0; + if (OB_ISNULL(top) || OB_ISNULL(get_plan())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else if (OB_FAIL(ObLogicalOperator::compute_property())) { + LOG_WARN("failed to compute op property", K(ret)); + } else if (MERGE_AGGREGATE == algo_) { + input_sorted_ = true; + } else if (OB_FAIL(ObOptimizerUtil::check_need_sort(distinct_exprs_, + NULL/*directions*/,
+ top->get_op_ordering(), + top->get_fd_item_set(), + top->get_output_equal_sets(), + top->get_output_const_exprs(), + get_plan()->get_onetime_query_refs(), + top->get_is_at_most_one_row(), + need_sort, + prefix_pos))) { + LOG_WARN("failed to check need sort", K(ret)); + } else if (!need_sort) { + input_sorted_ = true; + } else { + input_sorted_ = false; + } + return ret; +} + } } diff --git a/src/sql/optimizer/ob_log_distinct.h b/src/sql/optimizer/ob_log_distinct.h index 28ddd3f8e..1760e77ee 100644 --- a/src/sql/optimizer/ob_log_distinct.h +++ b/src/sql/optimizer/ob_log_distinct.h @@ -28,7 +28,8 @@ public: is_block_mode_(false), is_push_down_(false), total_ndv_(-1.0), - force_push_down_(false) + force_push_down_(false), + input_sorted_(false) { } virtual ~ObLogDistinct() { } @@ -76,6 +77,7 @@ public: inline void set_is_partition_gi(bool v) { is_partition_gi_ = v; } virtual int get_card_without_filter(double &card) override; virtual int check_use_child_ordering(bool &used, int64_t &inherit_child_ordering_index)override; + virtual int compute_property() override; private: common::ObSEArray distinct_exprs_; @@ -85,6 +87,7 @@ private: double total_ndv_; bool force_push_down_; // control by _aggregation_optimization_settings bool is_partition_gi_; + bool input_sorted_; private: DISALLOW_COPY_AND_ASSIGN(ObLogDistinct); }; diff --git a/src/sql/optimizer/ob_log_group_by.cpp b/src/sql/optimizer/ob_log_group_by.cpp index f5e59d5b2..83bece9f0 100644 --- a/src/sql/optimizer/ob_log_group_by.cpp +++ b/src/sql/optimizer/ob_log_group_by.cpp @@ -285,7 +285,7 @@ int ObLogGroupBy::do_re_est_cost(EstimateCostInfo &param, double &card, double & need_ndv /= selectivity; } if (child_card > 0) { - param.need_row_count_ = child_card * (1 - std::pow((1 - need_ndv / child_ndv), child_ndv / child_card)); + param.need_row_count_ = child_card * need_ndv / child_ndv; param.need_row_count_ /= number_of_copies; } else { param.need_row_count_ = 0;