From fea19bc54edf320d146661df55480edda5cbe0c9 Mon Sep 17 00:00:00 2001 From: obdev Date: Wed, 21 Feb 2024 09:19:05 +0000 Subject: [PATCH] [CP] reorder the order of complex filters --- src/sql/optimizer/ob_join_order.cpp | 2 +- src/sql/optimizer/ob_log_distinct.cpp | 7 ++ src/sql/optimizer/ob_log_distinct.h | 1 + src/sql/optimizer/ob_log_group_by.cpp | 7 ++ src/sql/optimizer/ob_log_group_by.h | 1 + src/sql/optimizer/ob_log_join.cpp | 17 +++++ src/sql/optimizer/ob_log_join.h | 1 + src/sql/optimizer/ob_log_set.cpp | 22 ++++++ src/sql/optimizer/ob_log_set.h | 1 + src/sql/optimizer/ob_log_table_scan.cpp | 7 ++ src/sql/optimizer/ob_log_table_scan.h | 1 + .../optimizer/ob_log_temp_table_access.cpp | 15 ++++ src/sql/optimizer/ob_log_temp_table_access.h | 1 + .../ob_log_temp_table_transformation.cpp | 12 ++++ .../ob_log_temp_table_transformation.h | 1 + src/sql/optimizer/ob_log_values.h | 6 ++ src/sql/optimizer/ob_logical_operator.cpp | 70 ++++++++++++++++++- src/sql/optimizer/ob_logical_operator.h | 14 ++++ .../optimizer/ob_opt_cost_model_parameter.cpp | 27 +++++++ .../optimizer/ob_opt_cost_model_parameter.h | 15 ++++ src/sql/optimizer/ob_opt_est_cost.cpp | 10 +++ src/sql/optimizer/ob_opt_est_cost.h | 5 ++ src/sql/optimizer/ob_opt_est_cost_model.cpp | 61 +++++++++++++++- src/sql/optimizer/ob_opt_est_cost_model.h | 4 ++ .../optimizer/ob_opt_est_parameter_normal.h | 8 +++ .../optimizer/ob_opt_est_parameter_vector.h | 8 +++ .../join/r/mysql/anti_semi_join.result | 14 ++-- .../test_suite/join/r/mysql/join_merge.result | 4 +- .../r/mysql/hash_distinct.result | 4 +- .../static_engine/r/mysql/table_scan.result | 6 +- .../subquery/r/mysql/subquery.result | 4 +- 31 files changed, 335 insertions(+), 21 deletions(-) diff --git a/src/sql/optimizer/ob_join_order.cpp b/src/sql/optimizer/ob_join_order.cpp index e0fe050734..75f8be8960 100644 --- a/src/sql/optimizer/ob_join_order.cpp +++ b/src/sql/optimizer/ob_join_order.cpp @@ -7020,7 +7020,7 @@ int JoinPath::cost_nest_loop_join(int64_t 
join_parallel, false, right_sort_keys_, server_cnt_); - if (OB_FAIL(ObOptEstCost::cost_nestloop(est_join_info, op_cost, + if (OB_FAIL(ObOptEstCost::cost_nestloop(est_join_info, op_cost, other_cond_sel_, plan->get_predicate_selectivities(), opt_ctx))) { LOG_WARN("failed to estimate nest loop join cost", K(est_join_info), K(ret)); diff --git a/src/sql/optimizer/ob_log_distinct.cpp b/src/sql/optimizer/ob_log_distinct.cpp index 936260b3ef..c3b072e5ea 100644 --- a/src/sql/optimizer/ob_log_distinct.cpp +++ b/src/sql/optimizer/ob_log_distinct.cpp @@ -376,5 +376,12 @@ int ObLogDistinct::print_used_hint(PlanText &plan_text) return ret; } +int ObLogDistinct::get_card_without_filter(double &card) +{ + int ret = OB_SUCCESS; + card = get_total_ndv(); + return ret; +} + } } diff --git a/src/sql/optimizer/ob_log_distinct.h b/src/sql/optimizer/ob_log_distinct.h index 453f271ebc..71030fdfb7 100644 --- a/src/sql/optimizer/ob_log_distinct.h +++ b/src/sql/optimizer/ob_log_distinct.h @@ -74,6 +74,7 @@ public: virtual int print_used_hint(PlanText &plan_text) override; inline bool is_partition_ig() const { return is_partition_gi_; } inline void set_is_partition_gi(bool v) { is_partition_gi_ = v; } + virtual int get_card_without_filter(double &card) override; private: common::ObSEArray distinct_exprs_; diff --git a/src/sql/optimizer/ob_log_group_by.cpp b/src/sql/optimizer/ob_log_group_by.cpp index 35dd355839..c071feb6aa 100644 --- a/src/sql/optimizer/ob_log_group_by.cpp +++ b/src/sql/optimizer/ob_log_group_by.cpp @@ -895,4 +895,11 @@ int ObLogGroupBy::compute_sharding_info() LOG_WARN("failed to compute sharding info", K(ret)); } return ret; +} + +int ObLogGroupBy::get_card_without_filter(double &card) +{ + int ret = OB_SUCCESS; + card = get_distinct_card(); + return ret; } \ No newline at end of file diff --git a/src/sql/optimizer/ob_log_group_by.h b/src/sql/optimizer/ob_log_group_by.h index 95c24870bf..a04c46cba9 100644 --- a/src/sql/optimizer/ob_log_group_by.h +++ 
b/src/sql/optimizer/ob_log_group_by.h @@ -233,6 +233,7 @@ public: VIRTUAL_TO_STRING_KV(K_(group_exprs), K_(rollup_exprs), K_(aggr_exprs), K_(algo), K_(distinct_card), K_(is_push_down)); + virtual int get_card_without_filter(double &card) override; private: virtual int inner_replace_op_exprs(ObRawExprReplacer &replacer) override; virtual int allocate_granule_post(AllocGIContext &ctx) override; diff --git a/src/sql/optimizer/ob_log_join.cpp b/src/sql/optimizer/ob_log_join.cpp index 1d98f7840f..ce8226d5ef 100644 --- a/src/sql/optimizer/ob_log_join.cpp +++ b/src/sql/optimizer/ob_log_join.cpp @@ -1508,3 +1508,20 @@ int ObLogJoin::allocate_startup_expr_post(int64_t child_idx) } return ret; } + +int ObLogJoin::get_card_without_filter(double &card) +{ + int ret = OB_SUCCESS; + card = 0; + ObLogicalOperator *child_op = NULL; + const JoinPath *path = static_cast(this)->get_join_path(); + if (OB_ISNULL(path)) { + //for late materialization + card = get_card(); + } else if (path->other_cond_sel_ > 0) { + card = get_card() / path->other_cond_sel_; + } else { + card = 1.0; + } + return ret; +} diff --git a/src/sql/optimizer/ob_log_join.h b/src/sql/optimizer/ob_log_join.h index fa25584a1e..3de8818fa4 100644 --- a/src/sql/optimizer/ob_log_join.h +++ b/src/sql/optimizer/ob_log_join.h @@ -164,6 +164,7 @@ namespace sql ObSqlPlanItem &plan_item) override; common::ObIArray &get_above_pushdown_left_params() { return above_pushdown_left_params_; } common::ObIArray &get_above_pushdown_right_params() { return above_pushdown_right_params_; } + virtual int get_card_without_filter(double &card) override; private: int set_use_batch(ObLogicalOperator* root); diff --git a/src/sql/optimizer/ob_log_set.cpp b/src/sql/optimizer/ob_log_set.cpp index 152cd5ec07..15a144da66 100644 --- a/src/sql/optimizer/ob_log_set.cpp +++ b/src/sql/optimizer/ob_log_set.cpp @@ -868,3 +868,25 @@ int ObLogSet::is_my_fixed_expr(const ObRawExpr *expr, bool &is_fixed) } return ret; } + +int 
ObLogSet::get_card_without_filter(double &card) +{ + int ret = OB_SUCCESS; + card = 0.0; + for (int64_t i = 0; OB_SUCC(ret) && i < get_num_of_child(); ++i) { + const ObLogicalOperator *child = get_child(i); + if (ObSelectStmt::UNION == get_set_op() && !is_set_distinct()) { + ObSelectStmt::SetOperator set_type = is_recursive_union() ? ObSelectStmt::RECURSIVE : ObSelectStmt::UNION; + if (0 == i) { + card = child->get_card(); + } else { + card = ObOptSelectivity::get_set_stmt_output_count(card, child->get_card(), set_type); + } + } else if (0 == i) { + card = child_ndv_.at(i); + } else { + card = ObOptSelectivity::get_set_stmt_output_count(card, child_ndv_.at(i), get_set_op()); + } + } + return ret; +} \ No newline at end of file diff --git a/src/sql/optimizer/ob_log_set.h b/src/sql/optimizer/ob_log_set.h index 20bb8430f4..c8b23134db 100644 --- a/src/sql/optimizer/ob_log_set.h +++ b/src/sql/optimizer/ob_log_set.h @@ -120,6 +120,7 @@ public: int construct_pq_set_hint(ObPQSetHint &hint); int set_child_ndv(ObIArray &ndv) { return child_ndv_.assign(ndv); } int add_child_ndv(double ndv) { return child_ndv_.push_back(ndv); } + virtual int get_card_without_filter(double &card) override; private: bool is_distinct_; bool is_recursive_union_; diff --git a/src/sql/optimizer/ob_log_table_scan.cpp b/src/sql/optimizer/ob_log_table_scan.cpp index 831cd49e37..2c350794e4 100644 --- a/src/sql/optimizer/ob_log_table_scan.cpp +++ b/src/sql/optimizer/ob_log_table_scan.cpp @@ -2130,3 +2130,10 @@ ObRawExpr * ObLogTableScan::get_real_expr(const ObRawExpr *col) const } return ret; } + +int ObLogTableScan::get_card_without_filter(double &card) +{ + int ret = OB_SUCCESS; + card = NULL != est_cost_info_ ? 
est_cost_info_->phy_query_range_row_count_ : 1.0; + return ret; +} diff --git a/src/sql/optimizer/ob_log_table_scan.h b/src/sql/optimizer/ob_log_table_scan.h index 4f95cb3315..0b0b6909db 100644 --- a/src/sql/optimizer/ob_log_table_scan.h +++ b/src/sql/optimizer/ob_log_table_scan.h @@ -479,6 +479,7 @@ public: int adjust_print_access_info(ObIArray &access_exprs); static int replace_gen_column(ObLogPlan *plan, ObRawExpr *part_expr, ObRawExpr *&new_part_expr); int extract_file_column_exprs_recursively(ObRawExpr *expr); + virtual int get_card_without_filter(double &card) override; private: // member functions //called when index_back_ set int pick_out_query_range_exprs(); diff --git a/src/sql/optimizer/ob_log_temp_table_access.cpp b/src/sql/optimizer/ob_log_temp_table_access.cpp index 838ce663d7..c10286ac4f 100644 --- a/src/sql/optimizer/ob_log_temp_table_access.cpp +++ b/src/sql/optimizer/ob_log_temp_table_access.cpp @@ -208,3 +208,18 @@ int ObLogTempTableAccess::get_temp_table_plan(ObLogicalOperator *& insert_op) } return ret; } + +int ObLogTempTableAccess::get_card_without_filter(double &card) +{ + int ret = OB_SUCCESS; + ObLogicalOperator *child_op = NULL; + if (OB_FAIL(get_temp_table_plan(child_op))) { + LOG_WARN("failed to get temp table plan", K(ret)); + } else if (OB_ISNULL(child_op)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpect null operator", K(ret)); + } else { + card = child_op->get_card(); + } + return ret; +} diff --git a/src/sql/optimizer/ob_log_temp_table_access.h b/src/sql/optimizer/ob_log_temp_table_access.h index 3c43ca1882..57b6a58b9f 100644 --- a/src/sql/optimizer/ob_log_temp_table_access.h +++ b/src/sql/optimizer/ob_log_temp_table_access.h @@ -44,6 +44,7 @@ public: virtual int get_plan_item_info(PlanText &plan_text, ObSqlPlanItem &plan_item) override; int get_temp_table_plan(ObLogicalOperator *& insert_op); + virtual int get_card_without_filter(double &card) override; private: DISALLOW_COPY_AND_ASSIGN(ObLogTempTableAccess); diff --git 
a/src/sql/optimizer/ob_log_temp_table_transformation.cpp b/src/sql/optimizer/ob_log_temp_table_transformation.cpp index 89674c4af9..76fd7708c4 100644 --- a/src/sql/optimizer/ob_log_temp_table_transformation.cpp +++ b/src/sql/optimizer/ob_log_temp_table_transformation.cpp @@ -149,3 +149,15 @@ int ObLogTempTableTransformation::do_re_est_cost(EstimateCostInfo &param, double } return ret; } + +int ObLogTempTableTransformation::get_card_without_filter(double &card) +{ + int ret = OB_SUCCESS; + ObLogicalOperator *child_op = NULL; + if (OB_NOT_NULL(child_op = get_child(get_num_of_child() - 1))) { + card = child_op->get_card(); + } else { + card = get_card(); + } + return ret; +} \ No newline at end of file diff --git a/src/sql/optimizer/ob_log_temp_table_transformation.h b/src/sql/optimizer/ob_log_temp_table_transformation.h index 12f87e2ad0..07b8778a33 100644 --- a/src/sql/optimizer/ob_log_temp_table_transformation.h +++ b/src/sql/optimizer/ob_log_temp_table_transformation.h @@ -35,6 +35,7 @@ public: virtual int do_re_est_cost(EstimateCostInfo &param, double &card, double &op_cost, double &cost) override; int get_temp_table_exprs(ObIArray &set_exprs) const; int allocate_startup_expr_post() override; + virtual int get_card_without_filter(double &card) override; }; } // end of namespace sql diff --git a/src/sql/optimizer/ob_log_values.h b/src/sql/optimizer/ob_log_values.h index c3abf55743..59f00ceaba 100644 --- a/src/sql/optimizer/ob_log_values.h +++ b/src/sql/optimizer/ob_log_values.h @@ -69,6 +69,12 @@ class ObLogValues : public ObLogicalOperator } return ret; } + virtual int get_card_without_filter(double &card) override + { + int ret = OB_SUCCESS; + card = 1.0; + return ret; + } private: ObLogPlan *explain_plan_; common::ObRowStore row_store_; diff --git a/src/sql/optimizer/ob_logical_operator.cpp b/src/sql/optimizer/ob_logical_operator.cpp index 48d067fa9c..b9d90ba3fc 100644 --- a/src/sql/optimizer/ob_logical_operator.cpp +++ b/src/sql/optimizer/ob_logical_operator.cpp @@
-60,6 +60,7 @@ #include "sql/engine/px/p2p_datahub/ob_p2p_dh_mgr.h" #include "sql/engine/expr/ob_expr_join_filter.h" #include "sql/engine/px/p2p_datahub/ob_runtime_filter_query_range.h" +#include "sql/optimizer/ob_opt_est_parameter_normal.h" using namespace oceanbase::sql; @@ -501,6 +502,18 @@ double FilterCompare::get_selectivity(ObRawExpr *expr) return selectivity; } +int ObLogicalOperator::get_card_without_filter(double &card) +{ + int ret = OB_SUCCESS; + ObLogicalOperator *child_op = NULL; + if (OB_NOT_NULL(child_op = get_child(ObLogicalOperator::first_child))) { + card = child_op->get_card(); + } else { + card = 1.0; + } + return ret; +} + // Add a child to the end of the array int ObLogicalOperator::add_child(ObLogicalOperator *child_op) { @@ -2524,9 +2537,60 @@ int ObLogicalOperator::reorder_filter_exprs() if (OB_ISNULL(get_plan())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("Get unexpeced null", K(ret), K(get_plan())); - } else { - FilterCompare filter_compare(get_plan()->get_predicate_selectivities()); - std::sort(filter_exprs_.begin(), filter_exprs_.end(), filter_compare); + } else if (OB_FAIL(reorder_filters_exprs(get_plan()->get_predicate_selectivities(), + filter_exprs_))) { + LOG_WARN("reorder filter exprs failed", K(ret)); + } else if (log_op_def::LOG_JOIN == get_type()) { + ObLogJoin *join_op = static_cast(this); + if (OB_FAIL(reorder_filters_exprs(get_plan()->get_predicate_selectivities(), + join_op->get_join_filters()))) { + LOG_WARN("reorder join filters failed", K(ret)); + } + } + return ret; +} + +int ObLogicalOperator::reorder_filters_exprs(common::ObIArray &predicate_selectivities, + ObIArray &filter_exprs) +{ + int ret = OB_SUCCESS; + double card = 0; + FilterCompare filter_compare(predicate_selectivities); + common::ObSEArray filter_ranks; + if (OB_ISNULL(get_plan())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret), K(get_plan())); + } else if (OB_FAIL(get_card_without_filter(card))) { + LOG_WARN("get num of rows to be 
filtered failed", K(ret)); + } else if (card < 1.0) { + card = 1.0; + } + for (int64_t i = 0; OB_SUCC(ret) && i < filter_exprs.count(); ++i) { + double cost_per_tuple = 0.0; + double sel = filter_compare.get_selectivity(filter_exprs.at(i)); + double rank = 0; + if (sel < 0) { + // sel < 0 marks a security filter, which must sort first: use -INFINITY, since NaN compares false against everything and breaks the strict weak ordering std::sort requires + rank = -INFINITY; + } else if (OB_FAIL(ObOptEstCost::calc_pred_cost_per_row(filter_exprs.at(i), + card, + cost_per_tuple, + get_plan()->get_optimizer_context()))) { + LOG_WARN("calc pred cost failed", K(ret)); + } else { + rank = (sel - 1) / cost_per_tuple; // for a positive cost: lower selectivity or lower per-row cost gives a more negative rank + } + if (OB_SUCC(ret)) { + if (OB_FAIL(filter_ranks.push_back(ObExprRankPair(rank, filter_exprs.at(i))))) { + LOG_WARN("push back failed", K(ret)); + } + } + } + if (OB_SUCC(ret)) { + std::sort(filter_ranks.begin(), filter_ranks.end(), ObExprRankPairCompare()); + for(int64_t i = 0; i < filter_ranks.count(); ++i) { + filter_exprs.at(i) = filter_ranks.at(i).second; + } } return ret; } diff --git a/src/sql/optimizer/ob_logical_operator.h b/src/sql/optimizer/ob_logical_operator.h index e6c6be35b4..30bcbfa710 100644 --- a/src/sql/optimizer/ob_logical_operator.h +++ b/src/sql/optimizer/ob_logical_operator.h @@ -279,6 +279,17 @@ struct FilterCompare common::ObIArray &predicate_selectivities_; }; +typedef std::pair ObExprRankPair; // (rank, filter expression) + +struct ObExprRankPairCompare // orders ObExprRankPair by ascending rank (.first) +{ + ObExprRankPairCompare() {} + bool operator()(const ObExprRankPair &left, const ObExprRankPair &right) const + { + return left.first < right.first; + } +}; + class AdjustSortContext { public: @@ -1634,6 +1645,8 @@ public: * be evaluated earlier.
*/ int reorder_filter_exprs(); + int reorder_filters_exprs(common::ObIArray &predicate_selectivities, + ObIArray &filters_exprs); int find_shuffle_join_filter(bool &find) const; int has_window_function_below(bool &has_win_func) const; @@ -1849,6 +1862,7 @@ private: // alloc mat for sync in intput int need_alloc_material_for_push_down_wf(ObLogicalOperator &curr_op, bool &need_alloc); int check_need_parallel_valid(int64_t need_parallel) const; + virtual int get_card_without_filter(double &card); private: ObLogicalOperator *parent_; // parent operator bool is_plan_root_; // plan root operator diff --git a/src/sql/optimizer/ob_opt_cost_model_parameter.cpp b/src/sql/optimizer/ob_opt_cost_model_parameter.cpp index 20876e9620..5255ffa99d 100644 --- a/src/sql/optimizer/ob_opt_cost_model_parameter.cpp +++ b/src/sql/optimizer/ob_opt_cost_model_parameter.cpp @@ -383,3 +383,30 @@ double ObOptCostModelParameter::get_hash_cost(const OptSystemStat& stat, int64_t return cost / stat.get_cpu_speed(); } } + +double ObOptCostModelParameter::get_cmp_lob_cost(const OptSystemStat& stat) const +{ + if (stat.get_cpu_speed() <= 0) { + return CMP_LOB_COST; + } else { + return CMP_LOB_COST / stat.get_cpu_speed(); + } +} + +double ObOptCostModelParameter::get_cmp_udf_cost(const OptSystemStat& stat) const +{ + if (stat.get_cpu_speed() <= 0) { + return CMP_UDF_COST; + } else { + return CMP_UDF_COST / stat.get_cpu_speed(); + } +} + +double ObOptCostModelParameter::get_cmp_err_handle_expr_cost(const OptSystemStat& stat) const +{ + if (stat.get_cpu_speed() <= 0) { + return CMP_ERR_HANDLE_EXPR_COST; + } else { + return CMP_ERR_HANDLE_EXPR_COST / stat.get_cpu_speed(); + } +} diff --git a/src/sql/optimizer/ob_opt_cost_model_parameter.h b/src/sql/optimizer/ob_opt_cost_model_parameter.h index 3f670bf6c7..1006629866 100644 --- a/src/sql/optimizer/ob_opt_cost_model_parameter.h +++ b/src/sql/optimizer/ob_opt_cost_model_parameter.h @@ -69,6 +69,9 @@ public: const double DEFAULT_DELETE_CHECK_PER_ROW_COST, 
const double DEFAULT_SPATIAL_PER_ROW_COST, const double DEFAULT_RANGE_COST, + const double DEFAULT_CMP_UDF_COST, + const double DEFAULT_CMP_LOB_COST, + const double DEFAULT_CMP_ERR_HANDLE_EXPR_COST, const double (&comparison_params)[common::ObMaxTC + 1], const double (&hash_params)[common::ObMaxTC + 1], const double (&project_params)[2][2][MAX_PROJECT_TYPE] @@ -109,6 +112,9 @@ public: DELETE_CHECK_PER_ROW_COST(DEFAULT_DELETE_CHECK_PER_ROW_COST), SPATIAL_PER_ROW_COST(DEFAULT_SPATIAL_PER_ROW_COST), RANGE_COST(DEFAULT_RANGE_COST), + CMP_UDF_COST(DEFAULT_CMP_UDF_COST), + CMP_LOB_COST(DEFAULT_CMP_LOB_COST), + CMP_ERR_HANDLE_EXPR_COST(DEFAULT_CMP_ERR_HANDLE_EXPR_COST), comparison_params_(comparison_params), hash_params_(hash_params), project_params_(project_params) @@ -157,6 +163,9 @@ public: double get_range_cost(const OptSystemStat& stat) const; double get_comparison_cost(const OptSystemStat& stat, int64_t type) const; double get_hash_cost(const OptSystemStat& stat, int64_t type) const; + double get_cmp_lob_cost(const OptSystemStat& stat) const; + double get_cmp_udf_cost(const OptSystemStat& stat) const; + double get_cmp_err_handle_expr_cost(const OptSystemStat& stat) const; protected: /** 读取一行的CPU开销,基本上只包括get_next_row()操作 */ @@ -229,6 +238,12 @@ protected: double SPATIAL_PER_ROW_COST; //存储层切换一次range的代价 double RANGE_COST; + //cost of evaluating one UDF + double CMP_UDF_COST; + //cost of evaluating one expression whose return value is a LOB + double CMP_LOB_COST; + //cost of evaluating one expression that needs error handling + double CMP_ERR_HANDLE_EXPR_COST; const double (&comparison_params_)[common::ObMaxTC + 1]; const double (&hash_params_)[common::ObMaxTC + 1]; /* diff --git a/src/sql/optimizer/ob_opt_est_cost.cpp b/src/sql/optimizer/ob_opt_est_cost.cpp index 99ef3ad9d7..58279f3180 100644 --- a/src/sql/optimizer/ob_opt_est_cost.cpp +++ b/src/sql/optimizer/ob_opt_est_cost.cpp @@ -51,6 +51,7 @@ const int64_t ObOptEstCost::MAX_STORAGE_RANGE_ESTIMATION_NUM = 10; int ObOptEstCost::cost_nestloop(const ObCostNLJoinInfo &est_cost_info, double &cost, + double
&filter_selectivity, ObIArray &all_predicate_sel, const ObOptimizerContext &opt_ctx) { @@ -58,6 +59,7 @@ int ObOptEstCost::cost_nestloop(const ObCostNLJoinInfo &est_cost_info, GET_COST_MODEL(); if (OB_FAIL(model->cost_nestloop(est_cost_info, cost, + filter_selectivity, all_predicate_sel))) { LOG_WARN("failed to est cost for nestloop join", K(ret)); } @@ -756,3 +758,11 @@ int ObOptEstCost::stat_estimate_single_range_rc(const ObCostTableScanInfo &est_c return ret; } +int ObOptEstCost::calc_pred_cost_per_row(const ObRawExpr *expr, + double card, + double &cost, + const ObOptimizerContext &opt_ctx) +{ + GET_COST_MODEL(); + return model->calc_pred_cost_per_row(expr, card, cost); // forwards the model's int status code; the per-row cost is added to 'cost' +} \ No newline at end of file diff --git a/src/sql/optimizer/ob_opt_est_cost.h b/src/sql/optimizer/ob_opt_est_cost.h index cce94bae2f..8c431a880c 100644 --- a/src/sql/optimizer/ob_opt_est_cost.h +++ b/src/sql/optimizer/ob_opt_est_cost.h @@ -38,6 +38,7 @@ public: static int cost_nestloop(const ObCostNLJoinInfo &est_cost_info, double &cost, + double &filter_selectivity, common::ObIArray &all_predicate_sel, const ObOptimizerContext &opt_ctx); @@ -241,6 +242,10 @@ public: double &count); static double get_estimate_width_from_type(const ObExprResType &type); + static int calc_pred_cost_per_row(const ObRawExpr *expr, + double card, + double &cost, + const ObOptimizerContext &opt_ctx); // returns a status code (callers wrap it in OB_FAIL); the cost comes back through 'cost' private: DISALLOW_COPY_AND_ASSIGN(ObOptEstCost); }; diff --git a/src/sql/optimizer/ob_opt_est_cost_model.cpp b/src/sql/optimizer/ob_opt_est_cost_model.cpp index 08e0b5c754..872a5fde40 100644 --- a/src/sql/optimizer/ob_opt_est_cost_model.cpp +++ b/src/sql/optimizer/ob_opt_est_cost_model.cpp @@ -179,6 +179,7 @@ double ObIndexMetaInfo::get_micro_block_numbers() const */ int ObOptEstCostModel::cost_nestloop(const ObCostNLJoinInfo &est_cost_info, double &cost, + double &filter_selectivity, ObIArray &all_predicate_sel) { int ret = OB_SUCCESS; @@ -191,7 +192,7 @@ int ObOptEstCostModel::cost_nestloop(const
ObCostNLJoinInfo &est_cost_info, double right_rows = est_cost_info.right_rows_; double cart_tuples = left_rows * right_rows; // tuples of Cartesian product double out_tuples = 0.0; - double filter_selectivity = 0.0; + filter_selectivity = 0.0; double material_cost = 0.0; //selectivity for equal conds if (OB_FAIL(ObOptSelectivity::calculate_selectivity(*est_cost_info.table_metas_, @@ -2285,3 +2286,61 @@ int ObOptEstCostModel::calc_range_cost(const ObTableMetaInfo& table_meta_info, cost = io_cost + cpu_cost; return ret; } + +int ObOptEstCostModel::calc_pred_cost_per_row(const ObRawExpr *expr, + double card, + double &cost) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else { + double rows = expr->is_const_expr() && card > 0 ? card : 1; + bool need_calc_child_cost = true; + if (IS_SPATIAL_OP(expr->get_expr_type()) + || IS_GEO_OP(expr->get_expr_type()) + || expr->is_json_expr() + || expr->is_xml_expr()) { + cost += cost_params_.get_cmp_spatial_cost(sys_stat_) / rows; + } else if (expr->is_udf_expr()) { + cost += cost_params_.get_cmp_udf_cost(sys_stat_) / rows; + } else if (ob_is_lob_locator(expr->get_result_type().get_type())) { + cost += cost_params_.get_cmp_lob_cost(sys_stat_) / rows; + } else if (T_OP_DIV == expr->get_expr_type()) { + cost += cost_params_.get_cmp_err_handle_expr_cost(sys_stat_) / rows; + } else if (T_FUN_SYS_CAST == expr->get_expr_type()) { + if (OB_ISNULL(expr->get_param_expr(0))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else { + ObObjType src = expr->get_param_expr(0)->get_result_type().get_type(); + ObObjType dst = expr->get_result_type().get_type(); + if (ob_is_string_type(src) && + (ob_is_numeric_type(dst) || ob_is_temporal_type(dst))) { + cost += cost_params_.get_cmp_err_handle_expr_cost(sys_stat_) / rows; + } else { + cost += cost_params_.get_comparison_cost(sys_stat_,ObIntTC) / rows; + } + } + } else if (T_OP_IN == 
expr->get_expr_type()) { + if (expr->get_param_count() != 2 || OB_ISNULL(expr->get_param_expr(1))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid in params", K(ret)); + } else { + cost += (expr->get_param_expr(1)->get_param_count() + 1) * cost_params_.get_comparison_cost(sys_stat_,ObIntTC) / rows; + } + need_calc_child_cost = false; + } else { + cost += cost_params_.get_comparison_cost(sys_stat_,ObIntTC) / rows; + } + if (need_calc_child_cost) { + for (int64_t i = 0; OB_SUCC(ret) && i < expr->get_param_count(); ++i) { + if (OB_FAIL(SMART_CALL(calc_pred_cost_per_row(expr->get_param_expr(i), card, cost)))) { + LOG_WARN("calc cost per tuple failed", K(ret), KPC(expr)); + } + } + } + } + return ret; +} diff --git a/src/sql/optimizer/ob_opt_est_cost_model.h b/src/sql/optimizer/ob_opt_est_cost_model.h index a41222c453..3d04734d68 100644 --- a/src/sql/optimizer/ob_opt_est_cost_model.h +++ b/src/sql/optimizer/ob_opt_est_cost_model.h @@ -656,6 +656,7 @@ public: int cost_nestloop(const ObCostNLJoinInfo &est_cost_info, double &cost, + double &filter_selectivity, common::ObIArray &all_predicate_sel); int cost_mergejoin(const ObCostMergeJoinInfo &est_cost_info, @@ -804,6 +805,9 @@ public: int64_t range_count, double range_sel, double &cost); + int calc_pred_cost_per_row(const ObRawExpr *expr, + double card, + double &cost); protected: int cost_sort(const ObSortCostInfo &cost_info, diff --git a/src/sql/optimizer/ob_opt_est_parameter_normal.h b/src/sql/optimizer/ob_opt_est_parameter_normal.h index a78fdae981..24eb14f94a 100644 --- a/src/sql/optimizer/ob_opt_est_parameter_normal.h +++ b/src/sql/optimizer/ob_opt_est_parameter_normal.h @@ -86,6 +86,11 @@ const static double NORMAL_HASH_NUMBER_COST = 0.02702563229230158730158730158 * const static double NORMAL_HASH_CHAR_COST = 0.2306996323278174603174603175 * DEFAULT_CPU_SPEED; const static double NORMAL_INVALID_HASH_COST = -1; +//mock complex expr cost +const static double NORMAL_CMP_UDF_COST = 100.0 * DEFAULT_CPU_SPEED; +const 
static double NORMAL_CMP_LOB_COST = 9.707028746051587301587301588 * DEFAULT_CPU_SPEED; //NORMAL_CMP_CHAR_COST * 100 +const static double NORMAL_CMP_ERR_HANDLE_EXPR_COST = 1.00087103407539 * DEFAULT_CPU_SPEED; //NORMAL_CMP_INT_COST * 100 + const static double comparison_params_normal[ObMaxTC+1] = { NORMAL_CMP_INT_COST, // null NORMAL_CMP_INT_COST, // int8, int16, int24, int32, int64. @@ -209,6 +214,9 @@ const static ObOptCostModelParameter cost_params_normal( NORMAL_DELETE_CHECK_PER_ROW_COST, NORMAL_SPATIAL_PER_ROW_COST, NORMAL_RANGE_COST, + NORMAL_CMP_UDF_COST, + NORMAL_CMP_LOB_COST, + NORMAL_CMP_ERR_HANDLE_EXPR_COST, comparison_params_normal, hash_params_normal, project_params_normal diff --git a/src/sql/optimizer/ob_opt_est_parameter_vector.h b/src/sql/optimizer/ob_opt_est_parameter_vector.h index 62941d9247..7acf2855c8 100644 --- a/src/sql/optimizer/ob_opt_est_parameter_vector.h +++ b/src/sql/optimizer/ob_opt_est_parameter_vector.h @@ -86,6 +86,11 @@ const static double VECTOR_HASH_NUMBER_COST = 0.01494804432806 * DEFAULT_CPU_SPE const static double VECTOR_HASH_CHAR_COST = 0.18684685876579 * DEFAULT_CPU_SPEED; const static double VECTOR_INVALID_HASH_COST = -1; +//mock complex expr cost +const static double VECTOR_CMP_UDF_COST = 100.0 * DEFAULT_CPU_SPEED; +const static double VECTOR_CMP_LOB_COST = 9.707028746051587301587301588 * DEFAULT_CPU_SPEED; //NORMAL_CMP_CHAR_COST * 100 +const static double VECTOR_CMP_ERR_HANDLE_EXPR_COST = 1.00087103407539 * DEFAULT_CPU_SPEED; //NORMAL_CMP_INT_COST * 100 + const static double comparison_params_vector[ObMaxTC+1] = { VECTOR_CMP_INT_COST, // null VECTOR_CMP_INT_COST, // int8, int16, int24, int32, int64. 
@@ -208,6 +213,9 @@ const static ObOptCostModelParameter cost_params_vector( VECTOR_DELETE_CHECK_PER_ROW_COST, VECTOR_SPATIAL_PER_ROW_COST, VECTOR_RANGE_COST, + VECTOR_CMP_UDF_COST, + VECTOR_CMP_LOB_COST, + VECTOR_CMP_ERR_HANDLE_EXPR_COST, comparison_params_vector, hash_params_vector, project_params_vector diff --git a/tools/deploy/mysql_test/test_suite/join/r/mysql/anti_semi_join.result b/tools/deploy/mysql_test/test_suite/join/r/mysql/anti_semi_join.result index f7e04ef3e4..0a9e0afe8d 100644 --- a/tools/deploy/mysql_test/test_suite/join/r/mysql/anti_semi_join.result +++ b/tools/deploy/mysql_test/test_suite/join/r/mysql/anti_semi_join.result @@ -123,7 +123,7 @@ Query Plan Outputs & filters: ------------------------------------- 0 - output([xy_t1.c1], [xy_t1.c2]), filter(nil), rowset=16 - conds([xy_t1.c1 = xy_t2.c1], [xy_t1.c2 > xy_t2.c2]), nl_params_(nil), use_batch=false + conds([xy_t1.c2 > xy_t2.c2], [xy_t1.c1 = xy_t2.c1]), nl_params_(nil), use_batch=false 1 - output([xy_t1.c1], [xy_t1.c2]), filter(nil), rowset=16 access([xy_t1.c1], [xy_t1.c2]), partitions(p0) is_index_back=false, is_global_index=false, @@ -343,7 +343,7 @@ Query Plan Outputs & filters: ------------------------------------- 0 - output([xy_t1.c1], [xy_t1.c2]), filter(nil), rowset=16 - conds([xy_t1.c1 = xy_t2.c1], [xy_t1.c2 > xy_t2.c2]), nl_params_(nil), use_batch=false + conds([xy_t1.c2 > xy_t2.c2], [xy_t1.c1 = xy_t2.c1]), nl_params_(nil), use_batch=false 1 - output([xy_t1.c1], [xy_t1.c2]), filter(nil), rowset=16 access([xy_t1.c1], [xy_t1.c2]), partitions(p0) is_index_back=false, is_global_index=false, @@ -438,7 +438,7 @@ Query Plan Outputs & filters: ------------------------------------- 0 - output([xy_t1.c1], [xy_t1.c2], [xy_t1.c1], [xy_t1.c2]), filter(nil), rowset=16 - conds([xy_t1.c1 = xy_t2.c1], [xy_t1.c2 > xy_t2.c2]), nl_params_(nil), use_batch=false + conds([xy_t1.c2 > xy_t2.c2], [xy_t1.c1 = xy_t2.c1]), nl_params_(nil), use_batch=false 1 - output([xy_t1.c1], [xy_t1.c2]), filter(nil), 
rowset=16 access([xy_t1.c1], [xy_t1.c2]), partitions(p0) is_index_back=false, is_global_index=false, @@ -549,7 +549,7 @@ Query Plan Outputs & filters: ------------------------------------- 0 - output([xy_t1.c1], [xy_t1.c2], [xy_t1.c1], [xy_t1.c2]), filter(nil), rowset=16 - conds([xy_t1.c1 = xy_t2.c1], [xy_t1.c2 > xy_t2.c2]), nl_params_(nil), use_batch=false + conds([xy_t1.c2 > xy_t2.c2], [xy_t1.c1 = xy_t2.c1]), nl_params_(nil), use_batch=false 1 - output([xy_t1.c1], [xy_t1.c2]), filter(nil), rowset=16 access([xy_t1.c1], [xy_t1.c2]), partitions(p0) is_index_back=false, is_global_index=false, @@ -731,7 +731,7 @@ Query Plan Outputs & filters: ------------------------------------- 0 - output([xy_x_t.x1], [xy_x_t.x2]), filter(nil), rowset=256 - conds([xy_y_t.y2 < xy_x_t.x2], [xy_x_t.x1 = xy_y_t.y1]), nl_params_(nil), use_batch=false + conds([xy_x_t.x1 = xy_y_t.y1], [xy_y_t.y2 < xy_x_t.x2]), nl_params_(nil), use_batch=false 1 - output([xy_x_t.x2], [xy_x_t.x1]), filter(nil), rowset=256 access([xy_x_t.x2], [xy_x_t.x1]), partitions(p0) is_index_back=false, is_global_index=false, @@ -4042,7 +4042,7 @@ Outputs & filters: 0 - output([xy_t1.c2], [xy_t1.c3]), filter(nil), rowset=16 sort_keys([xy_t1.c2, ASC], [xy_t1.c3, ASC]) 1 - output([xy_t1.c2], [xy_t1.c3]), filter(nil), rowset=16 - conds([xy_t1.c2 = xy_t2.c2], [xy_t1.c3 < xy_t2.c3]), nl_params_(nil), use_batch=false + conds([xy_t1.c3 < xy_t2.c3], [xy_t1.c2 = xy_t2.c2]), nl_params_(nil), use_batch=false 2 - output([xy_t1.c2], [xy_t1.c3]), filter(nil), rowset=16 access([xy_t1.c2], [xy_t1.c3]), partitions(p0) is_index_back=false, is_global_index=false, @@ -4192,7 +4192,7 @@ Outputs & filters: 0 - output([xy_t2.c2], [xy_t2.c3]), filter(nil), rowset=16 sort_keys([xy_t2.c2, ASC], [xy_t2.c3, ASC]) 1 - output([xy_t2.c2], [xy_t2.c3]), filter(nil), rowset=16 - conds([xy_t1.c2 = xy_t2.c2], [xy_t1.c3 < xy_t2.c3]), nl_params_(nil), use_batch=false + conds([xy_t1.c3 < xy_t2.c3], [xy_t1.c2 = xy_t2.c2]), nl_params_(nil), use_batch=false 2 - 
output([xy_t2.c2], [xy_t2.c3]), filter(nil), rowset=16 access([xy_t2.c2], [xy_t2.c3]), partitions(p0) is_index_back=false, is_global_index=false, diff --git a/tools/deploy/mysql_test/test_suite/join/r/mysql/join_merge.result b/tools/deploy/mysql_test/test_suite/join/r/mysql/join_merge.result index c237466d61..f527899861 100644 --- a/tools/deploy/mysql_test/test_suite/join/r/mysql/join_merge.result +++ b/tools/deploy/mysql_test/test_suite/join/r/mysql/join_merge.result @@ -1504,7 +1504,7 @@ Outputs & filters: 0 - output([t2.a], [t2.b], [t2.c], [t7.a], [t7.b], [t7.c], [t7.d]), filter(nil), rowset=16 sort_keys([t2.a, ASC], [t2.b, ASC], [t7.a, ASC], [t7.b, ASC]) 1 - output([t2.a], [t2.b], [t7.a], [t7.b], [t2.c], [t7.c], [t7.d]), filter(nil), rowset=16 - conds([cast(t2.b, DECIMAL(-1, -1)) = cast(t7.a, DECIMAL(10, 0))], [t2.c = t7.d]), nl_params_(nil), use_batch=false + conds([t2.c = t7.d], [cast(t2.b, DECIMAL(-1, -1)) = cast(t7.a, DECIMAL(10, 0))]), nl_params_(nil), use_batch=false 2 - output([t7.a], [t7.d], [t7.b], [t7.c]), filter(nil), rowset=16 access([t7.a], [t7.d], [t7.b], [t7.c]), partitions(p0) is_index_back=false, is_global_index=false, @@ -1669,7 +1669,7 @@ Outputs & filters: 0 - output([t2.a], [t2.b], [t2.c], [t7.a], [t7.b], [t7.c], [t7.d]), filter(nil), rowset=16 sort_keys([t2.a, ASC], [t2.b, ASC], [t7.a, ASC], [t7.b, ASC]) 1 - output([t2.a], [t2.b], [t7.a], [t7.b], [t2.c], [t7.c], [t7.d]), filter(nil), rowset=16 - conds([cast(t2.b, DECIMAL(-1, -1)) = cast(t7.a, DECIMAL(10, 0))], [t2.c = t7.d]), nl_params_(nil), use_batch=false + conds([t2.c = t7.d], [cast(t2.b, DECIMAL(-1, -1)) = cast(t7.a, DECIMAL(10, 0))]), nl_params_(nil), use_batch=false 2 - output([t2.b], [t2.c], [t2.a]), filter(nil), rowset=16 access([t2.b], [t2.c], [t2.a]), partitions(p0) is_index_back=false, is_global_index=false, diff --git a/tools/deploy/mysql_test/test_suite/static_engine/r/mysql/hash_distinct.result b/tools/deploy/mysql_test/test_suite/static_engine/r/mysql/hash_distinct.result 
index 1c222839c7..4263758870 100644 --- a/tools/deploy/mysql_test/test_suite/static_engine/r/mysql/hash_distinct.result +++ b/tools/deploy/mysql_test/test_suite/static_engine/r/mysql/hash_distinct.result @@ -5964,7 +5964,7 @@ Outputs & filters: ------------------------------------- 0 - output([T_FUN_COUNT(*)]), filter(nil), rowset=256 group(nil), agg_func([T_FUN_COUNT(*)]) - 1 - output(nil), filter([concat(result.t1c0, '') = 'q6h]zjLt)|[?S*C'], [result.t2c0 = 1489403758], [concat(result.t0c0, '') IS NULL], [result.t0c1 = 398204275]), rowset=256 + 1 - output(nil), filter([result.t2c0 = 1489403758], [concat(result.t1c0, '') = 'q6h]zjLt)|[?S*C'], [concat(result.t0c0, '') IS NULL], [result.t0c1 = 398204275]), rowset=256 access([result.t0c0], [result.t0c1], [result.t2c0], [result.t1c0]) 2 - output([t0.c0], [t0.c1], [t2.c0], [t1.c0]), filter(nil), rowset=256 limit(2147483647), offset(0) @@ -6020,7 +6020,7 @@ Outputs & filters: ------------------------------------- 0 - output([T_FUN_COUNT(*)]), filter(nil), rowset=256 group(nil), agg_func([T_FUN_COUNT(*)]) - 1 - output(nil), filter([concat(result.t1c0, '') = 'q6h]zjLt)|[?S*C'], [result.t2c0 = 1489403758], [concat(result.t0c0, '') IS NULL], [result.t0c1 = 398204275]), rowset=256 + 1 - output(nil), filter([result.t2c0 = 1489403758], [concat(result.t1c0, '') = 'q6h]zjLt)|[?S*C'], [concat(result.t0c0, '') IS NULL], [result.t0c1 = 398204275]), rowset=256 access([result.t0c0], [result.t0c1], [result.t2c0], [result.t1c0]) 2 - output([t0.c0], [t0.c1], [t2.c0], [t1.c0]), filter(nil), rowset=256 limit(2147483647), offset(0) diff --git a/tools/deploy/mysql_test/test_suite/static_engine/r/mysql/table_scan.result b/tools/deploy/mysql_test/test_suite/static_engine/r/mysql/table_scan.result index dfd7303a62..305a3cc3f9 100644 --- a/tools/deploy/mysql_test/test_suite/static_engine/r/mysql/table_scan.result +++ b/tools/deploy/mysql_test/test_suite/static_engine/r/mysql/table_scan.result @@ -186,10 +186,10 @@ Query Plan 
================================================== Outputs & filters: ------------------------------------- - 0 - output([t1.c1], [t1.c2], [t1.c3], [t1.c4], [t1.c5]), filter([t1.c1 + cast(cast(2, DECIMAL_INT(10, 0)), DECIMAL_INT(10, 0)) < cast(10, DECIMAL_INT(11, - 0))], [t1.c4 > t1.c3]), rowset=16 + 0 - output([t1.c1], [t1.c2], [t1.c3], [t1.c4], [t1.c5]), filter([t1.c4 > t1.c3], [t1.c1 + cast(cast(2, DECIMAL_INT(10, 0)), DECIMAL_INT(10, 0)) < cast(10, + DECIMAL_INT(11, 0))]), rowset=16 access([t1.c1], [t1.c2], [t1.c4], [t1.c3], [t1.c5]), partitions(p0) - is_index_back=true, is_global_index=false, filter_before_indexback[true,false], + is_index_back=true, is_global_index=false, filter_before_indexback[false,true], range_key([t1.c2], [t1.c1]), range(2,MIN ; 2,MAX), range_cond([t1.c2 = cast(2, DECIMAL_INT(10, 0))]) select * from t1 where c2 = 2 and c1 + c2 < 10 and c4 > c3; diff --git a/tools/deploy/mysql_test/test_suite/subquery/r/mysql/subquery.result b/tools/deploy/mysql_test/test_suite/subquery/r/mysql/subquery.result index 2963dbabe8..4d41a8ae84 100644 --- a/tools/deploy/mysql_test/test_suite/subquery/r/mysql/subquery.result +++ b/tools/deploy/mysql_test/test_suite/subquery/r/mysql/subquery.result @@ -975,7 +975,7 @@ Outputs & filters: range_key([t1.__pk_increment]), range(MIN ; MAX)always true 2 - output([1]), filter(nil), rowset=16 limit(1), offset(nil) - 3 - output(nil), filter([T_FUN_SUM(t2.a) > cast(1, DECIMAL_INT(33, 0))], [T_FUN_COUNT(t2.a) > 0]), rowset=16 + 3 - output(nil), filter([T_FUN_COUNT(t2.a) > 0], [T_FUN_SUM(t2.a) > cast(1, DECIMAL_INT(33, 0))]), rowset=16 group([t2.a]), agg_func([T_FUN_SUM(t2.a)], [T_FUN_COUNT(t2.a)]) 4 - output([t2.a]), filter(nil), rowset=16 access([t2.a]), partitions(p0) @@ -1131,7 +1131,7 @@ Outputs & filters: range_key([t1.__pk_increment]), range(MIN ; MAX)always true 2 - output([1]), filter(nil), rowset=16 limit(1), offset(nil) - 3 - output(nil), filter([T_FUN_SUM(t2.a) > cast(1, DECIMAL_INT(33, 0))], [T_FUN_COUNT(t2.a) > 
0]), rowset=16 + 3 - output(nil), filter([T_FUN_COUNT(t2.a) > 0], [T_FUN_SUM(t2.a) > cast(1, DECIMAL_INT(33, 0))]), rowset=16 group([t2.a]), agg_func([T_FUN_SUM(t2.a)], [T_FUN_COUNT(t2.a)]) 4 - output([t2.a]), filter(nil), rowset=16 access([t2.a]), partitions(p0)