/** * Copyright (c) 2021 OceanBase * OceanBase CE is licensed under Mulan PubL v2. * You can use this software according to the terms and conditions of the Mulan PubL v2. * You may obtain a copy of Mulan PubL v2 at: * http://license.coscl.org.cn/MulanPubL-2.0 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. * See the Mulan PubL v2 for more details. */ #define USING_LOG_PREFIX SQL_OPT #include "sql/optimizer/ob_opt_est_cost_model.h" #include "sql/session/ob_sql_session_info.h" #include "sql/ob_sql_utils.h" #include "sql/optimizer/ob_optimizer_context.h" #include "sql/optimizer/ob_join_order.h" #include "sql/optimizer/ob_optimizer.h" #include "sql/optimizer/ob_opt_selectivity.h" #include "ob_opt_cost_model_parameter.h" #include using namespace oceanbase::common; using namespace oceanbase::share; using namespace oceanbase; using namespace sql; using namespace oceanbase::jit::expr; const int64_t ObOptEstCostModel::DEFAULT_LOCAL_ORDER_DEGREE = 32; const int64_t ObOptEstCostModel::DEFAULT_MAX_STRING_WIDTH = 64; const int64_t ObOptEstCostModel::DEFAULT_FIXED_OBJ_WIDTH = 12; int ObCostColumnGroupInfo::assign(const ObCostColumnGroupInfo& info) { int ret = OB_SUCCESS; if (OB_FAIL(filters_.assign(info.filters_))) { LOG_WARN("failed to assign filters", K(ret)); } else if (OB_FAIL(access_column_items_.assign(info.access_column_items_))) { LOG_WARN("failed to assign column", K(ret)); } else { column_id_ = info.column_id_; micro_block_count_ = info.micro_block_count_; filter_sel_ = info.filter_sel_; skip_rate_ = info.skip_rate_; skip_filter_sel_ = info.skip_filter_sel_; } return ret; } int ObCostTableScanInfo::assign(const ObCostTableScanInfo &est_cost_info) { int ret = OB_SUCCESS; if (OB_FAIL(ranges_.assign(est_cost_info.ranges_))) { LOG_WARN("failed to assign range", K(ret)); } else if 
(OB_FAIL(ss_ranges_.assign(est_cost_info.ss_ranges_))) { LOG_WARN("failed to assign range", K(ret)); } else if (OB_FAIL(range_columns_.assign(est_cost_info.range_columns_))) { LOG_WARN("failed to assign range columns", K(ret)); } else if (OB_FAIL(access_column_items_.assign(est_cost_info.access_column_items_))) { LOG_WARN("failed to assign access columns", K(ret)); } else if (OB_FAIL(index_access_column_items_.assign(est_cost_info.index_access_column_items_))) { LOG_WARN("failed to assign access columns", K(ret)); } else if (OB_FAIL(prefix_filters_.assign(est_cost_info.prefix_filters_))) { LOG_WARN("failed to assign access columns", K(ret)); } else if (OB_FAIL(pushdown_prefix_filters_.assign(est_cost_info.pushdown_prefix_filters_))) { LOG_WARN("failed to assign access columns", K(ret)); } else if (OB_FAIL(ss_postfix_range_filters_.assign(est_cost_info.ss_postfix_range_filters_))) { LOG_WARN("failed to assign access columns", K(ret)); } else if (OB_FAIL(postfix_filters_.assign(est_cost_info.postfix_filters_))) { LOG_WARN("failed to assign access columns", K(ret)); } else if (OB_FAIL(table_filters_.assign(est_cost_info.table_filters_))) { LOG_WARN("failed to assign access columns", K(ret)); } else if (OB_FAIL(access_columns_.assign(est_cost_info.access_columns_))) { LOG_WARN("failed to assign access columns", K(ret)); } else if (OB_FAIL(column_group_infos_.assign(est_cost_info.column_group_infos_))) { LOG_WARN("failed to to assign column group infos", K(ret)); } else { table_id_ = est_cost_info.table_id_; ref_table_id_ = est_cost_info.ref_table_id_; index_id_ = est_cost_info.index_id_; table_meta_info_ = est_cost_info.table_meta_info_; index_meta_info_.assign(est_cost_info.index_meta_info_); is_virtual_table_ = est_cost_info.is_virtual_table_; is_unique_ = est_cost_info.is_unique_; is_inner_path_ = est_cost_info.is_inner_path_; can_use_batch_nlj_ = est_cost_info.can_use_batch_nlj_; table_metas_ = est_cost_info.table_metas_; sel_ctx_ = est_cost_info.sel_ctx_; 
row_est_method_ = est_cost_info.row_est_method_; prefix_filter_sel_ = est_cost_info.prefix_filter_sel_; pushdown_prefix_filter_sel_ = est_cost_info.pushdown_prefix_filter_sel_; postfix_filter_sel_ = est_cost_info.postfix_filter_sel_; table_filter_sel_ = est_cost_info.table_filter_sel_; join_filter_sel_ = est_cost_info.join_filter_sel_; ss_prefix_ndv_ = est_cost_info.ss_prefix_ndv_; ss_postfix_range_filters_sel_ = est_cost_info.ss_postfix_range_filters_sel_; logical_query_range_row_count_ = est_cost_info.logical_query_range_row_count_; phy_query_range_row_count_ = est_cost_info.phy_query_range_row_count_; index_back_row_count_ = est_cost_info.index_back_row_count_; output_row_count_ = est_cost_info.output_row_count_; batch_type_ = est_cost_info.batch_type_; sample_info_ = est_cost_info.sample_info_; use_column_store_ = est_cost_info.use_column_store_; // no need to copy table scan param } return ret; } void ObTableMetaInfo::assign(const ObTableMetaInfo &table_meta_info) { ref_table_id_ = table_meta_info.ref_table_id_; schema_version_ = table_meta_info.schema_version_; part_count_ = table_meta_info.part_count_; micro_block_size_ = table_meta_info.micro_block_size_; part_size_ = table_meta_info.part_size_; average_row_size_ = table_meta_info.average_row_size_; table_column_count_ = table_meta_info.table_column_count_; table_rowkey_count_ = table_meta_info.table_rowkey_count_; table_row_count_ = table_meta_info.table_row_count_; row_count_ = table_meta_info.row_count_; has_opt_stat_ = table_meta_info.has_opt_stat_; micro_block_count_ = table_meta_info.micro_block_count_; table_type_ = table_meta_info.table_type_; } double ObTableMetaInfo::get_micro_block_numbers() const { double ret = 0.0; if (micro_block_count_ <= 0) { // calculate micro block count use storage statistics ret = 0; } else { // get micro block count from optimizer statistics ret = static_cast(micro_block_count_); } return ret; } void ObIndexMetaInfo::assign(const ObIndexMetaInfo &index_meta_info) { 
ref_table_id_ = index_meta_info.ref_table_id_; index_id_ = index_meta_info.index_id_; index_micro_block_size_ = index_meta_info.index_micro_block_size_; index_part_count_ = index_meta_info.index_part_count_; index_part_size_ = index_meta_info.index_part_size_; index_part_count_ = index_meta_info.index_part_count_; index_column_count_ = index_meta_info.index_column_count_; is_index_back_ = index_meta_info.is_index_back_; is_unique_index_ = index_meta_info.is_unique_index_; is_global_index_ = index_meta_info.is_global_index_; index_micro_block_count_ = index_meta_info.index_micro_block_count_; } double ObIndexMetaInfo::get_micro_block_numbers() const { double ret = 0.0; if (index_micro_block_count_ <= 0) { // calculate micore block count use storage statistics ret = 0; } else { // get micro block count from optimizer statistics ret = static_cast(index_micro_block_count_); } return ret; } /** * @brief 估算Nested Loop Join的代价 * @formula cost(总代价) = get_next_row_cost * + left_cost + right_cost * + left_rows * rescan_cost * + JOIN_PER_ROW_COST * output_rows * + qual_cost */ int ObOptEstCostModel::cost_nestloop(const ObCostNLJoinInfo &est_cost_info, double &cost, ObIArray &all_predicate_sel) { int ret = OB_SUCCESS; cost = 0.0; if (OB_ISNULL(est_cost_info.table_metas_) || OB_ISNULL(est_cost_info.sel_ctx_)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("null point", K(est_cost_info.table_metas_), K(est_cost_info.sel_ctx_)); } else { double left_rows = est_cost_info.left_rows_; double right_rows = est_cost_info.right_rows_; double cart_tuples = left_rows * right_rows; // tuples of Cartesian product double out_tuples = 0.0; double filter_selectivity = 0.0; double material_cost = 0.0; //selectivity for equal conds if (OB_FAIL(ObOptSelectivity::calculate_selectivity(*est_cost_info.table_metas_, *est_cost_info.sel_ctx_, est_cost_info.other_join_conditions_, filter_selectivity, all_predicate_sel))) { LOG_WARN("Failed to calculate filter selectivity", K(ret)); } else { out_tuples = 
cart_tuples * filter_selectivity; // 再次扫描右表全表的代价。如果不使用物化,就是读取一次右表和本层get_next_row的代价; // 如果物化,则为读取物化后的行的代价。 double once_rescan_cost = 0.0; if (est_cost_info.need_mat_) { once_rescan_cost = cost_read_materialized(right_rows); } else { double rescan_cost = 0.0; if (est_cost_info.right_has_px_rescan_) { if (est_cost_info.parallel_ > 1) { rescan_cost = cost_params_.get_px_rescan_per_row_cost(sys_stat_); } else { rescan_cost = cost_params_.get_px_batch_rescan_per_row_cost(sys_stat_); } } else { rescan_cost = cost_params_.get_rescan_cost(sys_stat_); } once_rescan_cost = est_cost_info.right_cost_ + rescan_cost + right_rows * cost_params_.get_cpu_tuple_cost(sys_stat_); } // total rescan cost if (LEFT_SEMI_JOIN == est_cost_info.join_type_ || LEFT_ANTI_JOIN == est_cost_info.join_type_) { double match_sel = (est_cost_info.anti_or_semi_match_sel_ < OB_DOUBLE_EPSINON) ? OB_DOUBLE_EPSINON : est_cost_info.anti_or_semi_match_sel_; out_tuples = left_rows * match_sel; } cost += left_rows * once_rescan_cost; //qual cost double qual_cost = cost_quals(left_rows * right_rows, est_cost_info.equal_join_conditions_) + cost_quals(left_rows * right_rows, est_cost_info.other_join_conditions_); cost += qual_cost; double join_cost = cost_params_.get_join_per_row_cost(sys_stat_) * out_tuples; cost += join_cost; LOG_TRACE("OPT: [COST NESTLOOP JOIN]", K(cost), K(qual_cost), K(join_cost),K(once_rescan_cost), K(est_cost_info.left_cost_), K(est_cost_info.right_cost_), K(left_rows), K(right_rows), K(est_cost_info.right_width_), K(filter_selectivity), K(cart_tuples), K(material_cost)); } } return ret; } /** * @brief 估算Merge Join的代价 * @formula cost(总代价) = left_cost + right_cost * + get_next_row_cost * + qual_cost * + COST_JOIN_PER_ROW * output_rows * * @param[in] est_cost_info 用于计算merge join代价的一些参数 * @param[out] merge_cost merge join算子的总代价 */ int ObOptEstCostModel::cost_mergejoin(const ObCostMergeJoinInfo &est_cost_info, double &cost) { int ret = OB_SUCCESS; double left_selectivity = 0.0; double 
right_selectivity = 0.0; cost = 0.0; double left_rows = est_cost_info.left_rows_; double right_rows = est_cost_info.right_rows_; double left_width = est_cost_info.left_width_; double cond_tuples = 0.0; double out_tuples = 0.0; double cond_sel = est_cost_info.equal_cond_sel_; double filter_sel = est_cost_info.other_cond_sel_; if (IS_SEMI_ANTI_JOIN(est_cost_info.join_type_)) { if (LEFT_SEMI_JOIN == est_cost_info.join_type_) { cond_tuples = left_rows * cond_sel; } else if (LEFT_ANTI_JOIN == est_cost_info.join_type_) { cond_tuples = left_rows * (1 - cond_sel); } else if (RIGHT_SEMI_JOIN == est_cost_info.join_type_) { cond_tuples = right_rows * cond_sel; } else if (RIGHT_ANTI_JOIN == est_cost_info.join_type_) { cond_tuples = right_rows * (1 - cond_sel); } else { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected join type", K(est_cost_info.join_type_), K(ret)); } } else { cond_tuples = left_rows * right_rows * cond_sel; } out_tuples = cond_tuples * filter_sel; // get_next_row()获取左表和右表所有行的代价 cost += cost_params_.get_cpu_tuple_cost(sys_stat_) * (left_rows + right_rows); // 谓词代价 cost += cost_quals(cond_tuples, est_cost_info.equal_join_conditions_) + cost_quals(cond_tuples, est_cost_info.other_join_conditions_); // JOIN连接的代价 cost += cost_params_.get_join_per_row_cost(sys_stat_) * out_tuples; cost += cost_material(left_rows, left_width); cost += cost_read_materialized(left_rows); LOG_TRACE("OPT: [COST MERGE JOIN]", K(left_rows), K(right_rows), K(cond_sel), K(filter_sel), K(cond_tuples), K(out_tuples), K(cost)); return ret; } /** * @brief 估算Hash Join的代价 * @formula cost(总代价) = left_cost + right_cost * + left_rows * BUILD_HASH_PER_ROW_COST * + material_cost * + right_rows * PROBE_HASH_PER_ROW_COST * + (left_rows + right_rows) * HASH_COST * + qual_cost * + JOIN_PER_ROW_COST * output_rows * @param[in] est_cost_info 用于计算hash join代价的一些参数 * @param[out] hash_cost hash join算子的总代价 * @param[in] all_predicate_sel 各个谓词的选择率 */ int ObOptEstCostModel::cost_hashjoin(const ObCostHashJoinInfo 
&est_cost_info, double &cost) { int ret = OB_SUCCESS; cost = 0.0; double build_hash_cost = 0.0; double left_rows = est_cost_info.left_rows_; double right_rows = est_cost_info.right_rows_; double cond_sel = est_cost_info.equal_cond_sel_; double filter_sel = est_cost_info.other_cond_sel_; // number of tuples satisfying join-condition double cond_tuples = 0.0; // number of tuples satisfying filters, which is also the number of output tuples double out_tuples = 0.0; if (IS_SEMI_ANTI_JOIN(est_cost_info.join_type_)) { if (LEFT_SEMI_JOIN == est_cost_info.join_type_) { cond_tuples = left_rows * cond_sel; } else if (LEFT_ANTI_JOIN == est_cost_info.join_type_) { cond_tuples = left_rows * (1 - cond_sel); } else if (RIGHT_SEMI_JOIN == est_cost_info.join_type_) { cond_tuples = right_rows * cond_sel; } else if (RIGHT_ANTI_JOIN == est_cost_info.join_type_) { cond_tuples = right_rows * (1 - cond_sel); } else { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected join type", K(est_cost_info.join_type_), K(ret)); } } else { cond_tuples = left_rows * right_rows * cond_sel; } out_tuples = cond_tuples * filter_sel; double join_filter_cost = 0.0; for (int i = 0; i < est_cost_info.join_filter_infos_.count(); ++i) { const JoinFilterInfo& info = est_cost_info.join_filter_infos_.at(i); //bloom filter构建、使用代价 join_filter_cost += cost_hash(left_rows, info.lexprs_) + cost_hash(right_rows, info.rexprs_); if (info.need_partition_join_filter_) { //partition join filter代价 join_filter_cost += cost_hash(left_rows, info.lexprs_); } right_rows *= info.join_filter_selectivity_; } cost += join_filter_cost; // build hash cost for left table build_hash_cost += cost_params_.get_cpu_tuple_cost(sys_stat_) * left_rows; build_hash_cost += cost_material(left_rows, est_cost_info.left_width_); build_hash_cost += cost_hash(left_rows, est_cost_info.equal_join_conditions_); build_hash_cost += cost_params_.get_build_hash_per_row_cost(sys_stat_) * left_rows; // probe cost for right table cost += build_hash_cost; cost += 
cost_params_.get_cpu_tuple_cost(sys_stat_) * right_rows; cost += cost_hash(right_rows, est_cost_info.equal_join_conditions_); cost += cost_params_.get_probe_hash_per_row_cost(sys_stat_) * right_rows; cost += cost_quals(cond_tuples, est_cost_info.equal_join_conditions_) + cost_quals(cond_tuples, est_cost_info.other_join_conditions_); cost += cost_params_.get_join_per_row_cost(sys_stat_) * out_tuples; LOG_TRACE("OPT: [COST HASH JOIN]", K(left_rows), K(right_rows), K(cond_sel), K(filter_sel), K(cond_tuples), K(out_tuples), K(join_filter_cost), K(cost), K(build_hash_cost)); return ret; } int ObOptEstCostModel::cost_sort_and_exchange(OptTableMetas *table_metas, OptSelectivityCtx *sel_ctx, const ObPQDistributeMethod::Type dist_method, const bool is_distributed, const bool is_local_order, const double input_card, const double input_width, const double input_cost, const int64_t out_parallel, const int64_t in_server_cnt, const int64_t in_parallel, const ObIArray &expected_ordering, const bool need_sort, const int64_t prefix_pos, double &cost) { int ret = OB_SUCCESS; double exch_cost = 0.0; double sort_cost = 0.0; bool need_exchange = (dist_method != ObPQDistributeMethod::NONE); bool exchange_need_merge_sort = need_exchange && (is_distributed || is_local_order) && (!need_sort || ObPQDistributeMethod::LOCAL == dist_method); bool exchange_sort_local_order = need_exchange && !need_sort && is_local_order; bool need_exchange_down_sort = (ObPQDistributeMethod::LOCAL == dist_method || ObPQDistributeMethod::NONE == dist_method) && (need_sort || is_local_order); bool need_exchange_up_sort = need_sort && need_exchange && ObPQDistributeMethod::LOCAL != dist_method; cost = 0.0; if (need_exchange) { ObSEArray exchange_sort_keys; if (exchange_need_merge_sort && OB_FAIL(exchange_sort_keys.assign(expected_ordering))) { LOG_WARN("failed to assign sort keys", K(ret)); } else { ObExchCostInfo exch_info(input_card, input_width, dist_method, out_parallel, in_parallel, exchange_sort_local_order, 
exchange_sort_keys, in_server_cnt); if (OB_FAIL(ObOptEstCostModel::cost_exchange(exch_info, exch_cost))) { LOG_WARN("failed to cost exchange", K(ret)); } else { /*do nothing*/ } } } if (OB_SUCC(ret) && (need_exchange_down_sort || need_exchange_up_sort)) { double card = input_card; double width = input_width; bool real_local_order = false; int64_t real_prefix_pos = 0; if (need_exchange_down_sort) { card /= out_parallel; real_prefix_pos = need_sort && !is_local_order ? prefix_pos : 0; real_local_order = need_sort ? false : is_local_order; } else { real_prefix_pos = need_exchange ? 0 : prefix_pos; if (ObPQDistributeMethod::BROADCAST != dist_method) { card /= in_parallel; } } ObSortCostInfo cost_info(card, width, real_prefix_pos, expected_ordering, real_local_order, table_metas, sel_ctx); if (OB_FAIL(ObOptEstCostModel::cost_sort(cost_info, sort_cost))) { LOG_WARN("failed to calc cost", K(ret)); } else { /*do nothing*/ } } if (OB_SUCC(ret)) { cost = input_cost + exch_cost + sort_cost; LOG_TRACE("succeed to compute distributed sort cost", K(input_cost), K(exch_cost), K(sort_cost), K(need_sort), K(prefix_pos), K(is_local_order)); } return ret; } int ObOptEstCostModel::cost_sort(const ObSortCostInfo &cost_info, double &cost) { int ret = OB_SUCCESS; ObSEArray order_exprs; ObSEArray order_types; // top-n排序不会进行前缀排序 // 如果获取不到est_sel_info,也回退到普通的排序代价估算 cost = 0.0; if (OB_FAIL(ObOptimizerUtil::get_expr_and_types(cost_info.order_items_, order_exprs, order_types))) { LOG_WARN("failed to get expr types", K(ret)); } else if (order_exprs.empty()) { /*do nothing*/ } else if (cost_info.is_local_merge_sort_) { if (OB_FAIL(cost_local_order_sort(cost_info, order_types, cost))) { LOG_WARN("failed to cost local order sort", K(ret)); } else { // get_next_row获取下层算子行的代价 cost += cost_params_.get_cpu_tuple_cost(sys_stat_) * cost_info.rows_; } } else if (cost_info.prefix_pos_ > 0) { // prefix sort if (OB_FAIL(cost_prefix_sort(cost_info, order_exprs, cost_info.topn_, cost))) { LOG_WARN("failed to 
calc prefix cost", K(ret)); } else { // get_next_row获取下层算子行的代价 cost += cost_params_.get_cpu_tuple_cost(sys_stat_) * cost_info.rows_; } } else if (cost_info.part_cnt_ > 0 && cost_info.topn_ >= 0) { //part topn sort/part topn limit if (OB_FAIL(cost_part_topn_sort(cost_info, order_exprs, order_types, cost))) { LOG_WARN("failed to calc part cost", K(ret)); } else { // get_next_row获取下层算子行的代价 cost += cost_params_.get_cpu_tuple_cost(sys_stat_) * cost_info.rows_; } } else if (cost_info.topn_ >= 0) { //top-n sort if (OB_FAIL(cost_topn_sort(cost_info, order_types, cost))) { LOG_WARN("failed to calc topn sort cost", K(ret)); } else { // get_next_row获取下层算子行的代价 cost += cost_params_.get_cpu_tuple_cost(sys_stat_) * cost_info.rows_; } } else if (cost_info.part_cnt_ > 0) { // part sort if (OB_FAIL(cost_part_sort(cost_info, order_exprs, order_types, cost))) { LOG_WARN("failed to calc part cost", K(ret)); } else { // get_next_row获取下层算子行的代价 cost += cost_params_.get_cpu_tuple_cost(sys_stat_) * cost_info.rows_; } } else { // normal sort if (OB_FAIL(cost_sort(cost_info, order_types, cost))) { LOG_WARN("failed to calc cost", K(ret)); } else { // get_next_row获取下层算子行的代价 cost += cost_params_.get_cpu_tuple_cost(sys_stat_) * cost_info.rows_; } } LOG_TRACE("succeed to compute sort cost", K(cost_info), K(cost)); return ret; } /** * @brief 估算Sort算子代价的函数。 * @formula cost = material_cost + sort_cost * material_cost = cost_material(...) + cost_read_materialized(...) 
* sort_cost = cost_cmp_per_row * N * logN * @param[in] cost_info 估算排序代价的一些参数 * row 待排序的行数 * width 平均行长 * @param[in] order_cols 排序列 * @param[out] cost 排序算子自身的代价 */ int ObOptEstCostModel::cost_sort(const ObSortCostInfo &cost_info, const ObIArray &order_col_types, double &cost) { int ret = OB_SUCCESS; cost = 0.0; double real_sort_cost = 0.0; double material_cost = 0.0; double rows = cost_info.rows_; double width = cost_info.width_; if (rows < 1.0) { material_cost = 0; } else { material_cost = cost_material(rows, width) + cost_read_materialized(rows * LOG2(rows)); } if (OB_FAIL(cost_sort_inner(order_col_types, rows, real_sort_cost))) { LOG_WARN("failed to calc cost", K(ret)); } else { cost = material_cost + real_sort_cost; LOG_TRACE("OPT: [COST SORT]", K(cost), K(material_cost), K(real_sort_cost), K(rows), K(width), K(order_col_types), "is_prefix_sort", cost_info.prefix_pos_ > 0); } return ret; } /** * 理想假设:行数为2的指数倍(保证桶数量和行数量相等),输入数据具有任意性,桶内所有数据哈希值不同。 * @brief 估算 PART_SORT Sort算子代价的函数,与窗口函数连用。 * @formula cost = material_cost + hash_cost + sort_cost * material_cost = cost_material(...) + cost_read_materialized(...) 
* hash_cost = calc_hash * part_expr * rows + build_hash * rows * sort_cost = cost_cmp_per_row * rows * theoretical_cmp_times * @param[in] cost_info 估算排序代价的一些参数 * rows 待排序的行数 * width 平均行长 * @param[in] order_exprs 需要排序的表达式,前半部分是 part by,后半部分是 order by * @param[in] order_col_types 需要排序的列的类型,order by 部分用作桶内排序 * @param[in] part_cnt 表达式组中 part by 部分所占个数,用作桶间排序 * @param[out] cost 排序算子自身的代价 */ int ObOptEstCostModel::cost_part_sort(const ObSortCostInfo &cost_info, const ObIArray &order_exprs, const ObIArray &order_col_types, double &cost) { int ret = OB_SUCCESS; cost = 0.0; double real_sort_cost = 0.0; double material_cost = 0.0; double calc_hash_cost = 0.0; double rows = cost_info.rows_; double width = cost_info.width_; double distinct_parts = rows; ObSEArray part_exprs; ObSEArray sort_types; for (int64_t i = 0; OB_SUCC(ret) && i < order_exprs.count(); ++i) { if (i < cost_info.part_cnt_) { if (OB_FAIL(part_exprs.push_back(order_exprs.at(i)))) { LOG_WARN("fail to push back expr", K(ret)); } } else { if (OB_FAIL(sort_types.push_back(order_col_types.at(i)))) { LOG_WARN("fail to push back type", K(ret)); } } } if (OB_SUCC(ret)) { if (OB_FAIL(ObOptSelectivity::calculate_distinct(*cost_info.table_metas_, *cost_info.sel_ctx_, part_exprs, rows, distinct_parts))) { LOG_WARN("failed to calculate distinct", K(ret)); } else if (OB_UNLIKELY(distinct_parts < 1.0 || distinct_parts > rows)) { distinct_parts = rows; } } if (OB_SUCC(ret)) { if (OB_UNLIKELY(rows < 0.0)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid row count", K(rows), K(ret)); } else if (rows < 1.0) { // do nothing } else { double comp_cost = 0.0; if (sort_types.count() > 0 && OB_FAIL(get_sort_cmp_cost(sort_types, comp_cost))) { LOG_WARN("failed to get cmp cost", K(ret)); } else if (OB_UNLIKELY(comp_cost < 0.0)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("negative cost", K(comp_cost), K(ret)); } else { real_sort_cost = rows * LOG2(rows / distinct_parts) * comp_cost; material_cost = cost_material(rows, width) + 
cost_read_materialized(rows); calc_hash_cost = cost_hash(rows, part_exprs) + rows * cost_params_.get_build_hash_per_row_cost(sys_stat_) / 2.0; cost = real_sort_cost + material_cost + calc_hash_cost; LOG_TRACE("OPT: [COST HASH SORT]", K(cost), K(real_sort_cost), K(calc_hash_cost), K(material_cost), K(rows), K(width), K(cost_info.part_cnt_)); } } } return ret; } int ObOptEstCostModel::cost_part_topn_sort(const ObSortCostInfo &cost_info, const ObIArray &order_exprs, const ObIArray &order_col_types, double &cost) { int ret = OB_SUCCESS; cost = 0.0; double real_sort_cost = 0.0; double material_cost = 0.0; double calc_hash_cost = 0.0; double rows = cost_info.rows_; double width = cost_info.width_; double distinct_parts = rows; ObSEArray part_exprs; ObSEArray sort_types; for (int64_t i = 0; OB_SUCC(ret) && i < order_exprs.count(); ++i) { if (i < cost_info.part_cnt_) { if (OB_FAIL(part_exprs.push_back(order_exprs.at(i)))) { LOG_WARN("fail to push back expr", K(ret)); } } else { if (OB_FAIL(sort_types.push_back(order_col_types.at(i)))) { LOG_WARN("fail to push back type", K(ret)); } } } if (OB_SUCC(ret)) { if (OB_FAIL(ObOptSelectivity::calculate_distinct(*cost_info.table_metas_, *cost_info.sel_ctx_, part_exprs, rows, distinct_parts))) { LOG_WARN("failed to calculate distinct", K(ret)); } else if (OB_UNLIKELY(distinct_parts < 1.0 || distinct_parts > rows)) { distinct_parts = rows; } } if (OB_SUCC(ret)) { //partition topn sort double topn = cost_info.topn_; double one_part_rows = rows; if (distinct_parts != 0) { one_part_rows = rows / distinct_parts; } if (topn > one_part_rows) { topn = one_part_rows; } material_cost = cost_material(topn, width) * distinct_parts; if (sort_types.count() > 0 && OB_FAIL(cost_topn_sort_inner(sort_types, one_part_rows, topn, real_sort_cost))) { LOG_WARN("failed to calc cost", K(ret)); } else { real_sort_cost = real_sort_cost * distinct_parts; calc_hash_cost = cost_hash(rows, part_exprs) + rows * cost_params_.get_build_hash_per_row_cost(sys_stat_) 
/ 2.0; cost = material_cost + real_sort_cost + calc_hash_cost; LOG_TRACE("OPT: [COST PARTITION TOPN SORT]", K(cost), K(calc_hash_cost), K(material_cost), K(real_sort_cost), K(rows), K(width), K(topn), K(cost_info.part_cnt_)); } } return ret; } int ObOptEstCostModel::cost_prefix_sort(const ObSortCostInfo &cost_info, const ObIArray &order_exprs, const int64_t topn_count, double &cost) { int ret = OB_SUCCESS; double rows = cost_info.rows_; double width = cost_info.width_; double cost_per_group = 0.0; if (OB_ISNULL(cost_info.table_metas_) || OB_ISNULL(cost_info.sel_ctx_) || OB_UNLIKELY(cost_info.prefix_pos_ <= 0 || cost_info.prefix_pos_ >= order_exprs.count())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("get unexpected error", K(cost_info.table_metas_), K(cost_info.sel_ctx_), K(cost_info.prefix_pos_), K(order_exprs.count()), K(ret)); } else { ObSEArray prefix_ordering; ObSEArray ordering_per_group; for (int64_t i = 0; OB_SUCC(ret) && i < cost_info.prefix_pos_; i++) { if (OB_ISNULL(order_exprs.at(i))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(ret)); } else if (OB_FAIL(prefix_ordering.push_back(order_exprs.at(i)))) { LOG_WARN("failed to push back expr", K(ret)); } else { /*do nothing*/ } } for (int64_t i = cost_info.prefix_pos_; OB_SUCC(ret) && i < order_exprs.count(); ++i) { if (OB_ISNULL(order_exprs.at(i))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(ret)); } else if (OB_FAIL(ordering_per_group.push_back(OrderItem(order_exprs.at(i))))) { LOG_WARN("failed to push array", K(ret)); } else { /*do nothing*/ } } if (OB_SUCC(ret)) { // 前缀排序的每个部分不会进行前缀排序,也不会进行topn排序 int64_t prefix_pos = 0; double num_rows_per_group = 0; double num_distinct_rows = rows; if (OB_FAIL(ObOptSelectivity::calculate_distinct(*cost_info.table_metas_, *cost_info.sel_ctx_, prefix_ordering, rows, num_distinct_rows))) { LOG_WARN("failed to calculate distinct", K(ret)); } else if (OB_UNLIKELY(std::fabs(num_distinct_rows) < OB_DOUBLE_EPSINON)) { num_rows_per_group = rows; } 
else { num_rows_per_group = rows / num_distinct_rows; } if (topn_count >= 0 && num_rows_per_group > 0) { // topn prefix sort double remaining_count = topn_count; while (remaining_count > 0 && num_rows_per_group > 0) { ObSortCostInfo cost_info_per_group(num_rows_per_group, width, prefix_pos, ordering_per_group, false); cost_info_per_group.topn_ = remaining_count; if (OB_FAIL(cost_sort(cost_info_per_group, cost_per_group))) { LOG_WARN("failed to cost sort", K(ret)); } else { cost += cost_per_group; remaining_count -= num_rows_per_group; } } } else { // normal prefix sort ObSortCostInfo cost_info_per_group(num_rows_per_group, width, prefix_pos, ordering_per_group, false, cost_info.table_metas_, cost_info.sel_ctx_); if (OB_FAIL(cost_sort(cost_info_per_group, cost_per_group))) { LOG_WARN("failed to calc cost", K(ret)); } else { cost = cost_per_group * num_distinct_rows; LOG_TRACE("OPT: [COST PREFIX SORT]", K(cost), K(cost_per_group), K(num_distinct_rows)); } } } } return ret; } /** * @brief 计算排序算子实际排序部分的代价 * * cost = cost_cmp * rows * log(row_count) */ int ObOptEstCostModel::cost_sort_inner(const ObIArray &types, double row_count, double &cost) { int ret = OB_SUCCESS; cost = 0.0; if (OB_UNLIKELY(0.0 > row_count)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid row count", K(row_count), K(ret)); } else if (row_count < 1.0) { // LOG2(x) 在x小于1时为负数,这里需要特殊处理 cost = 0.0; } else { double cost_cmp = 0.0; if (OB_FAIL(get_sort_cmp_cost(types, cost_cmp))) { LOG_WARN("failed to get cmp cost", K(ret)); } else if (OB_UNLIKELY(0.0 > cost_cmp)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("negative cost", K(cost_cmp), K(ret)); } else { cost = cost_cmp * row_count * LOG2(row_count); } } return ret; } int ObOptEstCostModel::cost_local_order_sort_inner(const common::ObIArray &types, double row_count, double &cost) { int ret = OB_SUCCESS; cost = 0.0; if (OB_UNLIKELY(0.0 > row_count)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid row count", K(row_count), K(ret)); } else if (row_count < 1.0) { // 
LOG2(x) 在x小于1时为负数,这里需要特殊处理 cost = 0.0; } else { double cost_cmp = 0.0; if (OB_FAIL(get_sort_cmp_cost(types, cost_cmp))) { LOG_WARN("failed to get cmp cost", K(ret)); } else if (OB_UNLIKELY(0.0 > cost_cmp)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("negative cost", K(cost_cmp), K(ret)); } else { cost = cost_cmp * row_count * LOG2(ObOptEstCostModel::DEFAULT_LOCAL_ORDER_DEGREE); } } return ret; }

/**
 * @brief Estimate the cost of a TOP-N sort operator.
 * @formula cost = material_cost + sort_cost
 *          material_cost = cost_material(...)
 *          sort_cost = cost_cmp_per_row * rows * logN
 * @param[in] cost_info parameters used to estimate the sort cost
 *            rows   number of rows to sort
 *            topn   TOP-N
 *            width  average row width
 * @param[in] types result types of the sort columns; forwarded to
 *            cost_topn_sort_inner
 * @param[out] cost cost of the sort operator itself; stays 0 when there are
 *             no sort columns or topn is negative
 */
int ObOptEstCostModel::cost_topn_sort(const ObSortCostInfo &cost_info,
                                      const ObIArray &types,
                                      double &cost)
{
  int ret = OB_SUCCESS;
  cost = 0.0;
  double rows = cost_info.rows_;
  double width = cost_info.width_;
  double topn = cost_info.topn_;
  double real_sort_cost = 0.0;
  double material_cost = 0.0;
  if (0 == types.count() || topn < 0) {
    // do nothing
  } else {
    // topn can never exceed the number of input rows
    if (topn > rows) {
      topn = rows;
    }
    // A top-n sort materializes at least n rows and at most `rows` rows;
    // we assume it materializes roughly the average of the two, (n + rows) / 2.
    // NOTE(review): the call below passes the clamped topn itself, not that
    // average - confirm which one is intended.
    material_cost = cost_material(topn, width);
    if (OB_FAIL(cost_topn_sort_inner(types, rows, topn, real_sort_cost))) {
      LOG_WARN("failed to calc cost", K(ret));
    } else {
      cost = material_cost + real_sort_cost;
      LOG_TRACE("OPT: [COST TOPN SORT]",
                K(cost), K(material_cost), K(real_sort_cost),
                K(rows), K(width), K(topn));
    }
  }
  return ret;
}

/**
 * @brief Estimate the cost of a local-order sort.
 *
 * cost = cost_material(rows, width) + cost_read_materialized(rows)
 *        + the comparison cost computed by cost_local_order_sort_inner
 *
 * @param[in] cost_info supplies rows_ and width_
 * @param[in] types result types of the sort columns
 * @param[out] cost cost of the operator itself
 */
int ObOptEstCostModel::cost_local_order_sort(const ObSortCostInfo &cost_info,
                                             const ObIArray &types,
                                             double &cost)
{
  int ret = OB_SUCCESS;
  cost = 0.0;
  double real_sort_cost = 0.0;
  double material_cost = 0.0;
  double rows = cost_info.rows_;
  double width = cost_info.width_;
  material_cost = cost_material(rows, width) + cost_read_materialized(rows);
  if (OB_FAIL(cost_local_order_sort_inner(types, rows, real_sort_cost))) {
    LOG_WARN("failed to calc cost", K(ret));
  } else {
    cost = material_cost + real_sort_cost;
    LOG_TRACE("OPT: [COST LOCAL ORDER SORT]",
              K(cost), K(material_cost), K(real_sort_cost),
              K(rows), K(width), K(types));
  }
  return ret;
}

/**
 * @brief Compute the cost of the actual sorting part of a top-n sort operator.
 *
 *    cost = cost_cmp * rows * log(n)
 *
 * Returns OB_INVALID_ARGUMENT for negative rows and OB_ERR_UNEXPECTED when
 * the per-row comparison cost is negative.
 */
int ObOptEstCostModel::cost_topn_sort_inner(const ObIArray &types,
                                            double rows,
                                            double n,
                                            double &cost)
{
  int ret = OB_SUCCESS;
  cost = 0.0;
  if (OB_UNLIKELY(0.0 > rows)) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("invalid number of rows", K(rows), K(ret));
  } else if (n < 1.0) {
    // LOG2(x) is negative when x is below 1, so this case is handled specially
    cost = 0.0;
  } else {
    double cost_cmp = 0.0;
    if (OB_FAIL(get_sort_cmp_cost(types, cost_cmp))) {
      LOG_WARN("failed to get cmp cost", K(ret));
    } else if (OB_UNLIKELY(0.0 > cost_cmp)) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("negative cost", K(cost_cmp), K(ret));
    } else {
      cost = cost_cmp * rows * LOG2(n);
    }
  }
  return ret;
}

/**
 * @brief Estimate the total cost of an exchange: the sum of the exchange-out
 *        (sender) cost and the exchange-in (receiver) cost.
 *
 * @param[in] cost_info rows, width, distribution method, in/out parallelism,
 *            server count and sort information of the exchange
 * @param[out] ex_cost ex_out_cost + ex_in_cost; only assigned on success
 */
int ObOptEstCostModel::cost_exchange(const ObExchCostInfo &cost_info,
                                     double &ex_cost)
{
  int ret = OB_SUCCESS;
  double ex_out_cost = 0.0;
  double ex_in_cost = 0.0;
  ObExchOutCostInfo out_est_cost_info(cost_info.rows_,
                                      cost_info.width_,
                                      cost_info.dist_method_,
                                      cost_info.out_parallel_,
                                      cost_info.in_server_cnt_);
  ObExchInCostInfo in_est_cost_info(cost_info.rows_,
                                    cost_info.width_,
                                    cost_info.dist_method_,
                                    cost_info.in_parallel_,
                                    cost_info.in_server_cnt_,
                                    cost_info.is_local_order_,
                                    cost_info.sort_keys_);
  if (OB_FAIL(ObOptEstCostModel::cost_exchange_out(out_est_cost_info, ex_out_cost))) {
    LOG_WARN("failed to cost exchange in output", K(ret));
  } else if (OB_FAIL(ObOptEstCostModel::cost_exchange_in(in_est_cost_info, ex_in_cost))) {
    LOG_WARN("failed to cost exchange in", K(ret));
  } else {
    ex_cost = ex_out_cost + ex_in_cost;
  }
  return ret;
}

/**
 * @brief Estimate the receiver-side cost of an exchange.
 *
 * Rows handled per degree of parallelism:
 *   BC2HOST   : rows * server_cnt / parallel
 *   BROADCAST : rows
 *   otherwise : rows / parallel
 * The cost is the per-tuple CPU cost plus the per-byte deserialization cost,
 * plus a materialization for BROADCAST, plus a merge cost when sort keys are
 * present.
 *
 * @param[in] cost_info receiver-side exchange parameters
 * @param[out] cost estimated cost
 * @return OB_ERR_UNEXPECTED when parallel_ is below 1
 */
int ObOptEstCostModel::cost_exchange_in(const ObExchInCostInfo &cost_info,
                                        double &cost)
{
  int ret = OB_SUCCESS;
  double per_dop_rows = 0.0;
  // NOTE(review): no element type / capacity arguments are visible on these
  // ObSEArray declarations in this copy of the file - confirm against
  // version control.
  ObSEArray order_exprs;
  ObSEArray order_types;
  cost = 0;
  if (OB_UNLIKELY(cost_info.parallel_ < 1)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("get unexpected parallel degree", K(ret));
  } else if (OB_FAIL(ObOptimizerUtil::get_expr_and_types(cost_info.sort_keys_,
                                                         order_exprs,
                                                         order_types))) {
    LOG_WARN("failed to get order expr and order types", K(ret));
  } else if (ObPQDistributeMethod::BC2HOST == cost_info.dist_method_) {
    per_dop_rows = cost_info.rows_ * cost_info.server_cnt_ / cost_info.parallel_;
  } else if (ObPQDistributeMethod::BROADCAST == cost_info.dist_method_) {
    per_dop_rows = cost_info.rows_;
  } else {
    per_dop_rows = cost_info.rows_ / cost_info.parallel_;
  }
  if (OB_SUCC(ret)) {
    cost = cost_params_.get_cpu_tuple_cost(sys_stat_) * per_dop_rows;
    cost += cost_params_.get_network_deser_per_byte_cost(sys_stat_) * per_dop_rows * cost_info.width_;
    LOG_TRACE("OPT: [COST EXCHANGE IN]",
              K(cost_info.rows_), K(cost_info.width_),
              K(cost_info.dist_method_), K(cost_info.parallel_), K(cost));
    if (ObPQDistributeMethod::BROADCAST == cost_info.dist_method_) {
      // every thread has to copy the data received by the current machine
      cost += ObOptEstCostModel::cost_material(per_dop_rows, cost_info.width_);
    }
    if (!cost_info.sort_keys_.empty() && per_dop_rows > 0) {
      double merge_degree = 0;
      double cmp_cost = 0.0;
      if (cost_info.is_local_order_) {
        cost += ObOptEstCostModel::cost_material(per_dop_rows, cost_info.width_);
        merge_degree = ObOptEstCostModel::DEFAULT_LOCAL_ORDER_DEGREE * cost_info.parallel_;
      } else {
        merge_degree = cost_info.parallel_;
      }
      // the merge degree cannot exceed the number of rows being merged
      // NOTE(review): per_dop_rows may lie in (0, 1); merge_degree is then
      // clamped below 1 and LOG2(merge_degree) is negative - confirm this is
      // acceptable.
      if (merge_degree > per_dop_rows) {
        merge_degree = per_dop_rows;
      }
      if (OB_FAIL(get_sort_cmp_cost(order_types, cmp_cost))) {
        LOG_WARN("failed to get sort cmp cost", K(ret));
      } else {
        cost += per_dop_rows * LOG2(merge_degree) * cmp_cost;
      }
    }
  }
  return ret;
}

/**
 * @brief Estimate the sender-side cost of an exchange.
 *
 * Each degree of parallelism serializes rows / parallel rows. For BC2HOST and
 * BROADCAST it transmits rows * server_cnt / parallel rows, otherwise it
 * transmits exactly the rows it serialized.
 *
 * @param[in] cost_info sender-side exchange parameters
 * @param[out] cost estimated cost
 * @return OB_ERR_UNEXPECTED when parallel_ is below 1
 */
int ObOptEstCostModel::cost_exchange_out(const ObExchOutCostInfo &cost_info,
                                         double &cost)
{
  int ret = OB_SUCCESS;
  double per_dop_ser_rows = 0.0;
  double per_dop_trans_rows = 0.0;
  cost = 0.0;
  if (OB_UNLIKELY(cost_info.parallel_ < 1)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("get unexpected parallel degree", K(cost_info.parallel_), K(ret));
  } else if (ObPQDistributeMethod::BC2HOST == cost_info.dist_method_
             || ObPQDistributeMethod::BROADCAST == cost_info.dist_method_) {
    per_dop_ser_rows = cost_info.rows_ / cost_info.parallel_;
    per_dop_trans_rows = cost_info.rows_ * cost_info.server_cnt_ / cost_info.parallel_;
  } else {
    per_dop_ser_rows = cost_info.rows_ / cost_info.parallel_;
    per_dop_trans_rows = per_dop_ser_rows;
  }
  if (OB_SUCC(ret)) {
    // add repartition cost, hash-hash cost ?
    cost = cost_params_.get_cpu_tuple_cost(sys_stat_) * per_dop_ser_rows;
    cost += cost_params_.get_network_ser_per_byte_cost(sys_stat_) * per_dop_ser_rows * cost_info.width_;
    cost += cost_params_.get_network_trans_per_byte_cost(sys_stat_) * per_dop_trans_rows * cost_info.width_;
    LOG_TRACE("OPT: [COST EXCHANGE OUT]",
              K(cost_info.rows_), K(cost_info.width_),
              K(cost_info.dist_method_), K(cost_info.parallel_), K(cost));
  }
  return ret;
}

/**
 * @brief Estimate the cost of a Merge Group By operator.
 * @note We assume that a group-by comparison almost always has to compare
 *       all columns.
 *
 * @formula cost = CPU_TUPLE_COST * rows
 *                 + qual_cost(CMP_DEFAULT * num_group_columns * rows)
 *                 + PER_AGGR_FUNC_COST * num_aggr_columns * rows
 *          The code additionally adds cost_material(res_rows, row_width).
 *
 * @param[in] rows            number of input rows
 * @param[in] res_rows        number of rows passed to cost_material
 * @param[in] row_width       row width passed to cost_material
 * @param[in] group_columns   group-by columns
 * @param[in] agg_col_count   number of aggregate functions
 * @return cost of the operator itself
 */
double ObOptEstCostModel::cost_merge_group(double rows,
                                           double res_rows,
                                           double row_width,
                                           const ObIArray &group_columns,
                                           int64_t agg_col_count)
{
  double cost = 0.0;
  cost += cost_params_.get_cpu_tuple_cost(sys_stat_) * rows;
  // material cost
  cost += cost_material(res_rows, row_width);
  cost += cost_quals(rows, group_columns);
  cost += cost_params_.get_per_aggr_func_cost(sys_stat_) * static_cast(agg_col_count) * rows;
  LOG_TRACE("OPT: [COST MERGE GROUP BY]", K(cost), K(agg_col_count), K(rows), K(res_rows));
  return cost;
}

/**
 * @brief Estimate the cost of a Hash Group By operator.
 * @formula cost = CPU_TUPLE_COST * rows
 *                 + BUILD_HASH_COST * res_rows
 *                 + PROBE_HASH_COST * rows
 *                 + hash_calculation_cost
 *                 + PER_AGGR_FUNC_COST * num_aggr_columns * rows
 * @param[in] rows            number of input rows
 * @param[in] group_columns   group-by columns
 * @param[in] res_rows        number of output rows
 * @param[in] agg_col_count   number of aggregate functions
* @return 算子自身的代价 */ double ObOptEstCostModel::cost_hash_group(double rows, double res_rows, double row_width, const ObIArray &group_columns, int64_t agg_col_count) { double cost = 0; cost += cost_params_.get_cpu_tuple_cost(sys_stat_) * rows; cost += cost_material(res_rows, row_width); cost += cost_params_.get_build_hash_per_row_cost(sys_stat_) * res_rows; cost += cost_params_.get_probe_hash_per_row_cost(sys_stat_) * rows; cost += cost_hash(rows, group_columns); cost += cost_params_.get_per_aggr_func_cost(sys_stat_) * static_cast(agg_col_count) * rows; LOG_TRACE("OPT: [HASH GROUP BY]", K(cost), K(agg_col_count), K(rows), K(res_rows)); return cost; } /** * @brief 估算Scalar Group By算子代价的函数。 * @formula cost = PER_AGGR_FUNC_COST * num_aggr_columns * rows * @param[in] rows 待排序的行数 * @param[in] agg_col_count 聚合函数的个数 * @return 算子自身的代价 */ double ObOptEstCostModel::cost_scalar_group(double rows, int64_t agg_col_count) { double cost = 0.0; cost += cost_params_.get_cpu_tuple_cost(sys_stat_) * rows; cost += cost_params_.get_per_aggr_func_cost(sys_stat_) * static_cast(agg_col_count) * rows; LOG_TRACE("OPT: [SCALAR GROUP BY]", K(cost), K(agg_col_count), K(rows)); return cost; } /** * @brief 估算Merge Distinct 算子代价的函数。 * @formula cost = get_next_row_cost * + cost_quals * @param[in] rows 输入行数 * @param[in] distinct_columns distinct的列 * @return 算子自身的代价 */ double ObOptEstCostModel::cost_merge_distinct(double rows, double res_rows, double width, const ObIArray &distinct_columns) { double cost = 0.0; cost += cost_params_.get_cpu_tuple_cost(sys_stat_) * rows; cost += cost_quals(rows, distinct_columns); LOG_TRACE("OPT: [COST MERGE DISTINCT]", K(cost), K(rows), K(res_rows)); return cost; } /** * @brief 估计Hash Distinct算子代价的函数。 * @formula cost = get_next_row_cost * + HASH_BUILD_COST * res_rows * + HASH_PROBE_COST * rows * + hash_calculation_cost * @param[in] rows 输入行数 * @param[in] res_rows 输出行数,也即distinct数 * @param[in] distinct_columns distinct列 */ double 
ObOptEstCostModel::cost_hash_distinct(double rows, double res_rows, double width, const ObIArray &distinct_columns) { double cost = 0.0; // get_next_row()的代价 cost += cost_params_.get_cpu_tuple_cost(sys_stat_) * rows; //material cost cost += cost_material(res_rows, width); // 构建hash table的代价 cost += cost_params_.get_build_hash_per_row_cost(sys_stat_) * res_rows; // probe的代价 cost += cost_params_.get_probe_hash_per_row_cost(sys_stat_) * rows; // 计算hash值代价 cost += cost_hash(rows, distinct_columns); LOG_TRACE("OPT: [COST HASH DISTINCT]", K(cost), K(rows), K(res_rows)); return cost; } /** * @brief 估算 Select 下的 Sequence 算子的代价函数 */ double ObOptEstCostModel::cost_sequence(double rows, double uniq_sequence_cnt) { return cost_params_.get_cpu_tuple_cost(sys_stat_) * rows + cost_params_.get_cpu_operator_cost(sys_stat_) * uniq_sequence_cnt; } /** * @brief 估算Limit算子代价的函数。 * @formula cost = rows * CPU_TUPLE_COST * @return 算子自身的代价 */ double ObOptEstCostModel::cost_get_rows(double rows) { return rows * cost_params_.get_cpu_tuple_cost(sys_stat_); } /** * @brief 估算读取物化后的数据代价的函数。 */ double ObOptEstCostModel::cost_read_materialized(double rows) { return rows * cost_params_.get_read_materialized_per_row_cost(sys_stat_); } /** * @brief 估算Material算子代价的函数。 * @formula cost = MATERIALZE_PER_BYTE_COST * average_row_size * rows * @param[in] rows 需要物化的行数 * @param[in] average_row_size 每行的平均长度(字节) * @return 算子自身的代价 */ double ObOptEstCostModel::cost_material(const double rows, const double average_row_size) { double cost = cost_params_.get_materialize_per_byte_write_cost(sys_stat_) * average_row_size * rows; LOG_TRACE("OPT: [COST MATERIAL]", K(cost), K(rows), K(average_row_size)); return cost; } double ObOptEstCostModel::cost_late_materialization_table_get(int64_t column_cnt) { double op_cost = 0.0; double io_cost = cost_params_.get_micro_block_seq_cost(sys_stat_); double cpu_cost = (cost_params_.get_cpu_tuple_cost(sys_stat_) + cost_params_.get_project_column_cost(sys_stat_, PROJECT_INT, true, 
false) * column_cnt); op_cost = io_cost + cpu_cost; return op_cost; } void ObOptEstCostModel::cost_late_materialization_table_join(double left_card, double left_cost, double right_card, double right_cost, double &op_cost, double &cost) { op_cost = 0.0; cost = 0.0; // 再次扫描右表全表的代价。如果不使用物化,就是读取一次右表和本层get_next_row的代价; // 如果物化,则为读取物化后的行的代价。 double once_rescan_cost = right_cost + right_card * cost_params_.get_cpu_tuple_cost(sys_stat_); op_cost += left_card * once_rescan_cost + left_card * cost_params_.get_join_per_row_cost(sys_stat_); // 读取左表和本层get_next_row的代价 cost += left_cost + cost_params_.get_cpu_tuple_cost(sys_stat_) * left_card; cost += op_cost; } void ObOptEstCostModel::cost_late_materialization(double left_card, double left_cost, int64_t column_count, double &cost) { double op_cost = 0.0; double right_card = 1.0; double right_cost = cost_late_materialization_table_get(column_count); cost_late_materialization_table_join(left_card, left_cost, right_card, right_cost, op_cost, cost); } // entry point to estimate table cost int ObOptEstCostModel::cost_table(const ObCostTableScanInfo &est_cost_info, int64_t parallel, double &cost) { int ret = OB_SUCCESS; const double part_cnt = static_cast(est_cost_info.index_meta_info_.index_part_count_); if (OB_UNLIKELY(parallel < 1 || part_cnt < 1)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("get unexpected error", K(parallel), K(part_cnt), K(ret)); } else if (OB_NOT_NULL(est_cost_info.table_meta_info_) && EXTERNAL_TABLE == est_cost_info.table_meta_info_->table_type_) { //TODO [ExternalTable] need refine cost = 4.0 * est_cost_info.phy_query_range_row_count_; } else if (OB_FAIL(cost_basic_table(est_cost_info, part_cnt / parallel, cost))) { LOG_WARN("failed to estimate table cost", K(ret)); } else { /*do nothing*/ } return ret; } int ObOptEstCostModel::cost_table_for_parallel(const ObCostTableScanInfo &est_cost_info, const int64_t parallel, const double part_cnt_per_dop, double &px_cost, double &cost) { int ret = OB_SUCCESS; px_cost = 0.0; 
cost = 0.0; double table_cost = 0.0; if (OB_UNLIKELY(is_virtual_table(est_cost_info.ref_table_id_))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected virtual table", K(ret), K(est_cost_info.ref_table_id_)); } else if (OB_FAIL(cost_basic_table(est_cost_info, part_cnt_per_dop, table_cost))) { LOG_WARN("Failed to estimate cost", K(ret), K(est_cost_info)); } else if (OB_FAIL(ObOptEstCostModel::cost_px(parallel, px_cost))) { LOG_WARN("Failed to estimate px cost", K(ret), K(parallel)); } else { cost = table_cost + px_cost; LOG_TRACE("OPT:[ESTIMATE TABLE PARALLEL FINISH]", K(cost), K(table_cost), K(px_cost), K(parallel), K(part_cnt_per_dop), K(est_cost_info)); } return ret; } int ObOptEstCostModel::cost_px(int64_t parallel, double &px_cost) { int ret = OB_SUCCESS; px_cost = 0.0; if (parallel <= 1) { /* do nothing */ } else { px_cost = 0.1 * parallel * parallel; } return ret; } // estimate cost for real table // 1. 计算filter选择率 // 2. 判断使用哪种估行方式 // 3. 遍历key ranges, 循环获取ObBatch // 4. 估算每一个ObBatch行数 // 5. 计算每一个ObBatch代价 // 6. 处理相关输出信息 int ObOptEstCostModel::cost_basic_table(const ObCostTableScanInfo &est_cost_info, const double part_cnt_per_dop, double &cost) { int ret = OB_SUCCESS; const ObTableMetaInfo *table_meta_info = est_cost_info.table_meta_info_; if (OB_ISNULL(table_meta_info)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid args", K(ret)); } else { double row_count = est_cost_info.phy_query_range_row_count_; // revise number of output row if is row sample scan if (est_cost_info.sample_info_.is_row_sample()) { row_count *= 0.01 * est_cost_info.sample_info_.percent_; } // calc row count for one partition int64_t part_count = table_meta_info->part_count_; part_count = part_count > 0 ? 
part_count : 1; double row_count_per_part = row_count / part_count; // calc scan one partition cost if (!est_cost_info.use_column_store_ && OB_FAIL(cost_row_store_basic_table(est_cost_info, row_count_per_part, cost))) { LOG_WARN("Failed to estimate cost", K(ret), K(est_cost_info)); } else if (est_cost_info.use_column_store_ && OB_FAIL(cost_column_store_basic_table(est_cost_info, row_count_per_part, cost))) { LOG_WARN("Failed to estimate cost", K(ret), K(est_cost_info), K(row_count_per_part)); } else { // calc one parallel scan cost cost *= part_cnt_per_dop; LOG_TRACE("OPT:[ESTIMATE FINISH]", K(cost), K(part_cnt_per_dop), K(est_cost_info)); } } return ret; } int ObOptEstCostModel::cost_row_store_basic_table(const ObCostTableScanInfo &est_cost_info, double row_count, double &cost) { int ret = OB_SUCCESS; double index_back_cost = 0; if (OB_FAIL(cost_index_scan(est_cost_info, row_count, cost))) { LOG_WARN("failed to calc index scan cost", K(ret)); } else if (!est_cost_info.index_meta_info_.is_index_back_) { LOG_TRACE("OPT:[COST BASIC TABLE SCAN WITH ROW STORE]", K(row_count), K(cost)); } else if (OB_FAIL(cost_index_back(est_cost_info, row_count, index_back_cost))) { LOG_WARN("failed to calc index back cost", K(ret)); } else { cost += index_back_cost; LOG_TRACE("OPT:[COST BASIC TABLE SCAN WITH ROW STORE]", K(row_count), K(index_back_cost), K(cost)); } return ret; } int ObOptEstCostModel::cost_column_store_basic_table(const ObCostTableScanInfo &est_cost_info, double row_count, double &cost) { int ret = OB_SUCCESS; ObCostTableScanInfo column_group_est_cost_info(OB_INVALID_ID, OB_INVALID_ID, OB_INVALID_ID); double prefix_filter_sel = est_cost_info.join_filter_sel_; cost = 0.0; if (OB_FAIL(column_group_est_cost_info.assign(est_cost_info))) { LOG_WARN("failed to assign est cost info", K(ret)); } else { column_group_est_cost_info.access_column_items_.reuse(); column_group_est_cost_info.prefix_filters_.reuse(); column_group_est_cost_info.postfix_filters_.reuse(); 
column_group_est_cost_info.use_column_store_ = true; column_group_est_cost_info.join_filter_sel_ = 1.0; } // calc scan cost for each column group for (int64_t i = 0; OB_SUCC(ret) && i 0) { row_count /= est_cost_info.ss_prefix_ndv_; } if (ObSimpleBatch::T_GET == est_cost_info.batch_type_ || ObSimpleBatch::T_MULTI_GET == est_cost_info.batch_type_) { if (OB_FAIL(cost_range_get(est_cost_info, true, row_count, cost))) { LOG_WARN("Failed to estimate get cost", K(ret)); } } else if (ObSimpleBatch::T_SCAN == est_cost_info.batch_type_ || ObSimpleBatch::T_MULTI_SCAN == est_cost_info.batch_type_) { if (OB_FAIL(cost_range_scan(est_cost_info, true, row_count, cost))) { LOG_WARN("Failed to estimate scan cost", K(ret)); } } else { ret = OB_ERR_UNEXPECTED; LOG_WARN("invalid batch type", K(ret), K(est_cost_info.batch_type_)); } //add spatial index scan cost if (OB_FAIL(ret)) { } else if (est_cost_info.index_meta_info_.is_geo_index_) { double spatial_cost = row_count * cost_params_.get_spatial_per_row_cost(sys_stat_); cost += spatial_cost; LOG_TRACE("OPT::[COST SPATIAL INDEX SCAN]", K(spatial_cost), K(ret)); } //add index skip scan cost if (OB_FAIL(ret)) { } else if (!est_cost_info.ss_ranges_.empty()) { cost = cost * est_cost_info.ss_prefix_ndv_; LOG_TRACE("OPT::[COST INDEX SKIP SCAN]", K(est_cost_info.ss_prefix_ndv_), K(cost)); } return ret; } int ObOptEstCostModel::cost_index_back(const ObCostTableScanInfo &est_cost_info, double row_count, double &cost) { int ret = OB_SUCCESS; double network_cost = 0.0; // calc real index back row count double index_back_row_count = row_count * est_cost_info.postfix_filter_sel_; if (OB_FAIL(cost_range_get(est_cost_info, false, index_back_row_count, cost))) { LOG_WARN("Failed to estimate get cost", K(ret)); } else if (est_cost_info.index_meta_info_.is_global_index_ && OB_FAIL(cost_global_index_back_with_rp(index_back_row_count, est_cost_info, network_cost))) { LOG_WARN("failed to get newwork transform cost for global index", K(ret)); } else { cost 
+= network_cost; LOG_TRACE("OPT:[COST INDEX BACK]", K(index_back_row_count), K(network_cost), K(cost)); } return ret; } /* * estimate the network transform and rpc cost for global index, * so far, this cost model should be revised by banliu */ int ObOptEstCostModel::cost_global_index_back_with_rp(double row_count, const ObCostTableScanInfo &est_cost_info, double &cost) { int ret = OB_SUCCESS; const ObTableMetaInfo *table_meta_info = est_cost_info.table_meta_info_; cost = 0.0; if (OB_ISNULL(table_meta_info) || OB_UNLIKELY(table_meta_info->table_column_count_ <= 0)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("table column count should not be 0", K(table_meta_info->table_column_count_), K(ret)); } else { double column_count = est_cost_info.access_column_items_.count(); double transform_size = (table_meta_info->average_row_size_ * row_count * column_count) /static_cast(table_meta_info->table_column_count_); cost = transform_size * cost_params_.get_network_trans_per_byte_cost(sys_stat_) + row_count * cost_params_.get_table_loopup_per_row_rpc_cost(sys_stat_); LOG_TRACE("OPT::[COST GLOBAL INDEX BACK WITH RPC]", K(cost), K(table_meta_info->average_row_size_), K(row_count), K(table_meta_info->table_column_count_)); } return ret; } int ObOptEstCostModel::cost_range_scan(const ObCostTableScanInfo &est_cost_info, bool is_scan_index, double row_count, double &cost) { int ret = OB_SUCCESS; // 从memtable读取数据的代价,待提供 double memtable_cost = 0; // memtable数据和基线数据合并的代价,待提供 double memtable_merge_cost = 0; double io_cost = 0.0; double cpu_cost = 0.0; if (OB_FAIL(range_scan_io_cost(est_cost_info, is_scan_index, row_count, io_cost))) { LOG_WARN("failed to calc table scan io cost", K(ret)); } else if (OB_FAIL(range_scan_cpu_cost(est_cost_info, is_scan_index, row_count, false, cpu_cost))) { LOG_WARN("failed to calc table scan cpu cost", K(ret)); } else { if (io_cost > cpu_cost) { cost = io_cost + memtable_cost + memtable_merge_cost; } else { cost = cpu_cost + memtable_cost + memtable_merge_cost; } 
LOG_TRACE("OPT:[COST RANGE SCAN]", K(is_scan_index), K(row_count), K(cost), K(io_cost), K(cpu_cost), K(memtable_cost), K(memtable_merge_cost)); } return ret; } int ObOptEstCostModel::cost_range_get(const ObCostTableScanInfo &est_cost_info, bool is_scan_index, double row_count, double &cost) { int ret = OB_SUCCESS; // 从memtable读取数据的代价,待提供 double memtable_cost = 0; // memtable数据和基线数据合并的代价,待提供 double memtable_merge_cost = 0; double io_cost = 0.0; double cpu_cost = 0.0; if (OB_FAIL(range_get_io_cost(est_cost_info, is_scan_index, row_count, io_cost))) { LOG_WARN("failed to calc table get io cost", K(ret)); } else if (OB_FAIL(range_scan_cpu_cost(est_cost_info, is_scan_index, row_count, true, cpu_cost))) { LOG_WARN("failed to calc table scan cpu cost", K(ret)); } else { double fetch_row_cost = cost_params_.get_fetch_row_rnd_cost(sys_stat_) * row_count; cost = cpu_cost + io_cost + fetch_row_cost + memtable_cost + memtable_merge_cost; LOG_TRACE("OPT:[COST RANGE GET]", K(is_scan_index), K(row_count), K(cost), K(io_cost), K(cpu_cost), K(fetch_row_cost), K(memtable_cost), K(memtable_merge_cost)); } return ret; } int ObOptEstCostModel::range_get_io_cost(const ObCostTableScanInfo &est_cost_info, bool is_scan_index, double row_count, double &cost) { int ret = OB_SUCCESS; cost = 0.0; const ObIndexMetaInfo &index_meta_info = est_cost_info.index_meta_info_; const ObTableMetaInfo *table_meta_info = est_cost_info.table_meta_info_; if (OB_ISNULL(table_meta_info) || row_count < 0) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid args", K(ret)); } else { //索引总的微块数 = 总大小/微块大小 //计算涉及的微块数 double num_micro_blocks = 0; if (is_scan_index) { num_micro_blocks = index_meta_info.get_micro_block_numbers(); } else { num_micro_blocks = table_meta_info->get_micro_block_numbers(); } double num_micro_blocks_read = 0; const double table_row_count = static_cast(table_meta_info->table_row_count_); if (OB_LIKELY(table_row_count > 0 && row_count <= table_row_count)) { num_micro_blocks_read = num_micro_blocks * 
(1.0 - std::pow((1.0 - row_count / table_row_count), table_row_count / num_micro_blocks)); num_micro_blocks_read = std::ceil(num_micro_blocks_read); } else { num_micro_blocks_read = num_micro_blocks; } // IO代价,包括读取整个微块及反序列化的代价和每行定位微块的代价 double first_block_cost = cost_params_.get_micro_block_rnd_cost(sys_stat_); if (est_cost_info.is_inner_path_) { if (est_cost_info.can_use_batch_nlj_) { first_block_cost = cost_params_.get_batch_nl_get_cost(sys_stat_); } else { first_block_cost = cost_params_.get_nl_get_cost(sys_stat_); } } if (num_micro_blocks_read < 1) { cost = 0; } else { cost = first_block_cost + cost_params_.get_micro_block_rnd_cost(sys_stat_) * (num_micro_blocks_read-1); } LOG_TRACE("OPT:[COST RANGE GET IO]", K(is_scan_index), K(row_count), K(cost), K(num_micro_blocks), K(num_micro_blocks_read), K(first_block_cost)); } return ret; } int ObOptEstCostModel::range_scan_io_cost(const ObCostTableScanInfo &est_cost_info, bool is_scan_index, double row_count, double &cost) { int ret = OB_SUCCESS; cost = 0.0; const ObIndexMetaInfo &index_meta_info = est_cost_info.index_meta_info_; const ObTableMetaInfo *table_meta_info = est_cost_info.table_meta_info_; if (OB_ISNULL(table_meta_info) || row_count < 0) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid args", K(ret), K(row_count), KP(table_meta_info)); } else { //索引总的微块数 = 总大小/微块大小 //计算涉及的微块数 double num_micro_blocks = 0; if (!is_scan_index) { num_micro_blocks = table_meta_info->get_micro_block_numbers(); } else { num_micro_blocks = index_meta_info.get_micro_block_numbers(); } //读微块数 = 总微块数 * 读行比例 double num_micro_blocks_read = 0; const double table_row_count = static_cast(table_meta_info->table_row_count_); if (OB_LIKELY(table_row_count > 0 && row_count <= table_row_count)) { num_micro_blocks_read = std::ceil(num_micro_blocks * row_count / table_row_count); } else { num_micro_blocks_read = num_micro_blocks; } // IO代价,主要包括读取微块、反序列化的代价的代价 double first_block_cost = cost_params_.get_micro_block_rnd_cost(sys_stat_); if 
(!est_cost_info.pushdown_prefix_filters_.empty()) { if (est_cost_info.can_use_batch_nlj_) { first_block_cost = cost_params_.get_batch_nl_scan_cost(sys_stat_); } else { first_block_cost = cost_params_.get_nl_scan_cost(sys_stat_); } } if (num_micro_blocks_read < 1) { cost = first_block_cost; } else { cost = first_block_cost + cost_params_.get_micro_block_seq_cost(sys_stat_) * (num_micro_blocks_read-1); } LOG_TRACE("OPT:[COST RANGE SCAN IO]", K(is_scan_index), K(row_count), K(cost), K(num_micro_blocks), K(num_micro_blocks_read), K(first_block_cost)); } return ret; } int ObOptEstCostModel::range_scan_cpu_cost(const ObCostTableScanInfo &est_cost_info, bool is_scan_index, double row_count, bool is_get, double &cost) { int ret = OB_SUCCESS; double project_cost = 0.0; const ObIndexMetaInfo &index_meta_info = est_cost_info.index_meta_info_; const ObTableMetaInfo *table_meta_info = est_cost_info.table_meta_info_; bool is_index_back = index_meta_info.is_index_back_; if (OB_ISNULL(table_meta_info)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid args", K(ret)); } else if (is_scan_index && is_index_back) { if (OB_FAIL(cost_project(row_count, est_cost_info.index_access_column_items_, is_get, est_cost_info.use_column_store_, project_cost))) { LOG_WARN("failed to cost project", K(ret)); } } else if (est_cost_info.use_column_store_) { if (OB_FAIL(cost_project(row_count, est_cost_info.access_column_items_, is_get, est_cost_info.use_column_store_, project_cost))) { LOG_WARN("failed to cost project", K(ret)); } } else { if (OB_FAIL(cost_full_table_scan_project(row_count, est_cost_info, is_get, project_cost))) { LOG_WARN("failed to cost project", K(ret)); } } if (OB_FAIL(ret)) { } else { // 谓词代价,主要指filter的代价 double qual_cost = 0.0; if (!is_index_back) { // 全表扫描 qual_cost += cost_quals(row_count, est_cost_info.postfix_filters_); qual_cost += cost_quals(row_count, est_cost_info.table_filters_); } else if (is_scan_index) { // 索引扫描 qual_cost += cost_quals(row_count, 
est_cost_info.postfix_filters_); } else { // 回表扫描 qual_cost += cost_quals(row_count, est_cost_info.table_filters_); } // CPU代价,包括get_next_row调用的代价和谓词代价 double range_cost = 0; range_cost = est_cost_info.ranges_.count() * cost_params_.get_range_cost(sys_stat_); cost = row_count * cost_params_.get_cpu_tuple_cost(sys_stat_); cost += range_cost + qual_cost + project_cost; LOG_TRACE("OPT: [RANGE SCAN CPU COST]", K(is_scan_index), K(is_get), K(cost), K(qual_cost), K(project_cost), K(range_cost), K(row_count)); } return ret; } int ObOptEstCostModel::get_sort_cmp_cost(const common::ObIArray &types, double &cost) { int ret = OB_SUCCESS; double cost_ret = 0.0; if (OB_UNLIKELY(types.count() < 1)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid col count", "col count", types.count(), K(ret)); } else { double factor = 1.0; for (int64_t i = 0; OB_SUCC(ret) && i < types.count(); ++i) { ObObjTypeClass tc = types.at(i).get_type_class(); if (OB_UNLIKELY(tc >= ObMaxTC)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("not supported type class", K(tc), K(ret)); } else { //Correctly estimating cmp cost need NDVs of each sort col: // if first col is identical, then we needn't compare the second col and so on. //But now we cannot get hand on NDV easily, just use // cmp_cost_col0 + cmp_cost_col1 / DEF_NDV + cmp_cost_col2 / DEF_NDV^2 ... 
double cost_for_col = cost_params_.get_comparison_cost(sys_stat_, tc);; if (OB_UNLIKELY(cost_for_col < 0)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("not supported type class", K(tc), K(ret)); } else { cost_ret += cost_for_col * factor; factor /= 10.0; } } } if (OB_SUCC(ret)) { cost = cost_ret; } } return ret; } int ObOptEstCostModel::cost_window_function(double rows, double width, double win_func_cnt, double &cost) { int ret = OB_SUCCESS; cost += rows * cost_params_.get_cpu_tuple_cost(sys_stat_); cost += ObOptEstCostModel::cost_material(rows, width) + ObOptEstCostModel::cost_read_materialized(rows); cost += rows * cost_params_.get_per_win_func_cost(sys_stat_) * win_func_cnt; return ret; } /** * @brief 计算filter的代价 * @formula cost = rows * CPU_TUPLE_COST + cost_quals * @param[in] rows 输入行数 * @param[in] filters filter数量 * @return 算子代价 */ double ObOptEstCostModel::cost_filter_rows(double rows, ObIArray &filters) { return rows * cost_params_.get_cpu_tuple_cost(sys_stat_) + cost_quals(rows, filters); } /** * @brief 估算SubplanFilter的代价 * * @formula 除最左的子节点以外,其它的节点都是一个filter,filter分成3种类型: * 1. onetime expr:这种类型的filter只需要计算一次,而且不需要物化 * 2. initplan : 这种类型的filter只需要计算一次,之后物化,从物化的数据中读取 * 3. 
other       : every remaining filter has to be re-evaluated each time
 */
int ObOptEstCostModel::cost_subplan_filter(const ObSubplanFilterCostInfo &info,
                                           double &cost)
{
  int ret = OB_SUCCESS;
  cost = 0.0;
  double onetime_cost = 0.0;
  // per-tuple cost of the rows produced by the leftmost child
  if (info.children_.count() > 0) {
    cost += info.children_.at(0).rows_ * cost_params_.get_cpu_tuple_cost(sys_stat_);
  }
  for (int64_t i = 1; OB_SUCC(ret) && i < info.children_.count(); ++i) {
    const ObBasicCostInfo &child = info.children_.at(i);
    // check whether this child is a onetime expr
    if (info.onetime_idxs_.has_member(i)) { // onetime cost
      // This child is a onetime expr: the right side is computed only once
      // and is not materialized.
      onetime_cost += child.cost_;
    } else if (info.initplan_idxs_.has_member(i)) { // init plan cost
      // This child is an initplan: the right side is materialized once and
      // afterwards only the materialized rows are read.
      onetime_cost += child.cost_ + child.rows_ * cost_params_.get_cpu_tuple_cost(sys_stat_)
          + cost_material(child.rows_, child.width_);
      cost += info.children_.at(0).rows_ * cost_read_materialized(child.rows_);
    } else { // other cost
      // General case: the right side is scanned again for every left row.
      cost += info.children_.at(0).rows_ * (child.cost_ + child.rows_ * cost_params_.get_cpu_tuple_cost(sys_stat_));
      if (child.exchange_allocated_) {
        cost += cost_params_.get_px_rescan_per_row_cost(sys_stat_) * info.children_.at(0).rows_;
      }
    }
  } // for info_childs end

  if (OB_SUCC(ret)) {
    cost += onetime_cost;
    LOG_TRACE("OPT: [COST SUBPLAN FILTER]", K(cost), K(onetime_cost), K(info));
  }
  return ret;
}

/**
 * @brief Cost of UNION ALL: CPU_TUPLE_COST times the total number of rows
 *        produced by all children.
 */
int ObOptEstCostModel::cost_union_all(const ObCostMergeSetInfo &info, double &cost)
{
  int ret = OB_SUCCESS;
  double total_rows = 0.0;
  for (int64_t i = 0; i < info.children_.count(); ++i) {
    total_rows += info.children_.at(i).rows_;
  }
  cost = total_rows * cost_params_.get_cpu_tuple_cost(sys_stat_);
  return ret;
}

/**
 * @brief Compute the cost of a set operation (union / except / intersect).
 * @param[in] info parameters needed to estimate the cost of the set operation
 * @param[out] cost estimated cost of the set operator itself
 * For a merge set the set op may have been flattened, so more than two
 * children have to be considered. The width used for the materialization
 * cost is the width of the last child.
 */
int ObOptEstCostModel::cost_merge_set(const ObCostMergeSetInfo &info, double &cost)
{
  int ret = OB_SUCCESS;
  double sum_rows = 0;
  double width = 0.0;
  for (int64_t i = 0; i < info.children_.count(); ++i) {
    sum_rows += info.children_.at(i).rows_;
    width = info.children_.at(i).width_;
  }
  cost = 0.0;
  // get next row cost
  cost += sum_rows * cost_params_.get_cpu_tuple_cost(sys_stat_);
  cost += cost_material(sum_rows, width);
  // operator cost: cmp_cost + cpu_cost
  LOG_TRACE("OPT: [COST MERGE SET]", K(cost), K(sum_rows), K(width));
  return ret;
}

/**
 * @brief Compute the cost of a set operation (union / except / intersect).
 * @param[in] info parameters needed to estimate the cost of the set operation
 * @param[out] cost estimated cost of the set operator itself
 * A hash set is never flattened, so only two children have to be considered.
 * UNION builds and probes with both sides; INTERSECT and EXCEPT build with the
 * left side and probe with both sides. For any other op_ the build and probe
 * row counts stay 0.
 */
int ObOptEstCostModel::cost_hash_set(const ObCostHashSetInfo &info, double &cost)
{
  int ret = OB_SUCCESS;
  double build_rows = 0.0;
  double probe_rows = 0.0;
  if (ObSelectStmt::UNION == info.op_) {
    build_rows = info.left_rows_ + info.right_rows_;
    probe_rows = info.left_rows_ + info.right_rows_;
  } else if (ObSelectStmt::INTERSECT == info.op_) {
    build_rows = info.left_rows_;
    probe_rows = info.left_rows_ + info.right_rows_;
  } else if (ObSelectStmt::EXCEPT == info.op_) {
    build_rows = info.left_rows_;
    probe_rows = info.left_rows_ + info.right_rows_;
  }

  cost = 0.0;
  // cost of get_next_row()
  cost += cost_params_.get_cpu_tuple_cost(sys_stat_) * (info.left_rows_ + info.right_rows_);
  // material cost
  cost += cost_material(info.left_rows_, info.left_width_) +
          cost_material(info.right_rows_, info.right_width_);
  // build hash table cost
  cost += cost_params_.get_build_hash_per_row_cost(sys_stat_) * build_rows;
  // probe hash table cost
  cost += cost_params_.get_probe_hash_per_row_cost(sys_stat_) * probe_rows;
  // cost of computing the hash values
  cost += cost_hash(info.left_rows_ + info.right_rows_, info.hash_columns_);

  LOG_TRACE("OPT: [COST HASH SET]", K(cost));
  return ret;
}

/**
 * @brief Compute the cost of calculating hash values.
 * @note(@ banliu.zyd) This function estimates the cost of hash calculation.
 *       To keep the code simple and non-intrusive it returns the hash cost
 *       directly; a NULL expression is simply skipped (with a warning), on
 *       the assumption that the validity of the expressions is checked
 *       elsewhere - that check does not belong here.
 * @param[in] rows         number of rows
 * @param[in] hash_exprs   array of hash columns
 */
double ObOptEstCostModel::cost_hash(double rows, const ObIArray &hash_exprs)
{
  double cost_per_row = 0.0;
  for (int64_t i = 0; i < hash_exprs.count(); ++i) {
    const ObRawExpr *expr = hash_exprs.at(i);
    if (OB_ISNULL(expr)) {
      LOG_WARN_RET(OB_ERR_UNEXPECTED, "qual should not be NULL, but we don't set error return code here, just skip it");
    } else {
      ObObjTypeClass calc_type = expr->get_result_type().get_calc_type_class();
      cost_per_row += cost_params_.get_hash_cost(sys_stat_,calc_type);
    }
  }
  return rows * cost_per_row;
}

/**
 * @brief Projection cost for a list of column items: collects the expr_ of
 *        every item and delegates to the expression-based overload.
 */
int ObOptEstCostModel::cost_project(double rows,
                                    const ObIArray &columns,
                                    bool is_get,
                                    bool use_column_store,
                                    double &cost)
{
  int ret = OB_SUCCESS;
  ObSEArray project_columns;
  for (int i = 0; OB_SUCC(ret) && i < columns.count(); ++i) {
    const ColumnItem &column_item = columns.at(i);
    ObRawExpr *expr = column_item.expr_;
    if (OB_FAIL(project_columns.push_back(expr))) {
      LOG_WARN("failed to push back expr", K(ret));
    }
  }
  if (OB_SUCC(ret) && OB_FAIL(cost_project(rows,
                                           project_columns,
                                           is_get,
                                           use_column_store,
                                           cost))) {
    LOG_WARN("failed to calc project cost", K(ret));
  }
  return ret;
}

/**
 * @brief Projection cost for a list of column expressions.
 *
 * Expressions whose reference count is not positive are skipped. Every other
 * column is costed by result type: integer, string (scaled by its length,
 * capped at DEFAULT_MAX_STRING_WIDTH), number/time, or the integer cost as
 * the default.
 *
 * @param[out] cost per-row projection cost times rows
 * @return OB_ERR_UNEXPECTED when an expression is NULL
 */
int ObOptEstCostModel::cost_project(double rows,
                                    const ObIArray &columns,
                                    bool is_get,
                                    bool use_column_store,
                                    double &cost)
{
  int ret = OB_SUCCESS;
  double project_one_row_cost = 0.0;
  for (int i = 0; OB_SUCC(ret) && i < columns.count(); ++i) {
    ObRawExpr *expr = columns.at(i);
    if (OB_ISNULL(expr)) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("unexpect null expr", K(ret));
    } else if (expr->get_ref_count() <= 0) {
      //do nothing
    } else {
      const ObExprResType &type = expr->get_result_type();
      if (type.is_integer_type()) {
        // int
        project_one_row_cost += cost_params_.get_project_column_cost(sys_stat_,
                                                                     PROJECT_INT,
                                                                     is_get,
                                                                     use_column_store);
      } else if (type.get_accuracy().get_length() > 0) {
        // ObStringTC
        int64_t string_width = type.get_accuracy().get_length();
        string_width = std::min(string_width, ObOptEstCostModel::DEFAULT_MAX_STRING_WIDTH);
        project_one_row_cost += cost_params_.get_project_column_cost(sys_stat_,
                                                                     PROJECT_CHAR,
                                                                     is_get,
                                                                     use_column_store) * string_width;
      } else if (type.get_accuracy().get_precision() > 0 || type.is_oracle_integer()) {
        // number, time
        project_one_row_cost += cost_params_.get_project_column_cost(sys_stat_,
                                                                     PROJECT_NUMBER,
                                                                     is_get,
                                                                     use_column_store);
      } else {
        // default for DEFAULT PK
        project_one_row_cost += cost_params_.get_project_column_cost(sys_stat_,
                                                                     PROJECT_INT,
                                                                     is_get,
                                                                     use_column_store);
      }
    }
  }
  cost = project_one_row_cost * rows;
  LOG_TRACE("COST PROJECT:", K(cost), K(rows), K(columns));
  return ret;
}

/**
 * @brief Projection cost of a full table scan.
 *
 * The access columns are projected only for the rows that survive the table
 * filters and the join filter (rows * table_filter_sel_ * join_filter_sel_),
 * while the columns referenced by the postfix/table filters are projected for
 * all `rows` rows.
 */
int ObOptEstCostModel::cost_full_table_scan_project(double rows,
                                                    const ObCostTableScanInfo &est_cost_info,
                                                    bool is_get,
                                                    double &cost)
{
  int ret = OB_SUCCESS;
  ObSEArray filter_columns;
  double cost_project_filter_column = 0;
  // NOTE(review): project_one_row_cost is not used anywhere in this function.
  double project_one_row_cost = 0;
  double project_full_row_count = rows * est_cost_info.table_filter_sel_
                                  * est_cost_info.join_filter_sel_;
  if (OB_FAIL(ObRawExprUtils::extract_column_exprs(est_cost_info.postfix_filters_,
                                                   filter_columns))) {
    LOG_WARN("failed to extract column exprs", K(ret));
  } else if (OB_FAIL(ObRawExprUtils::extract_column_exprs(est_cost_info.table_filters_,
                                                          filter_columns))) {
    LOG_WARN("failed to extract column exprs", K(ret));
  } else if (OB_FAIL(cost_project(project_full_row_count,
                                  est_cost_info.access_column_items_,
                                  is_get,
                                  est_cost_info.use_column_store_,
                                  cost))) {
    LOG_WARN("failed to calc project cost", K(ret));
  } else if (OB_FAIL(cost_project(rows,
                                  filter_columns,
                                  is_get,
                                  est_cost_info.use_column_store_,
                                  cost_project_filter_column))) {
    LOG_WARN("failed to calc project cost", K(ret));
  } else {
    cost += cost_project_filter_column;
    LOG_TRACE("COST TABLE SCAN PROJECT:", K(rows), K(project_full_row_count),
                                    K(cost_project_filter_column), K(cost));
  }
  return ret;
}

/**
 * @brief Compute the cost of evaluating predicates.
 * @note(@ banliu.zyd) This function estimates the cost of predicate
 *       evaluation. To keep the code simple and non-intrusive it returns the
 *       predicate cost directly; a NULL predicate is simply skipped (with a
 *       warning), on the assumption that the validity of the predicates is
 *       checked elsewhere - that check does not belong here.
 * @param[in] rows         number of rows
 * @param[in] quals        array of predicates
 * @param[in] need_scale   when true, the weight of each following predicate
 *                         is divided by 10.0
 */
// predicate cost = rows * sum(comparison cost of each predicate type)
double ObOptEstCostModel::cost_quals(double rows, const ObIArray &quals, bool need_scale)
{
  double factor = 1.0;
  double cost_per_row = 0.0;
  for (int64_t i = 0; i < quals.count(); ++i) {
    const ObRawExpr *qual = quals.at(i);
    if (OB_ISNULL(qual)) {
      LOG_WARN_RET(OB_ERR_UNEXPECTED, "qual should not be NULL, but we don't set error return code here, just skip it");
    } else if (qual->is_spatial_expr()) {
      cost_per_row += cost_params_.get_cmp_spatial_cost(sys_stat_) * factor;
      if (need_scale) {
        factor /= 10.0;
      }
    } else {
      ObObjTypeClass calc_type = qual->get_result_type().get_calc_type_class();
      cost_per_row += cost_params_.get_comparison_cost(sys_stat_, calc_type) * factor;
      if (need_scale) {
        factor /= 10.0;
      }
    }
  }
  return rows * cost_per_row;
}

/**
 * @brief Cost of an INSERT: per-tuple and per-row insert cost for the
 *        affected rows, plus the index and constraint-check terms.
 *        The index and check terms are multiplied by index_count_ and
 *        constraint_count_ only, not by the number of affected rows.
 */
int ObOptEstCostModel::cost_insert(ObDelUpCostInfo& cost_info, double &cost)
{
  int ret = OB_SUCCESS;
  cost = cost_params_.get_cpu_tuple_cost(sys_stat_) * cost_info.affect_rows_ +
         cost_params_.get_insert_per_row_cost(sys_stat_) * cost_info.affect_rows_ +
         cost_params_.get_insert_index_per_row_cost(sys_stat_) * cost_info.index_count_ +
         cost_params_.get_insert_check_per_row_cost(sys_stat_) * cost_info.constraint_count_;
  return ret;
}

/**
 * @brief Cost of an UPDATE; same shape as cost_insert with the update
 *        cost parameters.
 */
int ObOptEstCostModel::cost_update(ObDelUpCostInfo& cost_info, double &cost)
{
  int ret = OB_SUCCESS;
  cost = cost_params_.get_cpu_tuple_cost(sys_stat_) * cost_info.affect_rows_ +
         cost_params_.get_update_per_row_cost(sys_stat_) * cost_info.affect_rows_ +
         cost_params_.get_update_index_per_row_cost(sys_stat_) * cost_info.index_count_ +
         cost_params_.get_update_check_per_row_cost(sys_stat_) * cost_info.constraint_count_;
  return ret;
}

/**
 * @brief Cost of a DELETE; same shape as cost_insert with the delete
 *        cost parameters.
 */
int ObOptEstCostModel::cost_delete(ObDelUpCostInfo& cost_info, double &cost)
{
  int ret = OB_SUCCESS;
  cost = cost_params_.get_cpu_tuple_cost(sys_stat_) * cost_info.affect_rows_ +
         cost_params_.get_delete_per_row_cost(sys_stat_) * cost_info.affect_rows_ +
         cost_params_.get_delete_index_per_row_cost(sys_stat_) * cost_info.index_count_ +
         cost_params_.get_delete_check_per_row_cost(sys_stat_) * cost_info.constraint_count_;
  return ret;
}

/**
 * @brief Estimate the cost of scanning a range of an index from table-level
 *        statistics: sequential IO over the micro blocks read plus per-row,
 *        per-range and filter CPU costs.
 *
 * @param[in] table_meta_info      table statistics
 * @param[in] filters              filters evaluated on the scanned rows
 * @param[in] index_column_count   number of index columns; scales the table's
 *                                 micro block count by
 *                                 index_column_count / table_column_count_
 * @param[in] range_count          number of ranges
 * @param[in] range_sel            selectivity of the range
 * @param[out] cost                io_cost + cpu_cost
 */
int ObOptEstCostModel::calc_range_cost(const ObTableMetaInfo& table_meta_info,
                                       const ObIArray &filters,
                                       int64_t index_column_count,
                                       int64_t range_count,
                                       double range_sel,
                                       double &cost)
{
  int ret = OB_SUCCESS;
  cost = 0;
  // NOTE(review): the product is truncated to an integer row count here.
  int64_t row_count = table_meta_info.table_row_count_ * range_sel;
  // NOTE(review): without optimizer statistics num_micro_blocks stays at -1
  // and still flows into the IO cost below; table_column_count_ is also used
  // as a divisor without a zero check - confirm both are intended.
  double num_micro_blocks = -1;
  if (table_meta_info.has_opt_stat_) {
    num_micro_blocks = table_meta_info.micro_block_count_;
    num_micro_blocks *= index_column_count * 1.0 / table_meta_info.table_column_count_;
  }
  double num_micro_blocks_read = 0;
  if (OB_LIKELY(table_meta_info.table_row_count_ > 0)) {
    num_micro_blocks_read = std::ceil(num_micro_blocks
                                      * row_count
                                      / static_cast (table_meta_info.table_row_count_));
  }
  double io_cost = cost_params_.get_micro_block_seq_cost(sys_stat_) * num_micro_blocks_read;
  double qual_cost = cost_quals(row_count, filters);
  double cpu_cost = row_count * cost_params_.get_cpu_tuple_cost(sys_stat_)
                    + range_count * cost_params_.get_range_cost(sys_stat_) + qual_cost;
  cpu_cost += row_count * cost_params_.get_table_scan_cpu_tuple_cost(sys_stat_);
  cost = io_cost + cpu_cost;
  return ret;
}