diff --git a/deps/oblib/src/common/ob_range.h b/deps/oblib/src/common/ob_range.h index 3a68d28733..d247c69b70 100644 --- a/deps/oblib/src/common/ob_range.h +++ b/deps/oblib/src/common/ob_range.h @@ -485,6 +485,11 @@ public: return (start_key_.is_min_row()) && (end_key_.is_max_row()); } + inline bool is_false_range() const + { + return (start_key_.is_max_row()) && (end_key_.is_min_row()); + } + /* inline bool is_close_range() const { diff --git a/src/pl/sys_package/ob_dbms_stats.cpp b/src/pl/sys_package/ob_dbms_stats.cpp index fc6db176ce..f934d87667 100644 --- a/src/pl/sys_package/ob_dbms_stats.cpp +++ b/src/pl/sys_package/ob_dbms_stats.cpp @@ -2006,10 +2006,13 @@ int ObDbmsStats::lock_table_stats(sql::ObExecContext &ctx, stat_param.global_stat_param_.need_modify_ = true; stat_param.part_stat_param_.need_modify_ = true; stat_param.subpart_stat_param_.need_modify_ = true; + stat_param.no_invalidate_ = true; if (OB_FAIL(ObDbmsStatsLockUnlock::set_table_stats_lock(ctx, stat_param, true))) { LOG_WARN("failed to lock table stats", K(ret)); } else if (OB_FAIL(lock_or_unlock_index_stats(ctx, stat_param, true))) { LOG_WARN("failed to lock index stats", K(ret)); + } else if (OB_FAIL(update_stat_cache(ctx.get_my_session()->get_rpc_tenant_id(), stat_param))) { + LOG_WARN("failed to update stat cache", K(ret)); } else {/*do nothing*/} } return ret; @@ -2053,8 +2056,11 @@ int ObDbmsStats::lock_partition_stats(sql::ObExecContext &ctx, stat_param.global_stat_param_.need_modify_ = false; stat_param.part_stat_param_.need_modify_ = true; stat_param.subpart_stat_param_.need_modify_ = false; + stat_param.no_invalidate_ = true; if (OB_FAIL(ObDbmsStatsLockUnlock::set_table_stats_lock(ctx, stat_param, true))) { LOG_WARN("failed to lock table stats", K(ret)); + } else if (OB_FAIL(update_stat_cache(ctx.get_my_session()->get_rpc_tenant_id(), stat_param))) { + LOG_WARN("failed to update stat cache", K(ret)); } else {/*do nothing */} } return ret; @@ -2111,10 +2117,13 @@ int 
ObDbmsStats::lock_schema_stats(sql::ObExecContext &ctx, stat_param.part_stat_param_.need_modify_ = true; stat_param.subpart_stat_param_.need_modify_ = true; stat_param.allocator_ = &tmp_alloc;//use the temp allocator free memory after stat lock + stat_param.no_invalidate_ = true; if (OB_FAIL(ObDbmsStatsLockUnlock::set_table_stats_lock(ctx, stat_param, true))) { LOG_WARN("failed to lock table stats", K(ret)); } else if (OB_FAIL(lock_or_unlock_index_stats(ctx, stat_param, true))) { LOG_WARN("failed to lock index stats", K(ret)); + } else if (OB_FAIL(update_stat_cache(ctx.get_my_session()->get_rpc_tenant_id(), stat_param))) { + LOG_WARN("failed to update stat cache", K(ret)); } else { tmp_alloc.reset(); } @@ -2201,10 +2210,13 @@ int ObDbmsStats::unlock_table_stats(sql::ObExecContext &ctx, stat_param.global_stat_param_.need_modify_ = true; stat_param.part_stat_param_.need_modify_ = true; stat_param.subpart_stat_param_.need_modify_ = true; + stat_param.no_invalidate_ = true; if (OB_FAIL(ObDbmsStatsLockUnlock::set_table_stats_lock(ctx, stat_param, false))) { LOG_WARN("failed to lock table stats", K(ret)); } else if (OB_FAIL(lock_or_unlock_index_stats(ctx, stat_param, false))) { LOG_WARN("failed to lock index stats", K(ret)); + } else if (OB_FAIL(update_stat_cache(ctx.get_my_session()->get_rpc_tenant_id(), stat_param))) { + LOG_WARN("failed to update stat cache", K(ret)); } else {/*do nothing*/} } return ret; @@ -2248,8 +2260,11 @@ int ObDbmsStats::unlock_partition_stats(sql::ObExecContext &ctx, stat_param.global_stat_param_.need_modify_ = false; stat_param.part_stat_param_.need_modify_ = true; stat_param.subpart_stat_param_.need_modify_ = false; + stat_param.no_invalidate_ = true; if (OB_FAIL(ObDbmsStatsLockUnlock::set_table_stats_lock(ctx, stat_param, false))) { LOG_WARN("failed to lock table stats", K(ret)); + } else if (OB_FAIL(update_stat_cache(ctx.get_my_session()->get_rpc_tenant_id(), stat_param))) { + LOG_WARN("failed to update stat cache", K(ret)); } else {/*do 
nothing */} } return ret; @@ -2306,10 +2321,13 @@ int ObDbmsStats::unlock_schema_stats(sql::ObExecContext &ctx, stat_param.part_stat_param_.need_modify_ = true; stat_param.subpart_stat_param_.need_modify_ = true; stat_param.allocator_ = &tmp_alloc;//use the temp allocator to free memory after stat unlock + stat_param.no_invalidate_ = true; if (OB_FAIL(ObDbmsStatsLockUnlock::set_table_stats_lock(ctx, stat_param, false))) { LOG_WARN("failed to lock table stats", K(ret)); } else if (OB_FAIL(lock_or_unlock_index_stats(ctx, stat_param, false))) { LOG_WARN("failed to lock index stats", K(ret)); + } else if (OB_FAIL(update_stat_cache(ctx.get_my_session()->get_rpc_tenant_id(), stat_param))) { + LOG_WARN("failed to update stat cache", K(ret)); } else { tmp_alloc.reset(); } diff --git a/src/share/stat/ob_opt_stat_manager.cpp b/src/share/stat/ob_opt_stat_manager.cpp index 8b08b6b741..2e42a468d4 100644 --- a/src/share/stat/ob_opt_stat_manager.cpp +++ b/src/share/stat/ob_opt_stat_manager.cpp @@ -636,6 +636,7 @@ int ObOptStatManager::get_table_stat(const uint64_t tenant_id, opt_stat.get_macro_block_num() * scale_ratio, opt_stat.get_micro_block_num() * scale_ratio); stat.set_last_analyzed(opt_stat.get_last_analyzed()); + stat.set_stat_locked(opt_stat.is_locked()); } return ret; } diff --git a/src/share/stat/ob_opt_stat_sql_service.cpp b/src/share/stat/ob_opt_stat_sql_service.cpp index f35348d4cf..3b389012e2 100644 --- a/src/share/stat/ob_opt_stat_sql_service.cpp +++ b/src/share/stat/ob_opt_stat_sql_service.cpp @@ -1953,7 +1953,8 @@ int ObOptStatSqlService::fetch_table_rowcnt(const uint64_t tenant_id, ObSchemaUtils::get_real_table_mappings_tid(table_id) : table_id; if (OB_FAIL(gen_tablet_list_str(all_tablet_ids, all_ls_ids, tablet_list_str, tablet_ls_list_str))) { LOG_WARN("failed to gen tablet list str", K(ret)); - } else if (OB_FAIL(raw_sql.append_fmt("select /*+opt_param('enable_in_range_optimization','true')*/ tablet_id, max(row_count) from "\ + } else if 
(OB_FAIL(raw_sql.append_fmt("select /*+opt_param('enable_in_range_optimization','true') opt_param('use_default_opt_stat','true')*/"\ + "tablet_id, max(row_count) from "\ "(select cast(tablet_id as unsigned) as tablet_id, cast(inserts - deletes as signed) as row_count "\ "from %s where tenant_id = %lu and table_id = %lu and tablet_id in %s union all "\ "select cast(tablet_id as unsigned) as tablet_id, cast(row_count as signed) as row_count from %s, "\ diff --git a/src/share/stat/ob_stat_item.h b/src/share/stat/ob_stat_item.h index c19681c242..e3f4e43ddc 100644 --- a/src/share/stat/ob_stat_item.h +++ b/src/share/stat/ob_stat_item.h @@ -302,7 +302,8 @@ public: ObGlobalTableStat() : row_count_(0), row_size_(0), data_size_(0), macro_block_count_(0), micro_block_count_(0), part_cnt_(0), last_analyzed_(0), - cg_macro_cnt_arr_(), cg_micro_cnt_arr_() + cg_macro_cnt_arr_(), cg_micro_cnt_arr_(), + stat_locked_(false) {} void add(int64_t rc, int64_t rs, int64_t ds, int64_t mac, int64_t mic); @@ -318,6 +319,8 @@ public: const ObIArray &get_cg_macro_arr() const { return cg_macro_cnt_arr_; } const ObIArray &get_cg_micro_arr() const { return cg_micro_cnt_arr_; } void set_last_analyzed(int64_t last_analyzed) { last_analyzed_ = last_analyzed; } + void set_stat_locked(bool locked) { stat_locked_ = locked; } + bool get_stat_locked() const { return stat_locked_; } TO_STRING_KV(K(row_count_), @@ -328,7 +331,8 @@ public: K(part_cnt_), K(last_analyzed_), K(cg_macro_cnt_arr_), - K(cg_micro_cnt_arr_)); + K(cg_micro_cnt_arr_), + K(stat_locked_)); private: int64_t row_count_; @@ -340,6 +344,7 @@ private: int64_t last_analyzed_; ObArray cg_macro_cnt_arr_; ObArray cg_micro_cnt_arr_; + bool stat_locked_; }; class ObGlobalNullEval diff --git a/src/sql/CMakeLists.txt b/src/sql/CMakeLists.txt index eae22652ac..42bb19121c 100644 --- a/src/sql/CMakeLists.txt +++ b/src/sql/CMakeLists.txt @@ -937,6 +937,7 @@ ob_set_subtarget(ob_sql optimizer optimizer/ob_raw_expr_get_hash_value.cpp 
optimizer/ob_replica_compare.cpp optimizer/ob_route_policy.cpp + optimizer/ob_sel_estimator.cpp optimizer/ob_select_log_plan.cpp optimizer/ob_sharding_info.cpp optimizer/ob_skyline_prunning.cpp diff --git a/src/sql/code_generator/ob_static_engine_cg.cpp b/src/sql/code_generator/ob_static_engine_cg.cpp index 73657cbd01..137249989a 100644 --- a/src/sql/code_generator/ob_static_engine_cg.cpp +++ b/src/sql/code_generator/ob_static_engine_cg.cpp @@ -4726,7 +4726,7 @@ int ObStaticEngineCG::generate_normal_tsc(ObLogTableScan &op, ObTableScanSpec &s spec.output_row_count_ = static_cast(op.get_output_row_count()); spec.query_range_row_count_ = static_cast(op.get_logical_query_range_row_count()); spec.index_back_row_count_ = static_cast(op.get_index_back_row_count()); - spec.estimate_method_ = op.get_estimate_method(); + spec.estimate_method_ = INVALID_METHOD; spec.table_name_ = tbl_name; spec.index_name_ = index_name; // das path not under gi control (TODO: separate gi_above flag from das tsc spec) diff --git a/src/sql/optimizer/ob_access_path_estimation.cpp b/src/sql/optimizer/ob_access_path_estimation.cpp index a7b0f16e97..3aa7aa52e0 100644 --- a/src/sql/optimizer/ob_access_path_estimation.cpp +++ b/src/sql/optimizer/ob_access_path_estimation.cpp @@ -21,6 +21,7 @@ #include "observer/ob_sql_client_decorator.h" #include "share/stat/ob_dbms_stats_utils.h" #include "rootserver/ob_root_service.h" +#include "sql/optimizer/ob_sel_estimator.h" namespace oceanbase { using namespace share::schema; using namespace share; @@ -31,98 +32,363 @@ int ObAccessPathEstimation::estimate_rowcount(ObOptimizerContext &ctx, common::ObIArray &paths, const bool is_inner_path, const ObIArray &filter_exprs, - bool &is_use_ds) + ObBaseTableEstMethod &method) { int ret = OB_SUCCESS; - common::ObSEArray no_ds_paths; - is_use_ds = false; - if (OB_FAIL(process_dynamic_sampling_estimation(ctx, paths, is_inner_path, filter_exprs, no_ds_paths))) { - LOG_WARN("failed to process dynamic sampling estimation", 
K(ret)); - } else if (no_ds_paths.empty()) { - is_use_ds = true; - } else if (OB_FAIL(process_common_estimate_rowcount(ctx, no_ds_paths))) { - LOG_WARN("failed to process common estimate rowcount", K(ret)); - } else {/*do nothing*/} - return ret; -} + ObBaseTableEstMethod valid_methods = 0; + method = EST_INVALID; -/// It is possible for us to find a better way to combine differnt kinds of cardinality -/// estimation methods. e.g. if a table is found to be uniformally distributed over partitions, -/// we can do a storage estimation on one of these parts, and then deduce the total row count. -int ObAccessPathEstimation::process_common_estimate_rowcount(ObOptimizerContext &ctx, - common::ObIArray &paths) -{ - int ret = OB_SUCCESS; - ObArray tmp; - - // wo do statistics estimation for all paths, - // the storage estimation is an advanced tech, which introduces more accurate results - // but it has serveral limitations, hence we check its usage here - for (int64_t i = 0; OB_SUCC(ret) && i < paths.count(); ++i) { - bool use_storage_stat = false; - bool use_default_vt = false; - if (ctx.use_default_stat()) { - if (OB_FAIL(process_table_default_estimation(paths.at(i)))) { - LOG_WARN("failed to process process vtable default estimation", K(ret)); - } - } else if (OB_FAIL(choose_best_estimation_method(paths.at(i), - *(paths.at(i)->est_cost_info_.table_meta_info_), - use_storage_stat, - use_default_vt))) { - LOG_WARN("failed to choose best estimation method", K(ret)); - } else if (use_default_vt) { - if (OB_FAIL(process_vtable_default_estimation(paths.at(i)))) { - LOG_WARN("failed to process process vtable default estimation", K(ret)); - } - } else if (EXTERNAL_TABLE == paths.at(i)->est_cost_info_.table_meta_info_->table_type_) { - if (OB_FAIL(process_external_table_estimation(paths.at(i)))) { - LOG_WARN("failed to process external table estimation", K(ret)); - } - } else if (OB_FAIL(process_statistics_estimation(paths.at(i)))) { - LOG_WARN("failed to process statistics 
estimation", K(ret)); - } else if (!use_storage_stat) { - // do nothing - } else if (OB_FAIL(tmp.push_back(paths.at(i)))) { - LOG_WARN("failed to push back path", K(ret)); - } - } - if (OB_SUCC(ret) && !tmp.empty()) { - if (OB_FAIL(process_storage_estimation(ctx, tmp))) { - LOG_WARN("failed to process storage estimation", K(ret)); - } - } - return ret; -} - -int ObAccessPathEstimation::choose_best_estimation_method(const AccessPath *path, - const ObTableMetaInfo &meta, - bool &use_storage_stat, - bool &use_default_vt) -{ - int ret = OB_SUCCESS; - use_storage_stat = false; - use_default_vt = false; - if (OB_ISNULL(path)) { + if (OB_UNLIKELY(paths.empty())) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("access path is invalid", K(ret), K(path)); - } else if (is_virtual_table(path->ref_table_id_) && - !share::is_oracle_mapping_real_virtual_table(path->ref_table_id_)) { - use_default_vt = !meta.has_opt_stat_; - } else if (OB_FAIL(check_path_can_use_stroage_estimate(path, use_storage_stat))) { - LOG_WARN("failed to check path can use stroage estimate", K(ret)); + LOG_WARN("get unexpected null", K(ret)); + } else if (OB_FAIL(get_valid_est_methods(ctx, paths, filter_exprs, is_inner_path, valid_methods))) { + LOG_WARN("failed to get valid est methods", K(ret)); + } else if (OB_FAIL(choose_best_est_method(ctx, paths, filter_exprs, valid_methods, method))) { + LOG_WARN("failed to choose one est method", K(ret), K(valid_methods)); + } else if (OB_FAIL(do_estimate_rowcount(ctx, paths, is_inner_path, filter_exprs, method))) { + LOG_WARN("failed to do estimate rowcount", K(ret), K(method), K(valid_methods)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < paths.count(); i ++) { + if (OB_ISNULL(paths.at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret), K(paths.at(i))); + } else { + paths.at(i)->est_cost_info_.est_method_ = method; + } } return ret; } -int ObAccessPathEstimation::check_path_can_use_stroage_estimate(const AccessPath *path, bool &can_use) +int 
ObAccessPathEstimation::do_estimate_rowcount(ObOptimizerContext &ctx, + common::ObIArray &paths, + const bool is_inner_path, + const ObIArray &filter_exprs, + ObBaseTableEstMethod &method) +{ /* Runs the estimators selected by the `method` bitmask; a dynamic sampling failure swaps the DS bits for EST_DEFAULT, a storage failure clears EST_STORAGE. */ + int ret = OB_SUCCESS; + bool is_success = true; + LOG_TRACE("Try to do estimate rowcount", K(method), K(is_inner_path)); + + if (OB_UNLIKELY(EST_INVALID == method) || + OB_UNLIKELY((method & EST_DS_FULL) && (method & EST_DS_BASIC)) || + OB_UNLIKELY((method & EST_DEFAULT) && (method & EST_STAT))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected est method", K(ret), K(method)); + } + + if (OB_SUCC(ret) && (method & (EST_DS_BASIC | EST_DS_FULL))) { + bool only_ds_basic_stat = (method & EST_DS_BASIC); + if (OB_FAIL(process_dynamic_sampling_estimation( + ctx, paths, is_inner_path, filter_exprs, only_ds_basic_stat, is_success))) { + LOG_WARN("failed to process dynamic sampling estimation", K(ret)); + } else if (!is_success) { + method &= ~EST_DS_BASIC; + method &= ~EST_DS_FULL; + method |= EST_DEFAULT; + } + } + + if (OB_SUCC(ret) && (method & EST_DEFAULT)) { + if (OB_FAIL(process_table_default_estimation(ctx, paths))) { + LOG_WARN("failed to process table default estimation", K(ret)); + } + } + + if (OB_SUCC(ret) && (method & EST_STAT)) { + if (OB_FAIL(process_statistics_estimation(paths))) { + LOG_WARN("failed to process statistics estimation", K(ret)); + } + } + + if (OB_SUCC(ret) && (method & EST_STORAGE)) { + if (OB_FAIL(process_storage_estimation(ctx, paths, is_success))) { + LOG_WARN("failed to process storage estimation", K(ret)); + } else if (!is_success) { + // The failure of storage estimation will not affect the result of statistics estimation + method &= ~EST_STORAGE; + } + } + return ret; +} + +int ObAccessPathEstimation::get_valid_est_methods(ObOptimizerContext &ctx, + common::ObIArray &paths, + const ObIArray &filter_exprs, + bool is_inner_path, + ObBaseTableEstMethod &valid_methods) +{ /* Starts with every method enabled, then clears the bits that the checks below rule out for these paths. */ + int ret = OB_SUCCESS; + valid_methods = EST_DEFAULT | EST_STAT | EST_STORAGE | 
EST_DS_BASIC | EST_DS_FULL; + const ObBaseTableEstMethod EST_DS_METHODS = EST_DS_BASIC | EST_DS_FULL; + ObBaseTableEstMethod hint_specify_methods = 0; + const ObLogPlan* log_plan = NULL; + const OptTableMeta *table_meta = NULL; + if (OB_UNLIKELY(paths.empty()) || OB_ISNULL(paths.at(0)) || + OB_ISNULL(paths.at(0)->parent_) || + OB_ISNULL(log_plan = paths.at(0)->parent_->get_plan()) || + OB_ISNULL(log_plan->get_stmt()) || + FALSE_IT(table_meta = log_plan->get_basic_table_metas().get_table_meta_by_table_id(paths.at(0)->table_id_))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret), K(log_plan)); + } else if (ctx.use_default_stat()) { + valid_methods = EST_DEFAULT; + } else { + // some basic check + share::schema::ObTableType table_type = paths.at(0)->est_cost_info_.table_meta_info_->table_type_; + uint64_t ref_table_id = paths.at(0)->ref_table_id_; + if (is_inner_path) { + valid_methods &= ~EST_STORAGE; + valid_methods &= ~EST_DS_FULL; + } + if (!log_plan->get_stmt()->is_select_stmt()) { + valid_methods &= ~EST_DS_METHODS; + } + if (OB_ISNULL(table_meta) || + OB_UNLIKELY(OB_INVALID_ID == ref_table_id)) { + valid_methods &= ~EST_STAT; + valid_methods &= ~EST_DS_METHODS; + } else if (!table_meta->use_opt_stat()) { + valid_methods &= ~EST_STAT; + } else if (table_meta->is_stat_locked()) { + valid_methods &= ~EST_STORAGE; + } + if (table_type == EXTERNAL_TABLE) { + // TODO [EXTERNAL TABLE] + valid_methods &= ~EST_STORAGE; + valid_methods &= ~EST_DS_METHODS; + } + if (is_virtual_table(ref_table_id)) { + if (!ObDynamicSamplingUtils::is_ds_virtual_table(ref_table_id)) { + valid_methods &= ~EST_DS_METHODS; + } + if (!share::is_oracle_mapping_real_virtual_table(ref_table_id)) { + valid_methods &= ~EST_STORAGE; + } + } + } + + // check use storage estimation + bool use_storage_est = (valid_methods & EST_STORAGE); /* bit test: only probe the paths while storage estimation is still a candidate */ + for (int64_t i = 0; OB_SUCC(ret) && use_storage_est && i < paths.count(); i ++) { + AccessPath *path = paths.at(i); + const ObTablePartitionInfo *part_info = NULL; + if 
(OB_FAIL(check_path_can_use_storage_estimation(path, use_storage_est, ctx))) { + LOG_WARN("failed to check use storage est", K(ret), KPC(path)); + } + } + if (OB_SUCC(ret) && !use_storage_est) { + valid_methods &= ~EST_STORAGE; + } + + // check dynamic sampling + if (OB_SUCC(ret) && (valid_methods & EST_DS_METHODS) && + OB_FAIL(check_can_use_dynamic_sampling( + ctx, *log_plan, *table_meta, filter_exprs, valid_methods, hint_specify_methods))) { + LOG_WARN("failed to check dynamic sampling", K(ret)); + } + + // if there are any valid hint_specify_method, use it. + if (OB_SUCC(ret)) { + if (valid_methods & hint_specify_methods) { + valid_methods &= hint_specify_methods; + } + } + + return ret; +} + +int ObAccessPathEstimation::check_can_use_dynamic_sampling(ObOptimizerContext &ctx, + const ObLogPlan &log_plan, + const OptTableMeta &table_meta, + const ObIArray &filter_exprs, + ObBaseTableEstMethod &valid_methods, + ObBaseTableEstMethod &specify_methods) +{ + int ret = OB_SUCCESS; + int64_t ds_level = ObDynamicSamplingLevel::NO_DYNAMIC_SAMPLING; + int64_t sample_block_cnt = 0; + bool specify_ds = false; + bool has_invalid_ds_filters = false; + int64_t max_ds_timeout = 0; + const ObBaseTableEstMethod EST_DS_METHODS = EST_DS_BASIC | EST_DS_FULL; + if (OB_FAIL(ObDynamicSamplingUtils::get_valid_dynamic_sampling_level( + ctx.get_session_info(), + log_plan.get_log_plan_hint().get_dynamic_sampling_hint(table_meta.get_table_id()), + ctx.get_global_hint().get_dynamic_sampling(), + ds_level, + sample_block_cnt, + specify_ds))) { + LOG_WARN("failed to get valid dynamic sampling level", K(ret)); + } else if (OB_FAIL(ObDynamicSamplingUtils::get_dynamic_sampling_max_timeout(ctx, max_ds_timeout))) { + LOG_WARN("failed to get dynamic sampling max timeout", K(ret)); + } else if (ObDynamicSamplingLevel::NO_DYNAMIC_SAMPLING == ds_level || + max_ds_timeout <= 0) { + valid_methods &= ~EST_DS_METHODS; + } else if 
(ObDynamicSamplingUtils::check_is_failed_ds_table(table_meta.get_ref_table_id(), + table_meta.get_all_used_parts(), + ctx.get_failed_ds_tab_list())) { + valid_methods &= ~EST_DS_METHODS; + LOG_TRACE("get failed ds table, not use dynamic sampling", K(table_meta), K(ctx.get_failed_ds_tab_list())); + } else if (OB_FAIL(ObDynamicSamplingUtils::check_ds_can_use_filters(filter_exprs, has_invalid_ds_filters))) { + LOG_WARN("failed to check ds can use filters", K(ret)); + } else if (has_invalid_ds_filters) { + valid_methods &= ~EST_DS_FULL; + } else { + valid_methods &= ~EST_DS_BASIC; + } + if (OB_SUCC(ret) && specify_ds) { + specify_methods |= EST_DS_METHODS; + } + return ret; +} + +int ObAccessPathEstimation::choose_best_est_method(ObOptimizerContext &ctx, + common::ObIArray &paths, + const ObIArray &filter_exprs, + const ObBaseTableEstMethod &valid_methods, + ObBaseTableEstMethod& method) +{ + int ret = OB_SUCCESS; + /** + * There are seven est methods: + * 1. EST_DS_FULL: Dynamic sampling collects basic statistics and the final rowcount. + * 2. EST_DS_STORAGE: Dynamic sampling collects basic statistics and deduces the final rowcount with storage layer estimation. + * 3. EST_DS_BASIC: Dynamic sampling collects basic statistics to deduce the final rowcount. + * 4. EST_STORAGE_STAT: Use the storage layer and collected statistics to estimate rows. + * 5. EST_STORAGE_DEFAULT: Use the storage layer and default statistics to estimate rows. + * 6. EST_STAT: Use collected statistics to estimate rows. + * 7. EST_DEFAULT: Use default statistics to estimate rows. + * + * We prioritize them based on three different scenarios: + * 1. Normal scene: STORAGE > STAT > DYNAMIC SAMPLING > DEFAULT + * 2. Simple scene (DYNAMIC SAMPLING is not helpful for final rowcount): lower the priority of dynamic sampling because of its high cost. + * 3. Complex scene (with complex predicates): raise the priority of dynamic sampling because it is more accurate. 
+ */ + + static const int64_t priority_cnt = 7; + static const ObBaseTableEstMethod EST_STORAGE_DEFAULT = EST_STORAGE | EST_DEFAULT; + static const ObBaseTableEstMethod EST_STORAGE_STAT = EST_STORAGE | EST_STAT; + static const ObBaseTableEstMethod EST_DS_STORAGE = EST_DS_BASIC | EST_STORAGE; + static const ObBaseTableEstMethod complex_est_priority[priority_cnt] = + {EST_DS_FULL, EST_STORAGE_STAT, EST_STAT, EST_DS_STORAGE, EST_DS_BASIC, EST_STORAGE_DEFAULT, EST_DEFAULT}; + static const ObBaseTableEstMethod simple_est_priority[priority_cnt] = + {EST_STORAGE_STAT, EST_STAT, EST_STORAGE_DEFAULT, EST_DEFAULT, EST_DS_FULL, EST_DS_STORAGE, EST_DS_BASIC}; + static const ObBaseTableEstMethod default_est_priority[priority_cnt] = + {EST_STORAGE_STAT, EST_STAT, EST_DS_FULL, EST_DS_STORAGE, EST_DS_BASIC, EST_STORAGE_DEFAULT, EST_DEFAULT}; + method = EST_INVALID; + bool is_simple_scene = false; + bool is_complex_scene = false; + bool can_use_ds = valid_methods & (EST_DS_FULL | EST_DS_BASIC); + bool can_use_storage = valid_methods & EST_STORAGE; + + // check is simple scene + bool is_table_get = false; + for (int64_t i = 0; OB_SUCC(ret) && !is_table_get && i < paths.count(); ++i) { + if (OB_ISNULL(paths.at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret), K(paths.at(i))); + } else if (paths.at(i)->pre_query_range_ != NULL && + OB_FAIL(paths.at(i)->pre_query_range_->is_get(is_table_get))) { + LOG_WARN("check query range is table get", K(ret)); + } + } + is_simple_scene = is_table_get; + if (OB_SUCC(ret) && !is_simple_scene && can_use_ds) { + ObLogPlan *log_plan = paths.at(0)->parent_->get_plan(); + ObSEArray ds_col_exprs; + if (!can_use_storage) { + // path which can not use storage estimation is not simple + } else if (OB_FAIL(get_need_dynamic_sampling_columns(log_plan, + paths.at(0)->table_id_, + filter_exprs, true, true, + ds_col_exprs))) { + LOG_WARN("failed to get need dynamic sampling columns", K(ret)); + } else 
if (!ds_col_exprs.empty()) { + // path which still needs dynamic sampling columns (ds_col_exprs is not empty) is not simple + } else { + is_simple_scene = true; + } + } + + // check is complex scene + if (OB_SUCC(ret) && !is_simple_scene && !is_complex_scene && (valid_methods & EST_DS_FULL)) { /* bit test: the complex priority list only differs when EST_DS_FULL is available */ + ObArenaAllocator tmp_alloc("ObOptSel"); + ObSelEstimatorFactory factory(tmp_alloc); + const OptSelectivityCtx* sel_ctx = NULL; + if (OB_UNLIKELY(paths.empty()) || + OB_ISNULL(paths.at(0)) || + OB_ISNULL(sel_ctx = paths.at(0)->est_cost_info_.sel_ctx_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpeted param", K(paths)); + } + for (int64_t i = 0; OB_SUCC(ret) && !is_complex_scene && i < filter_exprs.count(); ++i) { + const ObRawExpr *filter = filter_exprs.at(i); + ObSelEstimator *estimator = NULL; + if (OB_FAIL(factory.create_estimator(*sel_ctx, filter, estimator))) { + LOG_WARN("failed to create estimator", K(ret), KPC(filter)); + } else if (estimator->tend_to_use_ds()) { + is_complex_scene = true; + // path which contains complex filters is complex + LOG_PRINT_EXPR(TRACE, "Try to use dynamic sampling because of complex filter:", filter); + } + } + } + + if (OB_FAIL(ret)) { + } else if (is_simple_scene) { + method = choose_one_est_method(valid_methods, simple_est_priority, priority_cnt); + } else if (is_complex_scene) { + method = choose_one_est_method(valid_methods, complex_est_priority, priority_cnt); + } else { + method = choose_one_est_method(valid_methods, default_est_priority, priority_cnt); + } + + LOG_TRACE("choose a best est method", K(valid_methods), K(is_simple_scene), K(is_complex_scene), K(method)); + return ret; +} + +int ObAccessPathEstimation::is_storage_estimation_enabled(const ObLogPlan* log_plan, + ObOptimizerContext &ctx, + uint64_t table_id, + uint64_t ref_table_id, + bool &can_use) +{ + int ret = OB_SUCCESS; + can_use = ctx.is_storage_estimation_enabled(); + const OptTableMeta *table_meta = NULL; + bool has_hint = false; + bool is_hint_enabled = false; + if (OB_ISNULL(log_plan)) { + ret = 
OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret), K(log_plan)); + } else if (is_virtual_table(ref_table_id) && + !share::is_oracle_mapping_real_virtual_table(ref_table_id)) { + //virtual table + can_use = false; + } else if (OB_ISNULL(table_meta = log_plan->get_basic_table_metas().get_table_meta_by_table_id(table_id)) || + OB_UNLIKELY(OB_INVALID_ID == table_meta->get_ref_table_id())) { + //not basic table + } else if (OB_FAIL(log_plan->get_stmt()->get_query_ctx()->get_global_hint().opt_params_.get_bool_opt_param( + ObOptParamHint::_ENABLE_STORAGE_CARDINALITY_ESTIMATION, is_hint_enabled, has_hint))) { + LOG_WARN("failed to check has opt param", K(ret)); + } else if (has_hint) { + can_use = is_hint_enabled; + } + return ret; +} + +int ObAccessPathEstimation::check_path_can_use_storage_estimation(const AccessPath *path, + bool &can_use, + ObOptimizerContext &ctx) { int ret = OB_SUCCESS; can_use = false; if (OB_ISNULL(path)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("access path is invalid", K(ret), K(path)); - } else if (is_virtual_table(path->ref_table_id_) && - !share::is_oracle_mapping_real_virtual_table(path->ref_table_id_)) { + } else if (OB_FAIL(is_storage_estimation_enabled(path->parent_->get_plan(), ctx ,path->table_id_, path->ref_table_id_, can_use))) { + LOG_WARN("fail to do check_path_can_use_storage_estimation ", K(ret), K(path)); + } else if (!can_use) { can_use = false; } else { const ObTablePartitionInfo *part_info = NULL; @@ -141,11 +407,11 @@ int ObAccessPathEstimation::check_path_can_use_stroage_estimate(const AccessPath } } } - LOG_TRACE("check_path_can_use_stroage_estimate", K(can_use)); + LOG_TRACE("check_path_can_use_storage_estimation", K(can_use)); return ret; } -int ObAccessPathEstimation::process_external_table_estimation(AccessPath *path) +int ObAccessPathEstimation::process_external_table_default_estimation(AccessPath *path) { //TODO [ExternalTable] need refine int ret = OB_SUCCESS; @@ -156,17 +422,11 @@ int 
ObAccessPathEstimation::process_external_table_estimation(AccessPath *path) } else { ObCostTableScanInfo &est_cost_info = path->est_cost_info_; est_cost_info.batch_type_ = ObSimpleBatch::T_SCAN; - if (est_cost_info.table_meta_info_->has_opt_stat_) { - if (OB_FAIL(process_statistics_estimation(path))) { - LOG_WARN("failed to process statistics estimation", K(ret)); - } - } else { - output_row_count = static_cast(OB_EST_DEFAULT_VIRTUAL_TABLE_ROW_COUNT); - path->est_cost_info_.logical_query_range_row_count_ = output_row_count; - path->est_cost_info_.phy_query_range_row_count_ = output_row_count; - path->est_cost_info_.index_back_row_count_ = 0; - path->est_cost_info_.output_row_count_ = output_row_count; - } + output_row_count = static_cast(OB_EST_DEFAULT_VIRTUAL_TABLE_ROW_COUNT); + path->est_cost_info_.logical_query_range_row_count_ = output_row_count; + path->est_cost_info_.phy_query_range_row_count_ = output_row_count; + path->est_cost_info_.index_back_row_count_ = 0; + path->est_cost_info_.output_row_count_ = output_row_count; } return ret; } @@ -198,7 +458,42 @@ int ObAccessPathEstimation::process_vtable_default_estimation(AccessPath *path) return ret; } -int ObAccessPathEstimation::process_table_default_estimation(AccessPath *path) +int ObAccessPathEstimation::process_table_default_estimation(ObOptimizerContext &ctx, ObIArray &paths) +{ + int ret = OB_SUCCESS; + for (int64_t i = 0; OB_SUCC(ret) && i < paths.count(); ++i) { + AccessPath *path = paths.at(i); + if (OB_ISNULL(path)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(paths)); + } else if (ctx.use_default_stat()) { + if (OB_FAIL(process_table_force_default_estimation(path))) { + LOG_WARN("failed to process process vtable default estimation", K(ret)); + } else if (i == 0 && OB_FAIL(update_table_stat_info_by_default(path))) { + LOG_WARN("failed to update table stat by default", K(ret)); + } + } else if (is_virtual_table(path->ref_table_id_) && + 
!share::is_oracle_mapping_real_virtual_table(path->ref_table_id_)) { + if (OB_FAIL(process_vtable_default_estimation(path))) { + LOG_WARN("failed to process vtable default estimation", K(ret)); + } else if (i == 0 && OB_FAIL(update_table_stat_info_by_default(path))) { + LOG_WARN("failed to update table stat by default", K(ret)); + } + } else if (EXTERNAL_TABLE == path->est_cost_info_.table_meta_info_->table_type_) { + if (OB_FAIL(process_external_table_default_estimation(path))) { + LOG_WARN("failed to process external table default estimation", K(ret)); + } else if (i == 0 && OB_FAIL(update_table_stat_info_by_default(path))) { + LOG_WARN("failed to update table stat by default", K(ret)); + } + } else if (OB_FAIL(process_statistics_estimation(path))) { + // use default opt table meta inited in ObJoinOrder::init_est_sel_info_for_access_path + LOG_WARN("failed to process statistics estimation", K(ret)); + } + } + return ret; +} + +int ObAccessPathEstimation::process_table_force_default_estimation(AccessPath *path) { int ret = OB_SUCCESS; double output_row_count = ObOptStatManager::get_default_table_row_count(); @@ -211,7 +506,6 @@ int ObAccessPathEstimation::process_table_default_estimation(AccessPath *path) LOG_WARN("failed to reset skip scan info", K(ret)); } else { ObCostTableScanInfo &est_cost_info = path->est_cost_info_; - est_cost_info.row_est_method_ = DEFAULT_STAT; path->est_cost_info_.logical_query_range_row_count_ = output_row_count; path->est_cost_info_.phy_query_range_row_count_ = output_row_count; path->est_cost_info_.index_back_row_count_ = 0; @@ -236,7 +530,8 @@ int ObAccessPathEstimation::process_table_default_estimation(AccessPath *path) } int ObAccessPathEstimation::process_storage_estimation(ObOptimizerContext &ctx, - ObIArray &paths) + ObIArray &paths, + bool &is_success) { int ret = OB_SUCCESS; ObArenaAllocator arena("CardEstimation"); @@ -359,7 +654,6 @@ int ObAccessPathEstimation::process_storage_estimation(ObOptimizerContext &ctx, if 
(!need_fallback) { for (int64_t i = 0; OB_SUCC(ret) && i < tasks.count(); ++i) { const ObBatchEstTasks *task = tasks.at(i); - RowCountEstMethod est_method = RowCountEstMethod::STORAGE_STAT; for (int64_t j = 0; OB_SUCC(ret) && j < task->paths_.count(); ++j) { const obrpc::ObEstPartResElement &res = task->res_.index_param_res_.at(j); AccessPath *path = task->paths_.at(j); @@ -368,8 +662,7 @@ int ObAccessPathEstimation::process_storage_estimation(ObOptimizerContext &ctx, } else if (OB_FAIL(estimate_prefix_range_rowcount(res, path->est_cost_info_))) { LOG_WARN("failed to estimate prefix range rowcount", K(ret)); - } else if (OB_FAIL(fill_cost_table_scan_info(path->est_cost_info_, - est_method))) { + } else if (OB_FAIL(fill_cost_table_scan_info(path->est_cost_info_))) { LOG_WARN("failed to fill cost table scan info", K(ret)); } } @@ -383,6 +676,7 @@ int ObAccessPathEstimation::process_storage_estimation(ObOptimizerContext &ctx, tasks.at(i) = NULL; } } + is_success = !need_fallback; return ret; } @@ -479,15 +773,13 @@ int ObAccessPathEstimation::estimate_prefix_range_rowcount( return ret; } -int ObAccessPathEstimation::fill_cost_table_scan_info(ObCostTableScanInfo &est_cost_info, - const RowCountEstMethod est_method) +int ObAccessPathEstimation::fill_cost_table_scan_info(ObCostTableScanInfo &est_cost_info) { int ret = OB_SUCCESS; double &output_row_count = est_cost_info.output_row_count_; double &logical_row_count = est_cost_info.logical_query_range_row_count_; double &physical_row_count = est_cost_info.phy_query_range_row_count_; double &index_back_row_count = est_cost_info.index_back_row_count_; - est_cost_info.row_est_method_ = est_method; // we have exact query ranges on a unique index, // each range is expected to have at most one row @@ -673,11 +965,18 @@ int ObAccessPathEstimation::process_statistics_estimation(AccessPath *path) K(est_cost_info.pushdown_prefix_filter_sel_), K(est_cost_info.ss_postfix_range_filters_sel_)); - RowCountEstMethod est_method = 
table_meta_info->has_opt_stat_ ? RowCountEstMethod::BASIC_STAT - : RowCountEstMethod::DEFAULT_STAT; + OZ (fill_cost_table_scan_info(est_cost_info)); + } + return ret; +} - OZ (fill_cost_table_scan_info(est_cost_info, - est_method)); +int ObAccessPathEstimation::process_statistics_estimation(ObIArray &paths) +{ + int ret = OB_SUCCESS; + for (int64_t i = 0; OB_SUCC(ret) && i < paths.count(); ++i) { + if (OB_FAIL(process_statistics_estimation(paths.at(i)))) { + LOG_WARN("failed to process table default estimation", K(ret)); + } } return ret; } @@ -1347,13 +1646,14 @@ int ObAccessPathEstimation::process_dynamic_sampling_estimation(ObOptimizerConte ObIArray &paths, const bool is_inner_path, const ObIArray &filter_exprs, - common::ObIArray &no_ds_paths) + bool only_ds_basic_stat, + bool &is_success) { int ret = OB_SUCCESS; LOG_TRACE("begin process dynamic sampling estimation", K(paths), K(is_inner_path)); ObDSTableParam ds_table_param; ObSEArray ds_result_items; - bool only_ds_basic_stat = false; + is_success = true; const ObLogPlan* log_plan = NULL; const OptTableMeta *table_meta = NULL; common::ObSEArray ds_paths; @@ -1362,25 +1662,20 @@ int ObAccessPathEstimation::process_dynamic_sampling_estimation(ObOptimizerConte if (paths.empty()) { //do nothing } else if (OB_ISNULL(paths.at(0)->parent_) || - OB_ISNULL(log_plan = paths.at(0)->parent_->get_plan())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null", K(ret), K(log_plan)); - } else if (OB_ISNULL(table_meta = log_plan->get_basic_table_metas().get_table_meta_by_table_id(paths.at(0)->table_id_)) || + OB_ISNULL(log_plan = paths.at(0)->parent_->get_plan()) || + OB_ISNULL(table_meta = log_plan->get_basic_table_metas().get_table_meta_by_table_id(paths.at(0)->table_id_)) || OB_UNLIKELY(OB_INVALID_ID == table_meta->get_ref_table_id())) { - //not basic table doesn't dynamic sampling, do nothing + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret), K(log_plan), KPC(table_meta)); } else if 
(OB_FAIL(ObDynamicSamplingUtils::get_ds_table_param(ctx, log_plan, table_meta, - false, ds_table_param, specify_ds))) { + ds_table_param, specify_ds))) { LOG_WARN("failed to get ds table param", K(ret), K(ds_table_param)); } else if (!ds_table_param.is_valid()) { - if (OB_FAIL(no_ds_paths.assign(paths))) { - LOG_WARN("failed to assign", K(ret)); - } + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get invalid ds table param", K(ret), K(ds_table_param)); } else if (OB_FAIL(add_ds_result_items(paths, filter_exprs, specify_ds, - ds_result_items, only_ds_basic_stat, - ds_paths, no_ds_paths))) { + ds_result_items, only_ds_basic_stat))) { LOG_WARN("failed to init ds result items", K(ret)); - } else if (!only_ds_basic_stat && ds_paths.empty()) { - //do nothing } else { OPT_TRACE("begin to process table dynamic sampling estimation"); ObArenaAllocator allocator("ObOpTableDS", OB_MALLOC_NORMAL_BLOCK_SIZE, ctx.get_session_info()->get_effective_tenant_id()); @@ -1396,8 +1691,8 @@ int ObAccessPathEstimation::process_dynamic_sampling_estimation(ObOptimizerConte table_meta->get_all_used_parts(), ctx.get_failed_ds_tab_list()))) { LOG_WARN("failed to add failed ds table list", K(ret)); - } else if (OB_FAIL(no_ds_paths.assign(paths))) { - LOG_WARN("failed to assign", K(ret)); + } else { + is_success = false; } } else { LOG_WARN("failed to dynamic sampling", K(ret), K(start_time), K(ds_table_param)); @@ -1408,12 +1703,13 @@ int ObAccessPathEstimation::process_dynamic_sampling_estimation(ObOptimizerConte no_ds_data))) { LOG_WARN("failed to update table stat info by dynamic sampling", K(ret)); } else if (only_ds_basic_stat || no_ds_data) { - if (OB_FAIL(no_ds_paths.assign(paths))) { - LOG_WARN("failed to assign", K(ret)); - } else {/*do nothing*/} + if (OB_FAIL(process_statistics_estimation(paths))) { + LOG_WARN("failed to process statistics estimation", K(ret)); + } } else if (OB_FAIL(estimate_path_rowcount_by_dynamic_sampling(ds_table_param.table_id_, paths, is_inner_path, ds_result_items))) { 
LOG_WARN("failed to estimate path rowcount by dynamic sampling", K(ret)); + LOG_TRACE("finish dynamic sampling", K(only_ds_basic_stat), K(no_ds_data), K(is_success)); } OPT_TRACE("end to process table dynamic sampling estimation"); OPT_TRACE_TITLE("DYNAMIC SAMPLE RESULT"); @@ -1427,56 +1723,30 @@ int ObAccessPathEstimation::add_ds_result_items(ObIArray &paths, const ObIArray &filter_exprs, const bool specify_ds, ObIArray &ds_result_items, - bool &only_ds_basic_stat, - common::ObIArray &ds_paths, - common::ObIArray &no_ds_paths) + bool only_ds_basic_stat) { int ret = OB_SUCCESS; - only_ds_basic_stat = false; bool all_path_is_get = false; - if (OB_UNLIKELY(paths.empty())) { + if (OB_UNLIKELY(paths.empty()) || + OB_ISNULL(paths.at(0)) || + OB_ISNULL(paths.at(0)->parent_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected error", K(ret), K(paths)); - } else if (OB_FAIL(get_valid_ds_path(paths, specify_ds, ds_paths, no_ds_paths, all_path_is_get))) { - LOG_WARN("failed to get valid ds path", K(ret)); - } else if (ds_paths.empty()) {//check need dynamic basic stats for join + } else if (only_ds_basic_stat) {// some filters invalid, only dynamic basic stats ObDSResultItem basic_item(ObDSResultItemType::OB_DS_BASIC_STAT, paths.at(0)->ref_table_id_); - if (all_path_is_get) {//if all path is table get, no dynamic sampling - //do nothing - } else if (OB_UNLIKELY(no_ds_paths.empty())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected error", K(ret), K(paths), K(ds_paths)); - } else if (OB_FAIL(get_need_dynamic_sampling_columns(no_ds_paths.at(0)->parent_->get_plan(), - no_ds_paths.at(0)->table_id_, - filter_exprs, true, true, - basic_item.exprs_))) { - LOG_WARN("failed to get need dynamic sampling columns", K(ret)); - } else if (basic_item.exprs_.empty()) { - //do nothing - } else if (OB_FAIL(ds_result_items.push_back(basic_item))) { - LOG_WARN("failed to push back", K(ret)); - } else { - only_ds_basic_stat = true; - } - } else if 
(OB_FAIL(ObDynamicSamplingUtils::check_ds_can_use_filters(filter_exprs, only_ds_basic_stat))) { - LOG_WARN("failed to check ds can use filters", K(ret)); - } else if (only_ds_basic_stat) {//filters invalid, only can dynamic basic stats - ObDSResultItem basic_item(ObDSResultItemType::OB_DS_BASIC_STAT, ds_paths.at(0)->ref_table_id_); - if (OB_FAIL(get_need_dynamic_sampling_columns(ds_paths.at(0)->parent_->get_plan(), - ds_paths.at(0)->table_id_, + if (OB_FAIL(get_need_dynamic_sampling_columns(paths.at(0)->parent_->get_plan(), + paths.at(0)->table_id_, filter_exprs, false, false, basic_item.exprs_))) { LOG_WARN("failed to get need dynamic sampling columns", K(ret)); } else if (OB_FAIL(ds_result_items.push_back(basic_item))) { LOG_WARN("failed to push back", K(ret)); - } else if (OB_FAIL(append(no_ds_paths, ds_paths))) { - LOG_WARN("failed to append", K(ret)); } else {/*do nothing*/} } else { //1.init ds basic stat item - ObDSResultItem basic_item(ObDSResultItemType::OB_DS_BASIC_STAT, ds_paths.at(0)->ref_table_id_); - if (OB_FAIL(get_need_dynamic_sampling_columns(ds_paths.at(0)->parent_->get_plan(), - ds_paths.at(0)->table_id_, + ObDSResultItem basic_item(ObDSResultItemType::OB_DS_BASIC_STAT, paths.at(0)->ref_table_id_); + if (OB_FAIL(get_need_dynamic_sampling_columns(paths.at(0)->parent_->get_plan(), + paths.at(0)->table_id_, filter_exprs, true, false, basic_item.exprs_))) { LOG_WARN("failed to get need dynamic sampling columns", K(ret)); @@ -1491,15 +1761,15 @@ int ObAccessPathEstimation::add_ds_result_items(ObIArray &paths, LOG_WARN("failed to push back", K(ret)); } else { //3.init query range item - for (int64_t i = 0; OB_SUCC(ret) && i < ds_paths.count(); ++i) { - if (OB_ISNULL(ds_paths.at(i))) { + for (int64_t i = 0; OB_SUCC(ret) && i < paths.count(); ++i) { + if (OB_ISNULL(paths.at(i))) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null", K(ret), K(ds_paths.at(i))); - } else if (ds_paths.at(i)->est_cost_info_.prefix_filters_.empty()) { + LOG_WARN("get 
unexpected null", K(ret), K(paths.at(i))); + } else if (paths.at(i)->est_cost_info_.prefix_filters_.empty()) { //do nothing } else { - ObDSResultItem tmp_item(ObDSResultItemType::OB_DS_FILTER_OUTPUT_STAT, ds_paths.at(i)->index_id_); - if (OB_FAIL(tmp_item.exprs_.assign(ds_paths.at(i)->est_cost_info_.prefix_filters_))) { + ObDSResultItem tmp_item(ObDSResultItemType::OB_DS_FILTER_OUTPUT_STAT, paths.at(i)->index_id_); + if (OB_FAIL(tmp_item.exprs_.assign(paths.at(i)->est_cost_info_.prefix_filters_))) { LOG_WARN("failed to assign", K(ret)); } else if (OB_FAIL(ds_result_items.push_back(tmp_item))) { LOG_WARN("failed to push back", K(ret)); @@ -1510,54 +1780,7 @@ int ObAccessPathEstimation::add_ds_result_items(ObIArray &paths, } } LOG_TRACE("succeed to add_ds result items", K(paths), K(all_path_is_get), K(filter_exprs), - K(ds_result_items), K(only_ds_basic_stat), K(ds_paths), - K(no_ds_paths)); - return ret; -} - -int ObAccessPathEstimation::get_valid_ds_path(ObIArray &paths, - const bool specify_ds, - common::ObIArray &ds_paths, - common::ObIArray &no_ds_paths, - bool &all_path_is_get) -{ - int ret = OB_SUCCESS; - all_path_is_get = true; - if (specify_ds) { - if (OB_FAIL(ds_paths.assign(paths))) { - LOG_WARN("failed to assign", K(ret)); - } else {/*do nothing*/} - } else { - for (int64_t i = 0; OB_SUCC(ret) && i < paths.count(); ++i) { - if (OB_ISNULL(paths.at(i))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null", K(ret), K(paths.at(i))); - } else { - bool can_use = false; - bool is_get = false; - if (paths.at(i)->pre_query_range_ != NULL && OB_FAIL(paths.at(i)->pre_query_range_->is_get(is_get))) { - LOG_WARN("check query range is table get", K(ret)); - } else if (is_get) {//table get no need dynamic sampling - if (OB_FAIL(no_ds_paths.push_back(paths.at(i)))) { - LOG_WARN("faile to push back", K(ret)); - } - } else if (OB_FAIL(check_path_can_use_stroage_estimate(paths.at(i), can_use))) { - LOG_WARN("failed to check path can use stroage estimate", K(ret)); 
- } else if (can_use && - paths.at(i)->est_cost_info_.pushdown_prefix_filters_.empty()) { - if (OB_FAIL(no_ds_paths.push_back(paths.at(i)))) {//use the stroage estimate directly. - LOG_WARN("faile to push back", K(ret)); - } else { - all_path_is_get = false; - } - } else if (OB_FAIL(ds_paths.push_back(paths.at(i)))) { - LOG_WARN("faile to push back", K(ret)); - } else { - all_path_is_get = false; - } - } - } - } + K(ds_result_items), K(only_ds_basic_stat)); return ret; } @@ -1666,6 +1889,25 @@ int ObAccessPathEstimation::update_table_stat_info_by_dynamic_sampling(AccessPat return ret; } +int ObAccessPathEstimation::update_table_stat_info_by_default(AccessPath *path) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(path)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret), KPC(path)); + } else if (path->get_output_row_count() > 0) { + OptTableMetas &table_metas = path->parent_->get_plan()->get_basic_table_metas(); + OptTableMeta *table_meta = table_metas.get_table_meta_by_table_id(path->table_id_); + if (OB_NOT_NULL(table_meta)) { + table_meta->set_rows(path->get_output_row_count()); + for (int64_t i = 0; i < table_meta->get_column_metas().count(); ++i) { + table_meta->get_column_metas().at(i).set_default_meta(path->get_output_row_count()); + } + } + } + return ret; +} + int ObAccessPathEstimation::estimate_path_rowcount_by_dynamic_sampling(const uint64_t table_id, ObIArray &paths, const bool is_inner_path, @@ -1732,7 +1974,6 @@ int ObAccessPathEstimation::estimate_path_rowcount_by_dynamic_sampling(const uin } } if (OB_SUCC(ret)) { - est_cost_info.row_est_method_ = RowCountEstMethod::DYNAMIC_SAMPLING_STAT; // block sampling double block_sample_ratio = est_cost_info.sample_info_.is_block_sample() ? 
0.01 * est_cost_info.sample_info_.percent_ : 1.0; diff --git a/src/sql/optimizer/ob_access_path_estimation.h b/src/sql/optimizer/ob_access_path_estimation.h index 3da4e5b3b4..69a05a23c2 100644 --- a/src/sql/optimizer/ob_access_path_estimation.h +++ b/src/sql/optimizer/ob_access_path_estimation.h @@ -42,14 +42,54 @@ public: common::ObIArray &paths, const bool is_inner_path, const ObIArray &filter_exprs, - bool &is_use_ds); + ObBaseTableEstMethod &method); static int estimate_full_table_rowcount(ObOptimizerContext &ctx, const ObTablePartitionInfo &table_part_info, ObTableMetaInfo &meta); static bool is_retry_ret(int ret); + static int is_storage_estimation_enabled(const ObLogPlan* log_plan, + ObOptimizerContext &ctx, + uint64_t table_id, + uint64_t ref_table_id, + bool &can_use); private: + static inline uint64_t choose_one_est_method(ObBaseTableEstMethod valid_methods, const ObBaseTableEstMethod est_priority[], uint64_t cnt) + { + ObBaseTableEstMethod ret = EST_INVALID; + for (int64_t i = 0; EST_INVALID == ret && i < cnt; i ++) { + if ((valid_methods & est_priority[i]) == est_priority[i]) { + ret = est_priority[i]; + } + } + return ret; + } + + static int get_valid_est_methods(ObOptimizerContext &ctx, + common::ObIArray &paths, + const ObIArray &filter_exprs, + bool is_inner_path, + ObBaseTableEstMethod &valid_methods); + + static int check_can_use_dynamic_sampling(ObOptimizerContext &ctx, + const ObLogPlan &log_plan, + const OptTableMeta &table_meta, + const ObIArray &filter_exprs, + ObBaseTableEstMethod &valid_methods, + ObBaseTableEstMethod &specify_methods); + + static int choose_best_est_method(ObOptimizerContext &ctx, + common::ObIArray &paths, + const ObIArray &filter_exprs, + const ObBaseTableEstMethod &valid_methods, + ObBaseTableEstMethod& method); + + static int do_estimate_rowcount(ObOptimizerContext &ctx, + common::ObIArray &paths, + const bool is_inner_path, + const ObIArray &filter_exprs, + ObBaseTableEstMethod &method); static int 
process_common_estimate_rowcount(ObOptimizerContext &ctx, common::ObIArray &paths); @@ -58,35 +98,37 @@ private: static int64_t get_scan_range_count(const ObIArray &ranges); - static int choose_best_estimation_method(const AccessPath *path, - const ObTableMetaInfo &meta, - bool &use_storage_stat, - bool &use_default_vt); - - static int check_path_can_use_stroage_estimate(const AccessPath *path, bool &can_use); + static int check_path_can_use_storage_estimation(const AccessPath *path, + bool &can_use, + ObOptimizerContext &ctx); static int choose_leader_replica(const ObCandiTabletLoc &part_loc_info, const bool can_use_remote, const ObAddr &local_addr, EstimatedPartition &best_partition); - static int process_external_table_estimation(AccessPath *path); + static int process_external_table_default_estimation(AccessPath *path); static int process_vtable_default_estimation(AccessPath *path); - static int process_table_default_estimation(AccessPath *path); + static int process_table_force_default_estimation(AccessPath *path); + static int process_table_default_estimation(ObOptimizerContext &ctx, ObIArray &path); /// following functions are mainly uesd by statistics estimation static int process_statistics_estimation(AccessPath *path); + static int process_statistics_estimation(ObIArray &paths); + /// following functions are mainly used by storage estimation static int process_storage_estimation(ObOptimizerContext &ctx, - ObIArray &paths); + ObIArray &paths, + bool &is_success); static int process_dynamic_sampling_estimation(ObOptimizerContext &ctx, ObIArray &paths, const bool is_inner_path, const ObIArray &filter_exprs, - common::ObIArray &no_ds_paths); + bool only_ds_basic_stat, + bool &is_success); static int calc_skip_scan_prefix_ndv(AccessPath &ap, double &prefix_ndv); @@ -134,8 +176,7 @@ private: const obrpc::ObEstPartResElement &result, ObCostTableScanInfo &est_cost_info); - static int fill_cost_table_scan_info(ObCostTableScanInfo &est_cost_info, - const 
RowCountEstMethod est_method); + static int fill_cost_table_scan_info(ObCostTableScanInfo &est_cost_info); static int get_key_ranges(ObOptimizerContext &ctx, ObIAllocator &allocator, @@ -184,25 +225,18 @@ private: const ObIArray &filter_exprs, const bool specify_ds, ObIArray &ds_result_items, - bool &only_ds_basic_stat, - common::ObIArray &ds_paths, - common::ObIArray &no_ds_paths); + bool only_ds_basic_stat); static int update_table_stat_info_by_dynamic_sampling(AccessPath *path, int64_t ds_level, ObIArray &ds_result_items, bool &no_ds_data); + static int update_table_stat_info_by_default(AccessPath *path); static int estimate_path_rowcount_by_dynamic_sampling(const uint64_t table_id, ObIArray &paths, const bool is_inner_path, ObIArray &ds_result_items); - - static int get_valid_ds_path(ObIArray &paths, - const bool specify_ds, - common::ObIArray &ds_paths, - common::ObIArray &no_ds_paths, - bool &all_path_is_get); }; } diff --git a/src/sql/optimizer/ob_dynamic_sampling.cpp b/src/sql/optimizer/ob_dynamic_sampling.cpp index 2aa53b703f..ba83c7dca7 100644 --- a/src/sql/optimizer/ob_dynamic_sampling.cpp +++ b/src/sql/optimizer/ob_dynamic_sampling.cpp @@ -1232,7 +1232,6 @@ bool ObDynamicSampling::all_ds_col_stats_are_gathered(const ObDSTableParam ¶ int ObDynamicSamplingUtils::get_valid_dynamic_sampling_level(const ObSQLSessionInfo *session_info, const ObTableDynamicSamplingHint *table_ds_hint, const int64_t global_ds_level, - bool has_opt_stat, int64_t &ds_level, int64_t &sample_block_cnt, bool &specify_ds) @@ -1258,7 +1257,7 @@ int ObDynamicSamplingUtils::get_valid_dynamic_sampling_level(const ObSQLSessionI LOG_WARN("get unexpected null", K(ret), K(session_info)); } else if (session_info->is_user_session() && OB_FAIL(session_info->get_opt_dynamic_sampling(session_ds_level))) { LOG_WARN("failed to get opt dynamic sampling level", K(ret)); - } else if (session_ds_level == ObDynamicSamplingLevel::BASIC_DYNAMIC_SAMPLING && !has_opt_stat) { + } else if (session_ds_level == 
ObDynamicSamplingLevel::BASIC_DYNAMIC_SAMPLING) { ds_level = session_ds_level; } LOG_TRACE("get valid dynamic sampling level", KPC(table_ds_hint), K(global_ds_level), K(specify_ds), @@ -1269,7 +1268,6 @@ int ObDynamicSamplingUtils::get_valid_dynamic_sampling_level(const ObSQLSessionI int ObDynamicSamplingUtils::get_ds_table_param(ObOptimizerContext &ctx, const ObLogPlan *log_plan, const OptTableMeta *table_meta, - bool ignore_opt_stat, ObDSTableParam &ds_table_param, bool &specify_ds) { @@ -1282,16 +1280,15 @@ int ObDynamicSamplingUtils::get_ds_table_param(ObOptimizerContext &ctx, OB_ISNULL(table_item = log_plan->get_stmt()->get_table_item_by_id(table_meta->get_table_id()))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(ret), K(log_plan), KPC(table_meta), KPC(table_item)); - } else if (!log_plan->get_stmt()->is_select_stmt() || ctx.use_default_stat()) { - //do nothing - } else if (is_virtual_table(table_meta->get_ref_table_id()) && !is_ds_virtual_table(table_meta->get_ref_table_id())) { - //do nothing - } else if (table_meta->get_table_type() == EXTERNAL_TABLE) { - //do nothing TODO [EXTERNAL TABLE] + } else if (OB_UNLIKELY(!log_plan->get_stmt()->is_select_stmt()) || + OB_UNLIKELY(ctx.use_default_stat()) || + OB_UNLIKELY(is_virtual_table(table_meta->get_ref_table_id()) && !is_ds_virtual_table(table_meta->get_ref_table_id())) || + OB_UNLIKELY(table_meta->get_table_type() == EXTERNAL_TABLE)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected param", K(ret), K(log_plan), KPC(table_meta), KPC(table_item)); } else if (OB_FAIL(get_valid_dynamic_sampling_level(ctx.get_session_info(), log_plan->get_log_plan_hint().get_dynamic_sampling_hint(table_meta->get_table_id()), ctx.get_global_hint().get_dynamic_sampling(), - ignore_opt_stat ? 
false : table_meta->use_opt_stat(), ds_level, sample_block_cnt, specify_ds))) { @@ -1324,6 +1321,9 @@ int ObDynamicSamplingUtils::get_ds_table_param(ObOptimizerContext &ctx, ds_table_param.table_name_ = table_item->table_name_; ds_table_param.alias_name_ = table_item->alias_name_; } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get invalid ds level", K(ret), K(ds_level)); } return ret; } diff --git a/src/sql/optimizer/ob_dynamic_sampling.h b/src/sql/optimizer/ob_dynamic_sampling.h index 0546d49d4a..3fc191fa14 100644 --- a/src/sql/optimizer/ob_dynamic_sampling.h +++ b/src/sql/optimizer/ob_dynamic_sampling.h @@ -349,7 +349,6 @@ public: static int get_valid_dynamic_sampling_level(const ObSQLSessionInfo *session_info, const ObTableDynamicSamplingHint *table_ds_hint, const int64_t global_ds_level, - bool has_opt_stat, int64_t &ds_level, int64_t &sample_block_cnt, bool &specify_ds); @@ -357,7 +356,6 @@ public: static int get_ds_table_param(ObOptimizerContext &ctx, const ObLogPlan *log_plan, const OptTableMeta *table_meta, - bool ignore_opt_stat, ObDSTableParam &ds_table_param, bool &specify_ds); @@ -383,6 +381,10 @@ public: const uint64_t ref_table_id, int64_t °ree); + static bool check_is_failed_ds_table(const uint64_t table_id, + const common::ObIArray &used_part_id, + const common::ObIArray &failed_list); + private: static int check_ds_can_use_filter(const ObRawExpr *filter, bool &no_use, @@ -394,9 +396,6 @@ private: bool &need_specify_partition, ObIArray &partition_infos); - static bool check_is_failed_ds_table(const uint64_t table_id, - const common::ObIArray &used_part_id, - const common::ObIArray &failed_list); } ; diff --git a/src/sql/optimizer/ob_join_order.cpp b/src/sql/optimizer/ob_join_order.cpp index b56ea5b13d..a6282e3f32 100644 --- a/src/sql/optimizer/ob_join_order.cpp +++ b/src/sql/optimizer/ob_join_order.cpp @@ -2814,16 +2814,19 @@ int ObJoinOrder::will_use_skip_scan(const uint64_t table_id, */ int 
ObJoinOrder::estimate_rowcount_for_access_path(ObIArray &all_paths, const bool is_inner_path, - common::ObIArray &filter_exprs) + common::ObIArray &filter_exprs, + ObBaseTableEstMethod &method) { int ret = OB_SUCCESS; bool is_use_ds = false; + method = EST_INVALID; + get_plan()->get_selectivity_ctx().set_dependency_type(FilterDependencyType::INDEPENDENT); if (OB_FAIL(ObAccessPathEstimation::estimate_rowcount(OPT_CTX, all_paths, is_inner_path, filter_exprs, - is_use_ds))) { + method))) { LOG_WARN("failed to do access path estimation", K(ret)); - } else if (!is_inner_path && !is_use_ds && OB_FAIL(compute_table_rowcount_info())) { + } else if (!is_inner_path && !(method & EST_DS_FULL) && OB_FAIL(compute_table_rowcount_info())) { LOG_WARN("failed to compute table rowcount info", K(ret)); } return ret; @@ -2995,42 +2998,38 @@ int ObJoinOrder::revise_output_rows_after_creating_path(PathHelper &helper, // get the minimal output row count int64_t maximum_count = -1; int64_t range_prefix_count = -1; - RowCountEstMethod estimate_method = INVALID_METHOD; - for (int64_t i = 0; OB_SUCC(ret) && i < access_paths.count(); ++i) { - AccessPath *path = access_paths.at(i); - if (OB_ISNULL(path)) { + if (helper.est_method_ & EST_STORAGE) { + bool contain_false_range_path = false; + for (int64_t i = 0; OB_SUCC(ret) && !contain_false_range_path && i < access_paths.count(); ++i) { + AccessPath *path = access_paths.at(i); + if (OB_ISNULL(path)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("null path", K(ret)); + } else if (OB_UNLIKELY((range_prefix_count = path->range_prefix_count_) < 0)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected range prefix count", K(ret), K(range_prefix_count)); + } else if (path->is_false_range()) { + contain_false_range_path = true; + output_rows_ = 0.0; + LOG_TRACE("OPT:revise output rows for false range", K(output_rows_)); + } else if (maximum_count <= range_prefix_count) { + LOG_TRACE("OPT:revise output rows", K(path->get_output_row_count()), + 
K(output_rows_), K(maximum_count), K(range_prefix_count), K(ret)); + if (maximum_count == range_prefix_count) { + output_rows_ = std::min(path->get_output_row_count(), output_rows_); + } else { + output_rows_ = path->get_output_row_count(); + maximum_count = range_prefix_count; + } + } else { /*do nothing*/ } + } + } else { + if (OB_UNLIKELY(access_paths.empty()) || + OB_ISNULL(access_paths.at(0))) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("null path", K(ret)); - } else if (path->est_cost_info_.row_est_method_ == BASIC_STAT && - (estimate_method == STORAGE_STAT)) { - // do nothing if the path is estimated by ndv - } else if (OB_UNLIKELY((range_prefix_count = path->range_prefix_count_) < 0)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected range prefix count", K(ret), K(range_prefix_count)); - } else if (maximum_count <= range_prefix_count) { - LOG_TRACE("OPT:revise output rows", K(path->get_output_row_count()), - K(output_rows_), K(maximum_count), K(range_prefix_count), K(ret)); - if (maximum_count == range_prefix_count) { - output_rows_ = std::min(path->get_output_row_count(), output_rows_); - } else { - output_rows_ = path->get_output_row_count(); - maximum_count = range_prefix_count; - } - estimate_method = path->est_cost_info_.row_est_method_; - } else { /*do nothing*/ } - } - - // update index rows in normal path - for (int64_t i = 0; OB_SUCC(ret) && i < interesting_paths_.count(); ++i) { - if (OB_ISNULL(interesting_paths_.at(i))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("null path", K(ret)); - } else if (!interesting_paths_.at(i)->is_access_path()) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("should be access path", K(ret)); + LOG_WARN("unexpected null", K(access_paths)); } else { - path = static_cast (interesting_paths_.at(i)); - path->est_cost_info_.row_est_method_ = estimate_method; + output_rows_ = access_paths.at(0)->get_output_row_count(); } } @@ -4011,7 +4010,8 @@ int ObJoinOrder::estimate_size_for_base_table(PathHelper &helper, LOG_WARN("failed to fill path 
index meta info", K(ret)); } else if (OB_FAIL(estimate_rowcount_for_access_path(access_paths, helper.is_inner_path_, - helper.filters_))) { + helper.filters_, + helper.est_method_))) { LOG_WARN("failed to estimate and add access path", K(ret)); } else { LOG_TRACE("estimate rows for base table", K(output_rows_), @@ -7686,9 +7686,6 @@ int ObJoinOrder::create_one_cte_table_path(const TableItem* table_item, ret = OB_ALLOCATE_MEMORY_FAILED; LOG_ERROR("failed to allocate an AccessPath", K(ret)); } else { - // magic number ? @guoping.wgp refine this - output_rows_ = 199; - output_row_size_ = 199; ap = new(ap) CteTablePath(); ap->table_id_ = table_id_; ap->ref_table_id_ = table_item->ref_id_; @@ -7715,6 +7712,63 @@ int ObJoinOrder::create_one_cte_table_path(const TableItem* table_item, return ret; } +int ObJoinOrder::estimate_size_and_width_for_fake_cte(uint64_t table_id, ObSelectLogPlan *nonrecursive_plan) +{ + int ret = OB_SUCCESS; + const ObDMLStmt *stmt = NULL; + const TableItem *table_item = NULL; + ObLogicalOperator *nonrecursive_root = NULL; + double selectivity = 0; + if (OB_ISNULL(get_plan()) || OB_ISNULL(stmt = get_plan()->get_stmt()) || + OB_ISNULL(nonrecursive_plan) || OB_ISNULL(nonrecursive_plan->get_stmt()) || + OB_UNLIKELY(!nonrecursive_plan->get_stmt()->is_select_stmt())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(get_plan()), K(nonrecursive_plan), + K(nonrecursive_root), K(stmt), K(ret)); + } else if (OB_FAIL(nonrecursive_plan->get_candidate_plans().get_best_plan(nonrecursive_root))) { + LOG_WARN("failed to get best plan", K(ret)); + } else if (OB_ISNULL(nonrecursive_root)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret), K(nonrecursive_root)); + } else if (FALSE_IT(nonrecursive_plan->get_selectivity_ctx().init_op_ctx( + &nonrecursive_root->get_output_equal_sets(), nonrecursive_root->get_card()))) { + // do nothing + } else if (OB_FAIL(get_plan()->get_basic_table_metas().add_generate_table_meta_info( + 
get_plan()->get_stmt(), + static_cast(nonrecursive_plan->get_stmt()), + table_id, + nonrecursive_plan->get_update_table_metas(), + nonrecursive_plan->get_selectivity_ctx(), + nonrecursive_root->get_card()))) { + LOG_WARN("failed to add generate table meta info", K(ret)); + } else if (OB_FAIL(ObOptEstCost::estimate_width_for_table(get_plan()->get_basic_table_metas(), + get_plan()->get_selectivity_ctx(), + stmt->get_column_items(), + table_id, + output_row_size_))) { + LOG_WARN("estimate width of row failed", K(table_id), K(ret)); + } else if (OB_FAIL(ObOptSelectivity::calculate_selectivity(get_plan()->get_basic_table_metas(), + get_plan()->get_selectivity_ctx(), + get_restrict_infos(), + selectivity, + get_plan()->get_predicate_selectivities()))) { + LOG_WARN("failed to calc filter selectivities", K(get_restrict_infos()), K(ret)); + } else { + set_output_rows(nonrecursive_root->get_card() * selectivity); + if (OB_FAIL(ObOptSelectivity::update_table_meta_info(get_plan()->get_basic_table_metas(), + get_plan()->get_update_table_metas(), + get_plan()->get_selectivity_ctx(), + table_id, + get_output_rows(), + get_restrict_infos(), + get_plan()->get_predicate_selectivities()))) { + LOG_WARN("failed to update table meta info", K(ret)); + } + LOG_TRACE("estimate rows for fake cte", K(output_rows_), K(get_plan()->get_basic_table_metas())); + } + return ret; +} + int ObJoinOrder::generate_cte_table_paths() { int ret = OB_SUCCESS; @@ -7726,6 +7780,8 @@ int ObJoinOrder::generate_cte_table_paths() } else if (OB_ISNULL(table_item = stmt->get_table_item_by_id(table_id_))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(ret), K(table_id_)); + } else if (OB_FAIL(estimate_size_and_width_for_fake_cte(table_id_, get_plan()->get_nonrecursive_plan_for_fake_cte()))) { + LOG_WARN("failed to calc filter selectivities", K(get_restrict_infos()), K(ret)); } else if (OB_FAIL(create_one_cte_table_path(table_item, get_plan()->get_optimizer_context().get_match_all_sharding()))) { 
LOG_WARN("failed to create one cte table path", K(ret)); @@ -12342,6 +12398,7 @@ int ObJoinOrder::init_est_sel_info_for_access_path(const uint64_t table_id, bool has_opt_stat = false; OptTableStatType stat_type = OptTableStatType::DEFAULT_TABLE_STAT; int64_t last_analyzed = 0; + bool is_stat_locked = false; const int64_t origin_part_cnt = all_used_part_id.count(); bool use_global = false; ObSEArray global_part_ids; @@ -12394,6 +12451,7 @@ int ObJoinOrder::init_est_sel_info_for_access_path(const uint64_t table_id, LOG_WARN("failed to get table stats", K(ret)); } else { last_analyzed = stat.get_last_analyzed(); + is_stat_locked = stat.get_stat_locked(); table_meta_info_.table_row_count_ = stat.get_row_count(); table_meta_info_.part_size_ = !use_global ? static_cast(stat.get_avg_data_size()) : static_cast(stat.get_avg_data_size() * all_used_part_id.count()) @@ -12452,7 +12510,8 @@ int ObJoinOrder::init_est_sel_info_for_access_path(const uint64_t table_id, stat_type, global_part_ids, scale_ratio, - last_analyzed))) { + last_analyzed, + is_stat_locked))) { LOG_WARN("failed to add base table meta info", K(ret)); } } diff --git a/src/sql/optimizer/ob_join_order.h b/src/sql/optimizer/ob_join_order.h index e7c4c32c20..829aec91ef 100644 --- a/src/sql/optimizer/ob_join_order.h +++ b/src/sql/optimizer/ob_join_order.h @@ -679,6 +679,10 @@ struct EstimateCostInfo { } // compute current path is inner path and contribute query ranges int compute_valid_inner_path(); + inline bool is_false_range() + { + return 1 == est_cost_info_.ranges_.count() && est_cost_info_.ranges_.at(0).is_false_range(); + } TO_STRING_KV(K_(table_id), K_(ref_table_id), @@ -1258,7 +1262,8 @@ struct NullAwareAntiJoinInfo { filters_(), subquery_exprs_(), inner_paths_(), - table_opt_info_(NULL) + table_opt_info_(NULL), + est_method_(EST_INVALID) {} bool is_inner_path_; @@ -1277,6 +1282,7 @@ struct NullAwareAntiJoinInfo { ObSEArray const_param_constraints_; ObSEArray expr_constraints_; + ObBaseTableEstMethod 
est_method_; }; struct DeducedExprInfo { @@ -1773,6 +1779,7 @@ struct NullAwareAntiJoinInfo { const ObIArray &filters, double &output_card); + int estimate_size_and_width_for_fake_cte(uint64_t table_id, ObSelectLogPlan *nonrecursive_plan); int create_one_cte_table_path(const TableItem* table_item, ObShardingInfo * sharding); int generate_cte_table_paths(); @@ -2363,7 +2370,8 @@ struct NullAwareAntiJoinInfo { int estimate_rowcount_for_access_path(ObIArray &all_paths, const bool is_inner_path, - common::ObIArray &filter_exprs); + common::ObIArray &filter_exprs, + ObBaseTableEstMethod &method); inline bool can_use_remote_estimate(OptimizationMethod method) { diff --git a/src/sql/optimizer/ob_join_order.h.review b/src/sql/optimizer/ob_join_order.h.review deleted file mode 100644 index c436cb3927..0000000000 --- a/src/sql/optimizer/ob_join_order.h.review +++ /dev/null @@ -1,373 +0,0 @@ -// Copyright (c) 2014 Alibaba Inc. All Rights Reserved. -// Author: - -#ifndef _OB_JOIN_ORDER_H -#define _OB_JOIN_ORDER_H 1 -#include "share/schema/ob_schema_manager.h" -#include "sql/resolver/expr/ob_raw_expr.h" -#include "ob_log_join.h" -#include "sql/resolver/dml/ob_select_stmt.h" -//#include "ob_logical_plan_tree.h" -#include "ob_optimizer_util.h" - -using oceanbase::common::ObString; - -namespace oceanbase -{ -namespace sql -{ - class ObSelectLogPlan; -// class ObJoinOrder -// { -// public: -// ObAccessPath(); -// ~ObAccessPath(); -// inline const uint64_t get_table_id() {return table_id_;} -// // add access path by name -// int add_path(uint64_t data_table_id, ObString &hinted_index_name); -// // add access path -// int add_path(ObTableSchema &index_table_schema, -// uint64_t index_tid); -// bool is_emtpy(); -// -// private: -// // genereate all the ObRawExpr for index key columns -// int ObAccessPath::generate_index_key_raw_expr( -// ObTableSchema &index_table_schema, -// common::ObArray &index_keys); -// // add access path (with cost) -// int 
ObAccessPath::add_access_path(ObTableSchema &index_table_schema, -// uint64_t index_tid, -// int64_t cost); -// // add access path (with cost and index keys) -// int ObAccessPath::add_access_path(ObTableSchema &table_schema, -// uint64_t index_tid, -// common::ObArray &index_keys, -// int64_t cost); -// private: -// -// uint64_t table_id_; -// typedef std::pair ObCostedPath; -// // Note: -// // "Unordered " path does not mean the path are "not ordered" - It only means that -// // the user shall not assume any ordering while choosing the path. -// ObCostedPath unordered_path_; -// typedef std::pair, ObCostedPath> interesting_order; -// typedef common::ObArray interesting_orders; -// interesting_orders interesting_orders_; -// }; - - - //added by ryan.ly 20141230 - //start.. - const double cpu_operator_cost = 1; - -// struct RestrictInfo -// { -// ObBitSet<> table_ids_; -// ObRawExpr* qual_; -// -// RestrictInfo(); -// }; - typedef ObRawExpr RestrictInfo; //RestrictInfo结构暂时没什么用,需要时再打开 - - enum ObJoinType //建议其他地方引用此ObJoinType。。。 - { - UNKNOWN_JOIN = 1, - INNER_JOIN, - LEFT_OUTER_JOIN, - RIGHT_OUTER_JOIN, - FULL_OUTER_JOIN, - LEFT_SEMI_JOIN, - RIGHT_SEMI_JOIN, - LEFT_ANTI_SEMI_JOIN, - RIGHT_ANTI_SEMI_JOIN - }; - - struct JoinInfo - { - ObBitSet<> table_set_; //要连接的表集合(即包含在join_qual_中的,除自己之外的所有表) - ObArray join_qual_; - ObArray join_condition_;//是join_qual_的子集 - ObJoinType join_type_; - - JoinInfo(ObJoinType join_type) : join_type_(join_type){} - virtual ~JoinInfo() {}; - }; - - enum PathType - { - ACCESS, - JOIN, - SUBQUERY - }; - - class Path - { - public: - Path(){} - Path(PathType path_type, ObJoinOrder* parent, double cost) - : path_type_(path_type), parent_(parent), cost_(cost), num_tables_(0) {} - virtual ~Path(); - PathType path_type_; - ObJoinOrder* parent_; - ObArray ordering_;//Path的ordering不一定来自于Stmt上的expr,所以这里应该释放 - double cost_; - uint64_t num_tables_; - virtual double cost() { return 0;} - }; - - //..end - - class AccessPath : public Path //modified by 
liyang 20141230 - { - public: - AccessPath() : Path(ACCESS, NULL, 0) , index_schema_(NULL), base_table_id_(0) {} - AccessPath(const share::schema::ObTableSchema *schema, ObJoinOrder* parent, double cost) - : Path(ACCESS, parent, cost), index_schema_(schema), base_table_id_(0) {} - virtual ~AccessPath(){} - share::schema::ObTableSchema *get_table_schema(); -// int64_t get_card() const { return card_; } -// int64_t get_cost() const { return cost_; } - uint64_t get_base_table_id() const { return base_table_id_; } - void set_base_table_id(uint64_t table_id) { base_table_id_ = table_id; } - uint64_t get_index_table_id() const { return index_schema_->get_table_id(); } -// private: - const share::schema::ObTableSchema *index_schema_; - uint64_t base_table_id_; -// int64_t card_; -// double cost_; - //added by ryan.ly 20141230 - //start.. - virtual double cost() { return 0;} - //..end - }; - - class JoinOrder : public Path //modified by liyang 20141230 - { - public: - JoinOrder() - {} - virtual ~JoinOrder() - {} - // join order - common::ObArray joined_tables_; - // join methods - common::ObArray join_algos_; - // ordering privided by the join_order -// common::ObArray ordering_; - // cost -// int64_t cost_; - // cardinality -// int64_t card_; - - //added by ryan.ly 20141230 - //start.. 
- ObArray join_type_; - ObArray< ObRawExpr*> left_need_ordering_; - ObArray< ObRawExpr*> right_need_ordering_; - ObArray< ObRawExpr*> join_condition_; - ObArray< ObRawExpr*> join_filter_; - double cost(); - double cost_nestloop(); - double cost_mergejoin(); - double cost_sort(double n, ObArray ordering); - //..end - }; - -// struct InterestingOrder -// { -// JoinOrder join_order_; -// common::ObArray *ordering_; -// -// InterestingOrder() : ordering_(&join_order_.ordering_) {}; -// }; -// typedef common::ObArray Interesting_Orders; - - class ObJoinOrder - { - public: - //ObJoinOrder(){} - ObJoinOrder(ObIAllocator *allocator, ObLogPlan *plan) - : allocator_(allocator), plan_(plan), plain_join_order_(INT64_MAX, OB_INVALID_ID) {} - ObJoinOrder(ObIAllocator *allocator, ObLogPlan *plan, PathType type, double rows) - : allocator_(allocator), plan_(plan), type_(type), rows_(rows){} - virtual ~ObJoinOrder(); - void reset(); -// inline common::ObArray& get_table_ids() {return sorted_table_ids_;} - - // add access path by name - int add_path(uint64_t data_table_id, ObString &hinted_index_name); - // add access path - int add_path(share::schema::ObTableSchema &index_table_schema, uint64_t index_tid); - /** - * @brief Check if there is any table added - * @retval true if there is no table at all - * @retval false otherwise - */ -// inline bool is_empty() const { return (0 == sorted_table_ids_.count()); } - - /** - * @brief Add a table to the joined table chain - * @param [in] data_table_id - table id of the base table - * @param [in] hinted_index_name - index hint - * @retval OB_SUCCESS execute success - * @retval OB_SOME_ERROR special errno need to handle - * - * This function add a table(all access paths that may be worth keeping) to - * the join order. - * 1. If an index hint is given, it will be honored with our best effort. - * Situations where hint cannot be honored include the specified index - * not existing, index schema being not accessible, and etc. - * 2. 
If the hint cannot be used or there is no user hint, we will try to - * collect all availabe indexes and keep the ones that're worth keeping. - * ie. with minimum costs or providing an 'interesting' ordering. - * - */ - int add_table(const uint64_t data_table_id, const ObString &hinted_index_name); - /** - * @brief Get the join orders - * @param [in] data_table_id - table id of the base table - * @param [in] hinted_index_name - index hint - * @retval OB_SUCCESS execute success - * @retval OB_SOME_ERROR special errno need to handle - */ - inline ObArray &get_join_orders() {return interesting_paths_;} - - //added by ryan.ly 20141231 - //start.. //TODO - ObJoinOrder& operator=(const ObJoinOrder &other); - int add_path(Path* path); - int estimate_rel_size(ObJoinOrder* lefttree, ObJoinOrder* righttree); - double calculate_selectivity(const ObIArray& quals) const; - double clause_selectivity(const RestrictInfo* qual) const; - double convert_obj_to_scalar(const ObObj* obj) const; - double convert_string_to_scalar(char* str) const; - int extract_index_keys_filters(ObIArray& ordering, - ObIArray& keys, - ObIArray& filters, - ObIArray& quals); - inline double get_rows() const {return rows_;} - inline ObArray& get_join_infos() {return join_info_set_;} - inline ObBitSet<>& get_tables() {return table_set_;} - inline ObBitSet<>& get_unjoined_tables() {return unjoined_tables_;} - inline ObArray& get_restrict_infos() {return restrict_info_set_;} - inline ObArray& get_interesting_paths() {return interesting_paths_;} - inline void set_type(PathType type){type_ = type;} - inline PathType get_type(){return type_;} - - //..end - - private: - - // The function is temporarily placed here to help us pass the compilation. We need to have - // this support from the storage layer. 
-// int64_t get_index_access_cost(uint64_t index_tid, int64_t cost, int64_t card); - int get_index_access_cost(uint64_t index_tid, - ObArray conditions, - ObArray filters, - ObArray quals, - double& cost); - - /** - * @brief Genereate all the ObRawExpr for index key columns - * @param [in] index_table_schema - schema of the index table - * @param [out] index_keys - a sequence of the index keys - * @retval OB_SUCCESS execute success - * @retval OB_SOME_ERROR special errno need to handle - */ - int generate_index_key_raw_expr(const share::schema::ObTableSchema &index_table_schema, - common::ObArray &index_keys); - /** - * @brief Add an interesting order - * @param [in] io - interesting order to be added - * @retval OB_SUCCESS execute success - * @retval OB_SOME_ERROR special errno need to handle - */ - int add_interesting_order(JoinOrder io); - - /** - * @brief Make a single table access path using the hint - * @param [in] table_id - table id of the base table - * @param [in] hinted_index_name - index hint - * @param [in/out] ap - access path to generate - * @retval OB_SUCCESS execute success - * @retval OB_SOME_ERROR special errno need to handle - */ - int generate_access_path_by_hint(const uint64_t data_table_id, - const ObString &hinted_index_name, - AccessPath* ap); - /** - * @brief Add a table to the join order using the index hint - * @param [in] table_id - table id of the base table - * @param [in] hinted_index_name - index hint - * @param [in/out] succeeded - if the hint is successfully used - * @retval OB_SUCCESS execute success - * @retval OB_SOME_ERROR special errno need to handle - */ - int add_table_by_hint(const uint64_t table_id, - const ObString &hinted_index_name, - bool &succeeded); - - // @brief check if all primary key are equal condition - int all_primary_key_equal(const uint64_t table_id, - bool &all_primary_key_covered); - - // @brief Check if we can use pure heuristics for optimizing single table query - int single_table_heuristics(bool 
&can_use_heuristics); - /** - * @brief Add an interesting order(costly) - * @param [in] io - interesting order to be added - * @retval OB_SUCCESS execute success - * @retval OB_SOME_ERROR special errno need to handle - */ - int add_interesting_order_costly(Path* io); - /** - * @brief Add an interesting order forcely - * @param [in] io - interesting order to be added - * @retval OB_SUCCESS execute success - * @retval OB_SOME_ERROR special errno need to handle - */ - int add_interesting_order_forcely(Path* io); - - - -// // add access path (with cost) -// int ObJoinOrder::add_access_path(ObTableSchema &index_table_schema, -// uint64_t index_tid, -// int64_t cost); -// // add access path (with cost and index keys) -// int ObJoinOrder::add_access_path(ObTableSchema &table_schema, -// uint64_t index_tid, -// common::ObArray &index_keys, -// int64_t cost); - private: - // all table ids(sorted) -// common::ObArray sorted_table_ids_; - -// Interesting_Orders interesting_orders_; - // Note: - // "Uninteresting" does not mean the join-order are "not ordered" - It only means that - // the user shall not assume any ordering while choosing the path. - // FIRST - cost - // SECOND - index of the join order in join_orders_ - - ObIAllocator *allocator_; - ObLogPlan *plan_; - std::pair plain_join_order_; - //added by ryan.ly 20141231 - //start.. 
- PathType type_; - ObArray interesting_paths_; //因为JoinOrder继承于Path了,可以把interesting_orders_去掉了 - ObBitSet<> table_set_; //存在这里的是TableItem所在的下标 - ObArray join_info_set_; - ObArray restrict_info_set_; - ObBitSet<> unjoined_tables_; - double rows_; - //..end - }; -} - -} - -#endif diff --git a/src/sql/optimizer/ob_log_count.cpp b/src/sql/optimizer/ob_log_count.cpp index 0f048f2cc6..a37d8c2296 100644 --- a/src/sql/optimizer/ob_log_count.cpp +++ b/src/sql/optimizer/ob_log_count.cpp @@ -82,6 +82,8 @@ int ObLogCount::do_re_est_cost(EstimateCostInfo ¶m, double &card, double &op OB_ISNULL(child = get_child(ObLogicalOperator::first_child))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(get_plan()), K(child), K(ret)); + } else if (OB_FALSE_IT(get_plan()->get_selectivity_ctx().init_op_ctx( + &child->get_output_equal_sets(), child->get_card()))) { } else if (OB_FAIL(ObOptSelectivity::calculate_selectivity(get_plan()->get_basic_table_metas(), get_plan()->get_selectivity_ctx(), get_filter_exprs(), diff --git a/src/sql/optimizer/ob_log_distinct.cpp b/src/sql/optimizer/ob_log_distinct.cpp index d34aaf8f9c..936260b3ef 100644 --- a/src/sql/optimizer/ob_log_distinct.cpp +++ b/src/sql/optimizer/ob_log_distinct.cpp @@ -161,6 +161,8 @@ int ObLogDistinct::do_re_est_cost(EstimateCostInfo ¶m, double &card, double double child_cost = child->get_cost(); double child_ndv = total_ndv_; const int64_t parallel = param.need_parallel_; + double origin_child_card = child_card; + bool need_scale_ndv = false; if (param.need_row_count_ >= 0 && child_card > 0 && total_ndv_ > 0 && @@ -170,9 +172,13 @@ int ObLogDistinct::do_re_est_cost(EstimateCostInfo ¶m, double &card, double param.need_row_count_ = child_card * (1 - std::pow((1 - child_ndv / total_ndv_), total_ndv_ / child_card)); } else { param.need_row_count_ = -1; + need_scale_ndv = true; } if (OB_FAIL(SMART_CALL(child->re_est_cost(param, child_card, child_cost)))) { LOG_WARN("failed to re est child cost", K(ret)); + } else if 
(need_scale_ndv && + FALSE_IT(child_ndv = std::min(child_ndv, ObOptSelectivity::scale_distinct(child_card, origin_child_card, child_ndv)))) { + // do nothing } else if (OB_FAIL(inner_est_cost(parallel, child_card, child_ndv, op_cost))) { LOG_WARN("failed to est distinct cost", K(ret)); } else { diff --git a/src/sql/optimizer/ob_log_group_by.cpp b/src/sql/optimizer/ob_log_group_by.cpp index 0a6f0c5cf7..35dd355839 100644 --- a/src/sql/optimizer/ob_log_group_by.cpp +++ b/src/sql/optimizer/ob_log_group_by.cpp @@ -37,9 +37,11 @@ using namespace oceanbase::common; int ObThreeStageAggrInfo::assign(const ObThreeStageAggrInfo &info) { int ret = OB_SUCCESS; + aggr_stage_ = info.aggr_stage_; distinct_aggr_count_ = info.distinct_aggr_count_; aggr_code_idx_ = info.aggr_code_idx_; aggr_code_expr_ = info.aggr_code_expr_; + aggr_code_ndv_ = info.aggr_code_ndv_; if (OB_FAIL(distinct_exprs_.assign(info.distinct_exprs_))) { LOG_WARN("failed to assign distinct exprs", K(ret)); } else if (OB_FAIL(distinct_aggr_batch_.assign(info.distinct_aggr_batch_))) { @@ -263,14 +265,17 @@ int ObLogGroupBy::do_re_est_cost(EstimateCostInfo ¶m, double &card, double & double selectivity = 1.0; ObLogicalOperator *child = get_child(ObLogicalOperator::first_child); const int64_t parallel = param.need_parallel_; - if (OB_ISNULL(child)) { + double number_of_copies = get_number_of_copies(); + if (OB_ISNULL(child) || OB_UNLIKELY(number_of_copies < 1)) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null", K(ret), K(child)); + LOG_WARN("get unexpected null", K(ret), K(child), K(number_of_copies)); } else if (OB_FAIL(get_child_est_info(parallel, child_card, child_ndv, selectivity))) { LOG_WARN("failed to get chidl est info", K(ret)); } else { double child_cost = child->get_cost(); double need_ndv = child_ndv; + double origin_child_card = child_card; + bool need_scale_ndv = false; if (param.need_row_count_ >= 0 && child->get_card() > 0 && child_ndv > 0 && @@ -281,17 +286,27 @@ int 
ObLogGroupBy::do_re_est_cost(EstimateCostInfo ¶m, double &card, double & } if (child_card > 0) { param.need_row_count_ = child_card * (1 - std::pow((1 - need_ndv / child_ndv), child_ndv / child_card)); + param.need_row_count_ /= number_of_copies; } else { param.need_row_count_ = 0; } } else { param.need_row_count_ = -1; + need_scale_ndv = true; } if (is_block_op()) { param.need_row_count_ = -1; //reset need row count } if (OB_FAIL(SMART_CALL(child->re_est_cost(param, child_card, child_cost)))) { LOG_WARN("failed to re est child cost", K(ret)); + } else { + // At the first stage, child output will be replicated + child_card = child_card * number_of_copies; + if (need_scale_ndv) { + need_ndv = std::min(child_ndv, ObOptSelectivity::scale_distinct(child_card, origin_child_card, child_ndv)); + } + } + if (OB_FAIL(ret)) { } else if (OB_FAIL(inner_est_cost(parallel, child_card, need_ndv, @@ -301,6 +316,9 @@ int ObLogGroupBy::do_re_est_cost(EstimateCostInfo ¶m, double &card, double & } else { cost = child_cost + op_cost; card = need_ndv * selectivity; + if (param.override_) { + set_total_ndv(need_ndv); + } } } return ret; @@ -324,9 +342,6 @@ int ObLogGroupBy::inner_est_cost(const int64_t parallel, double child_card, doub LOG_WARN("failed to get group rollup exprs", K(ret)); } else { per_dop_card = child_card / parallel; - if (is_first_stage()) { - per_dop_card = per_dop_card * three_stage_info_.distinct_aggr_count_; - } if ((get_group_by_exprs().empty() && get_rollup_exprs().empty()) || SCALAR_AGGREGATE == algo_) { per_dop_ndv = 1.0; } else if (parallel > 1) { @@ -360,7 +375,11 @@ int ObLogGroupBy::inner_est_cost(const int64_t parallel, double child_card, doub get_aggr_funcs().count(), opt_ctx); } - child_ndv = per_dop_ndv * parallel; + + child_ndv = std::min(child_card, per_dop_ndv * parallel); + if (SCALAR_AGGREGATE == algo_) { + child_ndv = std::max(1.0, child_ndv); + } } return ret; } @@ -385,6 +404,8 @@ int ObLogGroupBy::get_child_est_info(const int64_t parallel, 
double &child_card, } //having filter selectivity if (OB_SUCC(ret)) { + // At the first stage, child output will be replicated + child_card = child_card * get_number_of_copies(); get_plan()->get_selectivity_ctx().init_row_count(get_origin_child_card(), child_ndv); if (OB_FAIL(ObOptSelectivity::calculate_selectivity(get_plan()->get_update_table_metas(), get_plan()->get_selectivity_ctx(), @@ -762,65 +783,77 @@ int ObLogGroupBy::set_rollup_info( return ret; } -int ObLogGroupBy::set_first_stage_info(ObRawExpr *aggr_code_expr, - ObIArray &batch) +int ObThreeStageAggrInfo::set_first_stage_info(ObRawExpr *aggr_code_expr, + ObIArray &batch, + double aggr_code_ndv) { int ret = OB_SUCCESS; + reuse(); aggr_stage_ = ObThreeStageAggrStage::FIRST_STAGE; - three_stage_info_.aggr_code_expr_ = aggr_code_expr; - three_stage_info_.distinct_aggr_count_ = 0; - if (!ObOptimizerUtil::find_item(group_exprs_, - aggr_code_expr, - &three_stage_info_.aggr_code_idx_)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("aggr code expr is not found", K(ret)); - } else if (OB_FAIL(three_stage_info_.distinct_aggr_batch_.assign(batch))) { + aggr_code_expr_ = aggr_code_expr; + distinct_aggr_count_ = 0; + aggr_code_ndv_ = aggr_code_ndv; + if (OB_FAIL(distinct_aggr_batch_.assign(batch))) { LOG_WARN("failed to assign batch", K(ret)); } else { for (int64_t i = 0; i < batch.count(); ++i) { - three_stage_info_.distinct_aggr_count_ += batch.at(i).mocked_aggrs_.count(); + distinct_aggr_count_ += batch.at(i).mocked_aggrs_.count(); } } return ret; } -int ObLogGroupBy::set_second_stage_info(ObRawExpr *aggr_code_expr, - ObIArray &batch, - ObIArray &distinct_exprs) +int ObThreeStageAggrInfo::set_second_stage_info(ObRawExpr *aggr_code_expr, + ObIArray &batch, + ObIArray &distinct_exprs) { int ret = OB_SUCCESS; + reuse(); aggr_stage_ = ObThreeStageAggrStage::SECOND_STAGE; - three_stage_info_.aggr_code_expr_ = aggr_code_expr; - three_stage_info_.distinct_aggr_count_ = 0; - if (!ObOptimizerUtil::find_item(group_exprs_, - 
aggr_code_expr, - &three_stage_info_.aggr_code_idx_)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("aggr code expr is not found", K(ret)); - } else if (OB_FAIL(three_stage_info_.distinct_aggr_batch_.assign(batch))) { + aggr_code_expr_ = aggr_code_expr; + distinct_aggr_count_ = 0; + if (OB_FAIL(distinct_aggr_batch_.assign(batch))) { LOG_WARN("failed to assign batch", K(ret)); - } else if (OB_FAIL(three_stage_info_.distinct_exprs_.assign(distinct_exprs))) { + } else if (OB_FAIL(distinct_exprs_.assign(distinct_exprs))) { LOG_WARN("failed to assign distinct", K(ret)); } else { for (int64_t i = 0; i < batch.count(); ++i) { - three_stage_info_.distinct_aggr_count_ += batch.at(i).mocked_aggrs_.count(); + distinct_aggr_count_ += batch.at(i).mocked_aggrs_.count(); } } return ret; } -int ObLogGroupBy::set_third_stage_info(ObRawExpr *aggr_code_expr, - ObIArray &batch) +int ObThreeStageAggrInfo::set_third_stage_info(ObRawExpr *aggr_code_expr, + ObIArray &batch) { int ret = OB_SUCCESS; + reuse(); aggr_stage_ = ObThreeStageAggrStage::THIRD_STAGE; - three_stage_info_.aggr_code_expr_ = aggr_code_expr; - three_stage_info_.distinct_aggr_count_ = 0; - if (OB_FAIL(three_stage_info_.distinct_aggr_batch_.assign(batch))) { + aggr_code_expr_ = aggr_code_expr; + distinct_aggr_count_ = 0; + if (OB_FAIL(distinct_aggr_batch_.assign(batch))) { LOG_WARN("failed to assign batch", K(ret)); + } else { + for (int64_t i = 0; i < batch.count(); ++i) { + distinct_aggr_count_ += batch.at(i).mocked_aggrs_.count(); + } } - for (int64_t i = 0; i < batch.count(); ++i) { - three_stage_info_.distinct_aggr_count_ += batch.at(i).mocked_aggrs_.count(); + return ret; +} + +int ObLogGroupBy::set_three_stage_info(const ObThreeStageAggrInfo &info) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(three_stage_info_.assign(info))) { + LOG_WARN("failed to assgin", K(ret)); + } else if (is_third_stage()) { + // do nothing + } else if (!ObOptimizerUtil::find_item(group_exprs_, + three_stage_info_.aggr_code_expr_, + 
&three_stage_info_.aggr_code_idx_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("aggr code expr is not found", K(ret)); } return ret; } diff --git a/src/sql/optimizer/ob_log_group_by.h b/src/sql/optimizer/ob_log_group_by.h index 80cd47fd36..95c24870bf 100644 --- a/src/sql/optimizer/ob_log_group_by.h +++ b/src/sql/optimizer/ob_log_group_by.h @@ -24,20 +24,38 @@ namespace sql class ObLogSort; struct ObThreeStageAggrInfo { - ObThreeStageAggrInfo() : distinct_aggr_count_(-1), + ObThreeStageAggrInfo() : + aggr_stage_(ObThreeStageAggrStage::NONE_STAGE), + distinct_aggr_count_(-1), aggr_code_idx_(-1), aggr_code_expr_(NULL), distinct_aggr_batch_(), - distinct_exprs_() + distinct_exprs_(), + aggr_code_ndv_(1.0) {} + ObThreeStageAggrStage aggr_stage_; int64_t distinct_aggr_count_; int64_t aggr_code_idx_; ObRawExpr *aggr_code_expr_; ObArray distinct_aggr_batch_; common::ObArray distinct_exprs_; + double aggr_code_ndv_; int assign(const ObThreeStageAggrInfo &info); + + void reuse() { + aggr_stage_ = ObThreeStageAggrStage::NONE_STAGE; + aggr_code_idx_ = -1; + aggr_code_expr_ = NULL; + distinct_aggr_batch_.reuse(); + distinct_exprs_.reuse(); + aggr_code_ndv_ = 1.0; + } + + int set_first_stage_info(ObRawExpr *aggr_code_expr, ObIArray &batch, double aggr_code_ndv); + int set_second_stage_info(ObRawExpr *aggr_code_expr, ObIArray &batch, ObIArray &distinct_exprs); + int set_third_stage_info(ObRawExpr *aggr_code_expr, ObIArray &batch); }; struct ObRollupAdaptiveInfo @@ -75,7 +93,6 @@ public: is_partition_gi_(false), total_ndv_(-1.0), origin_child_card_(-1.0), - aggr_stage_(ObThreeStageAggrStage::NONE_STAGE), three_stage_info_(), rollup_adaptive_info_(), force_push_down_(false), @@ -91,15 +108,7 @@ public: virtual int get_explain_name_internal(char *buf, const int64_t buf_len, int64_t &pos); - int set_first_stage_info(ObRawExpr *aggr_code_expr, - ObIArray &batch); - - int set_second_stage_info(ObRawExpr *aggr_code_expr, - ObIArray &batch, - ObIArray &distinct_exprs); - - int 
set_third_stage_info(ObRawExpr *aggr_code_expr, - ObIArray &batch); + int set_three_stage_info(const ObThreeStageAggrInfo &info); int set_rollup_info(const ObRollupStatus rollup_status, ObRawExpr *rollup_id_expr); @@ -175,16 +184,16 @@ public: int allocate_startup_expr_post()override; - inline ObThreeStageAggrStage get_aggr_stage() const { return aggr_stage_; } + inline ObThreeStageAggrStage get_aggr_stage() const { return three_stage_info_.aggr_stage_; } inline int64_t get_aggr_code_idx() const { return three_stage_info_.aggr_code_idx_; } inline ObRawExpr* get_aggr_code_expr() { return three_stage_info_.aggr_code_expr_; } inline int64_t get_distinct_aggr_count() const { return three_stage_info_.distinct_aggr_count_; } inline common::ObIArray &get_distinct_exprs() { return three_stage_info_.distinct_exprs_; } - inline bool is_three_stage_aggr() const { return ObThreeStageAggrStage::NONE_STAGE != aggr_stage_; } - inline bool is_first_stage() const { return ObThreeStageAggrStage::FIRST_STAGE == aggr_stage_; } - inline bool is_second_stage() const { return ObThreeStageAggrStage::SECOND_STAGE == aggr_stage_; } - inline bool is_third_stage() const { return ObThreeStageAggrStage::THIRD_STAGE == aggr_stage_; } + inline bool is_three_stage_aggr() const { return ObThreeStageAggrStage::NONE_STAGE != three_stage_info_.aggr_stage_; } + inline bool is_first_stage() const { return ObThreeStageAggrStage::FIRST_STAGE == three_stage_info_.aggr_stage_; } + inline bool is_second_stage() const { return ObThreeStageAggrStage::SECOND_STAGE == three_stage_info_.aggr_stage_; } + inline bool is_third_stage() const { return ObThreeStageAggrStage::THIRD_STAGE == three_stage_info_.aggr_stage_; } inline bool force_push_down() const { return force_push_down_; } inline bool is_adaptive_aggregate() const { return HASH_AGGREGATE == get_algo() && !force_push_down() @@ -216,6 +225,8 @@ public: virtual int compute_sharding_info() override; + // used for the rowcount estimation of the first stage + 
double get_number_of_copies() { return is_first_stage() ? three_stage_info_.aggr_code_ndv_ : 1.0; }; void set_pushdown_scalar_aggr() { is_pushdown_scalar_aggr_ = true; } bool is_pushdown_scalar_aggr() { return is_pushdown_scalar_aggr_; } @@ -244,7 +255,6 @@ private: double total_ndv_; double origin_child_card_; - ObThreeStageAggrStage aggr_stage_; ObThreeStageAggrInfo three_stage_info_; // for rollup distributor and collector ObRollupAdaptiveInfo rollup_adaptive_info_; diff --git a/src/sql/optimizer/ob_log_plan.cpp b/src/sql/optimizer/ob_log_plan.cpp index 605f57911e..ee57f9bdc0 100644 --- a/src/sql/optimizer/ob_log_plan.cpp +++ b/src/sql/optimizer/ob_log_plan.cpp @@ -137,7 +137,8 @@ ObLogPlan::ObLogPlan(ObOptimizerContext &ctx, const ObDMLStmt *stmt) update_table_metas_(), selectivity_ctx_(ctx, this, stmt), alloc_sfu_list_(), - onetime_copier_(NULL) + onetime_copier_(NULL), + nonrecursive_plan_for_fake_cte_(NULL) { } @@ -966,7 +967,18 @@ int ObLogPlan::pre_process_quals(const ObIArray &table_items, ret = add_subquery_filter(qual); } } else if (qual->is_const_expr()) { - ret = add_startup_filter(qual); + bool is_static_false = false; + if (OB_FAIL(ObOptimizerUtil::check_is_static_false_expr(optimizer_context_, *qual, is_static_false))) { + LOG_WARN("failed to check is static false", K(ret)); + } else if (is_static_false) { + if (OB_FAIL(normal_quals.push_back(qual))) { + LOG_WARN("failed to push back"); + } + } else { + if (OB_FAIL(add_startup_filter(qual))) { + LOG_WARN("failed to add startup filter", K(ret)); + } + } } else if (qual->has_flag(CNT_RAND_FUNC) || qual->has_flag(CNT_DYNAMIC_USER_VARIABLE)) { ret = add_special_expr(qual); @@ -4121,6 +4133,8 @@ int ObLogPlan::allocate_function_table_path(FunctionTablePath *func_table_path, LOG_WARN("failed to append expr", K(ret)); } else if (OB_FAIL(op->compute_property(func_table_path))) { LOG_WARN("failed to compute property", K(ret)); + } else if (OB_FAIL(op->pick_out_startup_filters())) { + LOG_WARN("failed to 
pick out startup filters", K(ret)); } else { out_access_path_op = op; } @@ -4157,6 +4171,8 @@ int ObLogPlan::allocate_json_table_path(JsonTablePath *json_table_path, LOG_WARN("failed to append expr", K(ret)); } else if (OB_FAIL(op->compute_property(json_table_path))) { LOG_WARN("failed to compute property", K(ret)); + } else if (OB_FAIL(op->pick_out_startup_filters())) { + LOG_WARN("failed to pick out startup filters", K(ret)); } else { out_access_path_op = op; } @@ -4189,6 +4205,8 @@ int ObLogPlan::allocate_temp_table_path(TempTablePath *temp_table_path, LOG_WARN("failed to assign filter exprs", K(ret)); } else if (OB_FAIL(op->compute_property(temp_table_path))) { LOG_WARN("failed to compute property", K(ret)); + } else if (OB_FAIL(op->pick_out_startup_filters())) { + LOG_WARN("failed to pick out startup filters", K(ret)); } else { out_access_path_op = op; } @@ -4219,6 +4237,8 @@ int ObLogPlan::allocate_cte_table_path(CteTablePath *cte_table_path, LOG_WARN("failed to set filters", K(ret)); } else if (OB_FAIL(scan->compute_property(cte_table_path))) { LOG_WARN("failed to compute property", K(ret)); + } else if (OB_FAIL(scan->pick_out_startup_filters())) { + LOG_WARN("failed to pick out startup filters", K(ret)); } else { out_access_path_op = scan; } @@ -4271,7 +4291,6 @@ int ObLogPlan::allocate_access_path(AccessPath *ap, if (NULL != table_schema && table_schema->is_tmp_table()) { scan->set_session_id(table_schema->get_session_id()); } - scan->set_estimate_method(ap->est_cost_info_.row_est_method_); scan->set_pre_query_range(ap->pre_query_range_); scan->set_skip_scan(OptSkipScanState::SS_DISABLE != ap->use_skip_scan_); scan->set_table_type(table_schema->get_table_type()); @@ -5426,6 +5445,8 @@ int ObLogPlan::allocate_subquery_path(SubQueryPath *subpath, LOG_WARN("failed to append pushdown filters", K(ret)); } else if (OB_FAIL(subplan_scan->compute_property(subpath))) { LOG_WARN("failed to compute property", K(ret)); + } else if 
(OB_FAIL(subplan_scan->pick_out_startup_filters())) { + LOG_WARN("failed to pick out startup filters", K(ret)); } else { out_subquery_path_op = subplan_scan; } @@ -6072,6 +6093,16 @@ int ObLogPlan::prepare_three_stage_info(const ObIArray &group_by_ex } } } + if (OB_SUCC(ret)) { + ObSEArray group_rollup_exprs; + if (OB_FAIL(append(group_rollup_exprs, group_by_exprs))) { + LOG_WARN("failed to append", K(ret)); + } else if (OB_FAIL(append(group_rollup_exprs, rollup_exprs))) { + LOG_WARN("failed to append", K(ret)); + } else if (OB_FAIL(calculate_group_distinct_ndv(group_rollup_exprs, helper))) { + LOG_WARN("failed to calculate group distinct ndv", K(ret), K(helper)); + } + } return ret; } @@ -6210,6 +6241,10 @@ int ObLogPlan::create_three_stage_group_plan(const ObIArray &group_b AggregateAlgo third_aggr_algo; bool can_sort_opt = true; ObLogicalOperator *child = NULL; + ObThreeStageAggrInfo three_stage_info; + double aggr_code_ndv = helper.non_distinct_aggr_items_.empty() ? + helper.distinct_aggr_batch_.count() : + helper.distinct_aggr_batch_.count() + 1; // 1. 
prepare to allocate the first group by if (OB_ISNULL(top)) { @@ -6220,6 +6255,10 @@ int ObLogPlan::create_three_stage_group_plan(const ObIArray &group_b OB_FAIL(first_group_by_exprs.push_back(helper.aggr_code_expr_)) || OB_FAIL(append(first_group_by_exprs, helper.distinct_params_))) { LOG_WARN("failed to construct first group by exprs", K(ret)); + } else if (OB_FAIL(three_stage_info.set_first_stage_info(helper.aggr_code_expr_, + helper.distinct_aggr_batch_, + aggr_code_ndv))) { + LOG_WARN("failed to set first stage info"); } else if (OB_FAIL(allocate_group_by_as_top(top, HASH_AGGREGATE, first_group_by_exprs, @@ -6230,15 +6269,16 @@ int ObLogPlan::create_three_stage_group_plan(const ObIArray &group_b helper.group_distinct_ndv_, top->get_card(), false, - true))) { + true, + false, + ObRollupStatus::NONE_ROLLUP, + false, + &three_stage_info))) { LOG_WARN("failed to allocate group by as top", K(ret)); } else if (OB_UNLIKELY(LOG_GROUP_BY != top->get_type()) || OB_ISNULL(first_group_by = static_cast(top))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("first group by is invalid", K(ret), KP(top)); - } else if (OB_FAIL(first_group_by->set_first_stage_info(helper.aggr_code_expr_, - helper.distinct_aggr_batch_))) { - LOG_WARN("failed to set first stage info", K(ret)); } // 2. 
prepare to allocate the second group by @@ -6308,6 +6348,10 @@ int ObLogPlan::create_three_stage_group_plan(const ObIArray &group_b 0, top->get_is_local_order()))) { LOG_WARN("failed to allocate sort and exchange as top", K(ret)); + } else if (OB_FAIL(three_stage_info.set_second_stage_info(helper.aggr_code_expr_, + helper.distinct_aggr_batch_, + helper.distinct_params_))) { + LOG_WARN("failed to set second stage info"); } else if (OB_FAIL(allocate_group_by_as_top(top, second_aggr_algo, second_group_by_exprs, @@ -6315,21 +6359,19 @@ int ObLogPlan::create_three_stage_group_plan(const ObIArray &group_b second_aggr_items, dummy_exprs, false, - helper.group_ndv_ * helper.distinct_aggr_items_.count(), + helper.group_ndv_ * aggr_code_ndv, top->get_card(), false, true, false, - second_rollup_status))) { + second_rollup_status, + false, + &three_stage_info))) { LOG_WARN("failed to allocate group by as top", K(ret)); } else if (OB_UNLIKELY(LOG_GROUP_BY != top->get_type()) || OB_ISNULL(second_group_by = static_cast(top))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("second group by is invalid", K(ret), KP(top)); - } else if (OB_FAIL(second_group_by->set_second_stage_info(helper.aggr_code_expr_, - helper.distinct_aggr_batch_, - helper.distinct_params_))) { - LOG_WARN("failed to set aggr info", K(ret)); } else if (OB_FAIL(second_group_by->set_rollup_info(second_rollup_status, helper.rollup_id_expr_, rd_second_sort_keys, @@ -6389,6 +6431,9 @@ int ObLogPlan::create_three_stage_group_plan(const ObIArray &group_b 0, top->get_is_local_order()))) { LOG_WARN("failed to allocate sort and exchange as top", K(ret)); + } else if (OB_FAIL(three_stage_info.set_third_stage_info(helper.aggr_code_expr_, + helper.distinct_aggr_batch_))) { + LOG_WARN("failed to set third stage info"); } else if (OB_FAIL(allocate_group_by_as_top(top, third_aggr_algo, third_group_by_exprs, @@ -6401,15 +6446,14 @@ int ObLogPlan::create_three_stage_group_plan(const ObIArray &group_b false, false, false, - 
third_rollup_status))) { + third_rollup_status, + false, + &three_stage_info))) { LOG_WARN("failed to allocate group by as top", K(ret)); } else if (OB_UNLIKELY(LOG_GROUP_BY != top->get_type()) || OB_ISNULL(third_group_by = static_cast(top))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("second group by is invalid", K(ret), KP(top)); - } else if (OB_FAIL(third_group_by->set_third_stage_info(helper.aggr_code_expr_, - helper.distinct_aggr_batch_))) { - LOG_WARN("failed to set aggr info", K(ret)); } else if (OB_FAIL(third_group_by->set_rollup_info(third_rollup_status, helper.rollup_id_expr_))) { LOG_WARN("failed to set rollup parallel info", K(ret)); @@ -6849,32 +6893,50 @@ int ObLogPlan::init_groupby_helper(const ObIArray &group_exprs, LOG_WARN("failed to calculate distinct", K(ret)); } else { /* do nothing */ } } + LOG_TRACE("succeed to check whether aggr can be pushed", K(groupby_helper)); + return ret; +} - if (OB_SUCC(ret) && groupby_helper.can_three_stage_pushdown_) { - double total_ndv = 0; - for (int64_t i = 0; OB_SUCC(ret) && i < groupby_helper.distinct_aggr_items_.count(); ++i) { - ObSEArray group_distinct_exprs; - ObAggFunRawExpr *aggr = NULL; - double ndv = 0; - if (OB_ISNULL(aggr = groupby_helper.distinct_aggr_items_.at(i))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("distinct aggr item is null", K(ret)); - } else if (OB_FAIL(append(group_distinct_exprs, group_rollup_exprs)) || - OB_FAIL(append(group_distinct_exprs, aggr->get_real_param_exprs()))) { - LOG_WARN("failed to append group distinct exprs", K(ret)); - } else if (OB_FAIL(ObOptSelectivity::calculate_distinct(get_update_table_metas(), - get_selectivity_ctx(), - group_distinct_exprs, - best_plan->get_card(), - ndv))) { - LOG_WARN("failed to calculate distinct", K(ret)); - } else { - total_ndv += ndv; +int ObLogPlan::calculate_group_distinct_ndv(const ObIArray &groupby_rollup_exprs, GroupingOpHelper &groupby_helper) +{ + int ret = OB_SUCCESS; + double total_ndv = 0; + ObLogicalOperator *best_plan = NULL; + if 
(OB_FAIL(candidates_.get_best_plan(best_plan))) { + LOG_WARN("failed to get best plan", K(ret)); + } else if (OB_ISNULL(best_plan)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret)); + } else { + get_selectivity_ctx().init_op_ctx(&best_plan->get_output_equal_sets(), best_plan->get_card()); + } + for (int64_t i = 0; OB_SUCC(ret) && i < groupby_helper.distinct_aggr_batch_.count(); ++i) { + ObSEArray group_distinct_exprs; + ObDistinctAggrBatch &distinct_aggr_batch = groupby_helper.distinct_aggr_batch_.at(i); + double ndv = 0; + for (int64_t j = 0; OB_SUCC(ret) && j < distinct_aggr_batch.mocked_params_.count(); j ++) { + if (OB_FAIL(group_distinct_exprs.push_back(distinct_aggr_batch.mocked_params_.at(j).first))) { + LOG_WARN("Failed to push back exprs", K(ret)); } } - groupby_helper.group_distinct_ndv_ = total_ndv; + if (OB_FAIL(ret)) { + } else if (OB_FAIL(append(group_distinct_exprs, groupby_rollup_exprs))) { + LOG_WARN("failed to append group distinct exprs", K(ret)); + } else if (OB_FAIL(ObOptSelectivity::calculate_distinct(get_update_table_metas(), + get_selectivity_ctx(), + group_distinct_exprs, + get_selectivity_ctx().get_current_rows(), + ndv))) { + LOG_WARN("failed to calculate distinct", K(ret)); + } else { + total_ndv += ndv; + } } - LOG_TRACE("succeed to check whether aggr can be pushed", K(groupby_helper)); + if (OB_SUCC(ret) && !groupby_helper.non_distinct_aggr_items_.empty()) { + total_ndv += groupby_helper.group_ndv_; + } + groupby_helper.group_distinct_ndv_ = total_ndv; + LOG_TRACE("succeed to calculate group distinct ndv for three stage", K(groupby_helper)); return ret; } @@ -6890,7 +6952,7 @@ int ObLogPlan::init_distinct_helper(const ObIArray &distinct_exprs, distinct_helper.force_use_merge_ = get_log_plan_hint().use_merge_distinct(); if (OB_FAIL(candidates_.get_best_plan(best_plan))) { LOG_WARN("failed to get best plan", K(ret)); - } else if (OB_ISNULL(best_plan)) { + } else if (OB_ISNULL(best_plan) || OB_ISNULL(get_stmt())) { 
ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(ret)); } else if (get_log_plan_hint().no_pushdown_distinct()) { @@ -6914,6 +6976,18 @@ int ObLogPlan::init_distinct_helper(const ObIArray &distinct_exprs, get_selectivity_ctx().init_op_ctx(&best_plan->get_output_equal_sets(), best_plan->get_card()); if (distinct_exprs.empty()) { distinct_helper.group_ndv_ = 1.0; + } else if (get_stmt()->is_set_stmt()) { + // union distinct + const ObSelectStmt *sel_stmt = static_cast(get_stmt()); + distinct_helper.group_ndv_ = 0.0; + for (int64_t i = 0; i < sel_stmt->get_set_query().count(); i ++) { + const OptTableMeta *table_meta = get_update_table_metas().get_table_meta_by_table_id(i); + double child_ndv = 0; + if (OB_NOT_NULL(table_meta)) { + child_ndv = table_meta->get_distinct_rows(); + } + distinct_helper.group_ndv_ += child_ndv; + } } else if (OB_FAIL(ObOptSelectivity::calculate_distinct(get_update_table_metas(), get_selectivity_ctx(), distinct_exprs, @@ -8266,7 +8340,8 @@ int ObLogPlan::allocate_group_by_as_top(ObLogicalOperator *&top, const bool is_push_down, const bool is_partition_gi, const ObRollupStatus rollup_status, - bool force_use_scalar /*false*/) + bool force_use_scalar /*false*/, + const ObThreeStageAggrInfo *three_stage_info) { int ret = OB_SUCCESS; ObLogGroupBy *group_by = NULL; @@ -8301,6 +8376,9 @@ int ObLogPlan::allocate_group_by_as_top(ObLogicalOperator *&top, LOG_WARN("failed to set aggregation exprs", K(ret)); } else if (OB_FAIL(group_by->get_filter_exprs().assign(having_exprs))) { LOG_WARN("failed to set filter exprs", K(ret)); + } else if (NULL != three_stage_info && + OB_FAIL(group_by->set_three_stage_info(*three_stage_info))) { + LOG_WARN("failed to set three stage info", K(ret)); } else if (OB_FAIL(group_by->compute_property())) { LOG_WARN("failed to compute property", K(ret)); } else { @@ -14558,6 +14636,8 @@ int ObLogPlan::allocate_values_table_path(ValuesTablePath *values_table_path, LOG_WARN("failed to append expr", K(ret)); } else if 
(OB_FAIL(values_op->compute_property(values_table_path))) { LOG_WARN("failed to compute propery", K(ret)); + } else if (OB_FAIL(values_op->pick_out_startup_filters())) { + LOG_WARN("failed to pick out startup filters", K(ret)); } else { out_access_path_op = values_op; } diff --git a/src/sql/optimizer/ob_log_plan.h b/src/sql/optimizer/ob_log_plan.h index 437d5268e8..b990aec586 100644 --- a/src/sql/optimizer/ob_log_plan.h +++ b/src/sql/optimizer/ob_log_plan.h @@ -85,6 +85,8 @@ class ObExchangeInfo; class ObDmlTableInfo; struct IndexDMLInfo; class ValuesTablePath; +class ObSelectLogPlan; +class ObThreeStageAggrInfo; struct FunctionTableDependInfo { TO_STRING_KV( @@ -459,6 +461,8 @@ public: ObExchangeInfo &right_exch_info); void set_insert_stmt(const ObInsertStmt *insert_stmt) { insert_stmt_ = insert_stmt; } const ObInsertStmt *get_insert_stmt() const { return insert_stmt_; } + void set_nonrecursive_plan_for_fake_cte(ObSelectLogPlan *plan) { nonrecursive_plan_for_fake_cte_ = plan; } + ObSelectLogPlan *get_nonrecursive_plan_for_fake_cte() { return nonrecursive_plan_for_fake_cte_; } public: struct All_Candidate_Plans @@ -545,7 +549,13 @@ public: K_(force_use_merge), K_(is_scalar_group_by), K_(distinct_exprs), - K_(pushdown_groupby_columns)); + K_(pushdown_groupby_columns), + K_(group_ndv), + K_(group_distinct_ndv), + K_(distinct_params), + K_(distinct_aggr_batch), + K_(distinct_aggr_items), + K_(non_distinct_aggr_items)); }; /** @@ -758,6 +768,8 @@ public: const bool is_from_povit, GroupingOpHelper &groupby_helper); + int calculate_group_distinct_ndv(const ObIArray &groupby_rollup_exprs, GroupingOpHelper &groupby_helper); + int init_distinct_helper(const ObIArray &distinct_exprs, GroupingOpHelper &distinct_helper); @@ -919,7 +931,8 @@ public: const bool is_push_down = false, const bool is_partition_gi = false, const ObRollupStatus rollup_status = ObRollupStatus::NONE_ROLLUP, - bool force_use_scalar = false); + bool force_use_scalar = false, + const ObThreeStageAggrInfo 
*three_stage_info = NULL); int candi_allocate_limit(const ObIArray &order_items); @@ -1948,6 +1961,8 @@ private: common::ObSEArray, 4, common::ModulePageAllocator, true > onetime_replaced_exprs_; common::ObSEArray new_or_quals_; + + ObSelectLogPlan *nonrecursive_plan_for_fake_cte_; DISALLOW_COPY_AND_ASSIGN(ObLogPlan); }; diff --git a/src/sql/optimizer/ob_log_plan.h.review b/src/sql/optimizer/ob_log_plan.h.review deleted file mode 100644 index 3023e720c2..0000000000 --- a/src/sql/optimizer/ob_log_plan.h.review +++ /dev/null @@ -1,286 +0,0 @@ -// Copyright (c) 2014 Alibaba Inc. All Rights Reserved. -// Author: -// -#ifndef _OB_LOG_PLAN_H -#define _OB_LOG_PLAN_H 1 -#include "lib/allocator/page_arena.h" -#include "lib/string/ob_string.h" -#include "ob_optimizer_context.h" -#include "sql/resolver/ob_stmt.h" -#include "ob_log_operator_factory.h" - -namespace test -{ - class ObLogPlanTest_ob_explain_test_Test; -} - -namespace oceanbase -{ -namespace sql -{ - class ObLogicalOperator; - class ObJoinOrder; - class AccessPath; - class Path; - class JoinOrder; - class ObJoinOrder; - struct JoinInfo; - /** - * @enum TraverseOp - * @brief Plan tree traversal operations - */ - enum TraverseOp - { - ALLOC_EXPR_R1 = 0, // allocate expr before parallel optimization - ALLOC_EXPR_R2, // allocate expr before parallel optimization - ALLOC_EXCH, // allocating exchange ( // optimization) - OPERATOR_NUMBERING, // numbering operators - GEN_SIGNATURE, // generating plan signature - EXPLAIN_COLLECT_WIDTH, // explain calculate column width - EXPLAIN_WRITE_BUFFER, // explain write plan table - EXPLAIN_WRITE_BUFFER_OUTPUT, // explain write output and filters - TRAVERSE_OP_END - }; - const static int64_t max_plan_column_width = 500; - const static int64_t max_plan_column = 20; - - // Explain plan text formatter - struct plan_formatter - { - const char *column_name[max_plan_column]; - int64_t num_of_columns; - int column_width[max_plan_column_width]; - }; - - // define operatory enum type -#define 
KEYS_DEF \ - KEY_DEF( Id, "ID" ), \ - KEY_DEF( Operator, "OPERATOR" ), \ - KEY_DEF( Name, "NAME" ), \ - KEY_DEF( Est_Rows, "EST. ROWS" ), \ - KEY_DEF( Cost, "COST" ), \ - KEY_DEF( Max_Plan_Column, "End" ) -#define KEY_DEF( identifier, name ) identifier - enum ExplainColumnEnumType { KEYS_DEF }; -#undef KEY_DEF - - // each line of the explain plan text - struct plan_line - { - int id; - int level; - common::ObArray< common::ObString > column_value; - }; - - - // explain plan text - class planText - { - public: - planText(char *buffer, const int64_t buffer_len) - : level(0), line_id(0), buf(buffer), buf_len(buffer_len), pos(0) - { -#undef KEY_DEF -#define KEY_DEF( identifier, name ) name - static const char *names[] = { KEYS_DEF }; -#undef KEY_DEF - for (int i = 0; i < Max_Plan_Column; i++) { - formatter.column_width[i] = (int) strlen(names[i]); - formatter.column_name[i] = names[i]; - } - formatter.num_of_columns = Max_Plan_Column; - } - int level; - uint64_t line_id; - char *buf; - int64_t buf_len; - int64_t pos; - plan_formatter formatter; - }; -#undef KYES_DEF - - /** - * Base class for logical plan for all DML/select statements - */ - class ObLogPlan - { - public: - friend class test::ObLogPlanTest_ob_explain_test_Test; - - public: - ObLogPlan(ObOptimizerContext *ctx, const ObStmt *stmt); - virtual ~ObLogPlan(); - inline common::ObString &get_sql_text() {return sql_text_;} - - // @brief Get the corresponding stmt - inline virtual const ObStmt *get_stmt() const { return stmt_; } - - // @brief Get the ptr to the root - inline ObLogicalOperator * get_plan_root() const { return root_; } - - // @brief Set the root of the plan - inline void set_plan_root(ObLogicalOperator *root) {root_ = root;} - /** - * @brief Generate the plan tree - * @param void - * @retval OB_SUCCESS execute success - * @retval OB_OPTIMIZE_GEN_PLAN_FALIED failed to generate the logical plan - * - * The function will be invoked by all DML/select statements and will handle - * the 'common' part of 
those statements, including joins, order-by, limit and - * etc. - */ - virtual int generate_plan_tree(); - - /** - * Generate the "explain plan" string - */ - int64_t to_string(char *buf, const int64_t buf_len) const; - - /** - * Get optimizer context - */ - ObOptimizerContext *get_optimizer_context() const { return optimizer_context_; } - - /** - * GENERATE logical PLAN - * - * The general public interface to generate a logical plan for a 'select' statement - */ - virtual int generate_plan() = 0; - ObIAllocator *get_allocator() const { return allocator_; } - virtual ObLogOperatorFactory *get_log_op_factory() { return &log_op_factory_; } - - /** - * @brief Copy the whole plan tree - * @param [in] src - the source plan tree - * @param [in/out] dst - the destination plan tree - * @retval OB_SUCCESS execute success - * @retval OB_SOME_ERROR special errno need to handle - */ - virtual int plan_tree_copy(ObLogicalOperator *src, ObLogicalOperator *&dst); - - /** - * List all needed plan traversals - */ - int plan_traverse_loop(int op1, ...); - protected: - - struct CandidatePlan - { - common::ObArray< ObRawExpr* > ordering_; - ObLogicalOperator *plan_tree_; - }; - - struct All_Candidate_Plans - { - common::ObArray candidate_plans_; - std::pair plain_plan_; - }; - - /** - * @brief Genearete a specified operator on top of a list of candidate plans - * @param [out] jos - the generated Join_OrderS - * @retval OB_SUCCESS execute success - * @retval OB_SOME_ERROR special errno need to handle - */ - int generate_join_orders(ObJoinOrder &jo); - - /** @brief Allocate a join operator based on join algorithms */ - ObLogicalOperator * allocate_join(JoinAlgo join_algo); - - /** @brief Allcoate operator for access path */ - ObLogicalOperator * allocate_access_path(AccessPath *ap); - - /** @brief Create plan tree from an interesting order */ - ObLogicalOperator * create_plan_tree_from_path(Path *jo); - - /** @brief Initialize the candidate plans from join order */ - int 
candi_init(ObJoinOrder &jo); - - /** @brief Get order by columns */ - int get_order_by_columns(common::ObIArray &order_by_columns); - - /** @brief Allocate ORDER BY on top of plan candidates */ - int candi_allocate_order_by(); - - /** @brief Allocate LIMIT on top of plan candidates */ - int candi_allocate_limit(); - - /** - * Plan tree traversing(both top-down and bottom-up) - */ - int plan_tree_traverse(const TraverseOp &operation, void *ctx); - - inline void set_signature(uint64_t hash_value) { hash_value_ = hash_value; } - - //added by ryan.ly 20141230 - //start.. - int generate_base_rel(ObJoinOrder* jo, uint32_t table_idx); - int generate_baserel_path(ObJoinOrder* jo); - int distribute_quals_to_rels(ObIArray& baserels); - int distribute_qual_to_rel(ObJoinOrder* rel, ObRawExpr* qual, ObIArray& cur_vars); - int generate_join_rel(ObJoinOrder* left_tree, - ObJoinOrder* right_tree, - JoinInfo* join_info, - ObJoinOrder** join_tree); - int generate_join_path(ObJoinOrder* left_tree, - ObJoinOrder* right_tree, - ObJoinOrder* joinrel, - JoinInfo* join_info); - int generate_join_level(ObArray< ObArray >& join_rels_, uint32_t level); - int create_nl_path(ObJoinOrder* joinrel, - Path* left_path, - Path* right_path, - JoinInfo* join_info, - Path** join_path); - int create_mj_path(ObJoinOrder* joinrel, - Path* left_path, - Path* right_path, - JoinInfo* join_info, - ObArray& join_conditions, - ObArray& join_filters, - ObArray& ordering_, - Path** join_path); - ObJoinOrder* find_base_rel(ObIArray& base_level, int32_t table_idx); - ObJoinOrder* find_join_rel(ObIArray& join_level, ObBitSet<>& relids); - ObJoinOrder* find_min_rel(ObIArray& path_set); - int extract_mergejoin_conditions(ObJoinOrder* join_rel, - ObIArray& join_quals, - ObIArray& join_conditions, - ObIArray& join_filters); - int make_ordering_for_mergejoin(ObIArray& join_conditions, - ObJoinOrder* join_rel, - ObIArray& ordering_); - int make_ordering_for_conditions(ObBitSet<>& left_relids, - ObIArray& join_conditions, 
- ObIArray& ordering); - int make_fixed_join_filters(ObIArray& join_conditions, - ObIArray& fixed_conditions, - ObIArray& join_filters, - ObIArray& fixed_filters); - int32_t get_var_relid(ObRawExpr* var); - JoinInfo* find_joininfo(ObJoinOrder* rel, ObBitSet<>& unjoined_tables); - ObLogicalOperator *allocate_path(Path* path); - ObLogicalOperator *allocate_join_path(JoinOrder* join_order); - //..end - - protected: // member variable - ObOptimizerContext *optimizer_context_; - ObIAllocator *allocator_; - const ObStmt *stmt_; - ObLogOperatorFactory log_op_factory_; // operator factory - All_Candidate_Plans candidates_; - private: // member functions - int remove_all_candidates_except_one(int64_t except); - - private: // memeber variable - ObLogicalOperator *root_; // root operator - int64_t flag_; - common::ObString sql_text_; // SQL string - uint64_t hash_value_; // signature - DISALLOW_COPY_AND_ASSIGN(ObLogPlan); - }; -} -} -#endif // _OB_LOG_PLAN_H diff --git a/src/sql/optimizer/ob_log_set.cpp b/src/sql/optimizer/ob_log_set.cpp index f934e9b5d8..6dc55cff0f 100644 --- a/src/sql/optimizer/ob_log_set.cpp +++ b/src/sql/optimizer/ob_log_set.cpp @@ -421,30 +421,52 @@ int ObLogSet::get_re_est_cost_infos(const EstimateCostInfo ¶m, const double need_row_count = (is_recursive_union() || !is_set_distinct()) && param.need_row_count_ >= 0 && param.need_row_count_ < card_ ? 
param.need_row_count_ : -1; + bool need_scale_ndv = (need_row_count == -1); double cur_child_card = 0.0; double cur_child_cost = 0.0; + if (OB_UNLIKELY(is_set_distinct() && get_num_of_child() != child_ndv_.count())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected child ndv count", K(child_ndv_)); + } for (int64_t i = 0; OB_SUCC(ret) && i < get_num_of_child(); ++i) { const ObLogicalOperator *child = get_child(i); cur_param.reset(); + double origin_child_card = 0; if (OB_ISNULL(child)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("set operator i-th child is null", K(ret), K(i)); } else if (OB_FAIL(cur_param.assign(param))) { LOG_WARN("failed to assign param", K(ret)); - } else if (OB_FALSE_IT(cur_param.need_row_count_ = need_row_count)) { + } else { + cur_param.need_row_count_ = need_row_count; + origin_child_card = child->get_card(); + } + if (OB_FAIL(ret)) { } else if (OB_FAIL(get_child(i)->re_est_cost(cur_param, cur_child_card, cur_child_cost))) { LOG_WARN("failed to re-est child cost", K(ret), K(i)); } else if (OB_FAIL(cost_infos.push_back(ObBasicCostInfo(cur_child_card, cur_child_cost, child->get_width())))) { LOG_WARN("push back child's cost info failed", K(ret)); - } else if (ObSelectStmt::UNION == get_set_op()) { - card += cur_child_card; + } else if (ObSelectStmt::UNION == get_set_op() && !is_set_distinct()) { + ObSelectStmt::SetOperator set_type = is_recursive_union() ? ObSelectStmt::RECURSIVE : ObSelectStmt::UNION; + if (0 == i) { + card = cur_child_card; + } else { + card = ObOptSelectivity::get_set_stmt_output_count(card, cur_child_card, set_type); + } child_cost += cur_child_cost; - } else if (ObSelectStmt::INTERSECT == get_set_op()) { - card = (0 == i || cur_child_card < card) ? cur_child_card : card; - child_cost += cur_child_cost; - } else if (ObSelectStmt::EXCEPT == get_set_op()) { - card = 0 == i ? 
cur_child_card : card; + } else { + double cur_child_ndv = child_ndv_.at(i); + if (need_scale_ndv) { + cur_child_ndv = std::min( + cur_child_ndv, + ObOptSelectivity::scale_distinct(cur_child_card, origin_child_card, cur_child_ndv)); + } + if (0 == i) { + card = cur_child_ndv; + } else { + card = ObOptSelectivity::get_set_stmt_output_count(card, cur_child_ndv, get_set_op()); + } child_cost += cur_child_cost; } } diff --git a/src/sql/optimizer/ob_log_set.h b/src/sql/optimizer/ob_log_set.h index 7f100c3fdf..20bb8430f4 100644 --- a/src/sql/optimizer/ob_log_set.h +++ b/src/sql/optimizer/ob_log_set.h @@ -118,6 +118,8 @@ public: virtual int print_used_hint(PlanText &plan_text) override; int get_used_pq_set_hint(const ObPQSetHint *&used_hint); int construct_pq_set_hint(ObPQSetHint &hint); + int set_child_ndv(ObIArray &ndv) { return child_ndv_.assign(ndv); } + int add_child_ndv(double ndv) { return child_ndv_.push_back(ndv); } private: bool is_distinct_; bool is_recursive_union_; @@ -130,6 +132,7 @@ private: //for cte search clause common::ObSEArray search_ordering_; common::ObSEArray cycle_items_; + common::ObSEArray child_ndv_; }; } // end of namespace sql diff --git a/src/sql/optimizer/ob_log_table_scan.cpp b/src/sql/optimizer/ob_log_table_scan.cpp index fc4a45ef96..831cd49e37 100644 --- a/src/sql/optimizer/ob_log_table_scan.cpp +++ b/src/sql/optimizer/ob_log_table_scan.cpp @@ -1062,39 +1062,6 @@ int ObLogTableScan::pick_out_query_range_exprs() return ret; } -int ObLogTableScan::pick_out_startup_filters() -{ - int ret = OB_SUCCESS; - ObLogPlan *plan = get_plan(); - const ParamStore *params = NULL; - ObOptimizerContext *opt_ctx = NULL; - ObArray filter_exprs; - if (OB_ISNULL(plan) - || OB_ISNULL(opt_ctx = &plan->get_optimizer_context()) - || OB_ISNULL(params = opt_ctx->get_params())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("NULL pointer error", K(plan), K(opt_ctx), K(ret)); - } else if (OB_FAIL(filter_exprs.assign(filter_exprs_))) { - LOG_WARN("assign filter exprs 
failed", K(ret)); - } else { - filter_exprs_.reset(); - } - for (int64_t i = 0; OB_SUCC(ret) && i < filter_exprs.count(); ++i) { - ObRawExpr *qual = filter_exprs.at(i); - if (OB_ISNULL(qual)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpect null expr", K(ret)); - } else if (qual->is_static_const_expr()) { - if (OB_FAIL(startup_exprs_.push_back(qual))) { - LOG_WARN("add filter expr failed", K(i), K(ret)); - } else { /* Do nothing */ } - } else if (OB_FAIL(filter_exprs_.push_back(qual))) { - LOG_WARN("add filter expr failed", K(i), K(ret)); - } else { /* Do nothing */ } - } - return ret; -} - int ObLogTableScan::init_calc_part_id_expr() { int ret = OB_SUCCESS; @@ -1274,6 +1241,9 @@ int ObLogTableScan::get_plan_item_info(PlanText &plan_text, LOG_WARN("BUF_PRINTF fails", K(ret)); } else if (OB_FAIL(BUF_PRINTF("dynamic sampling level:%ld", table_meta->get_ds_level()))) { LOG_WARN("BUF_PRINTF fails", K(ret)); + } else if (OB_NOT_NULL(est_cost_info_) && + OB_FAIL(print_est_method(est_cost_info_->est_method_, buf, buf_len, pos))) { + LOG_WARN("failed to print est method", K(ret)); } END_BUF_PRINT(plan_item.optimizer_, plan_item.optimizer_len_); } @@ -1344,6 +1314,41 @@ int ObLogTableScan::get_plan_item_info(PlanText &plan_text, return ret; } +int ObLogTableScan::print_est_method(ObBaseTableEstMethod method, char *buf, int64_t &buf_len, int64_t &pos) +{ + int ret = OB_SUCCESS; + if (method == EST_INVALID) { + // do nothing + } else if (OB_FAIL(BUF_PRINTF(NEW_LINE))) { + LOG_WARN("BUF_PRINTF fails", K(ret)); + } else if (OB_FAIL(BUF_PRINTF(OUTPUT_PREFIX))) { + LOG_WARN("BUF_PRINTF fails", K(ret)); + } else if (OB_FAIL(BUF_PRINTF("estimation method:["))) { + LOG_WARN("BUF_PRINTF fails", K(ret)); + } else if ((EST_DEFAULT & method) && + OB_FAIL(BUF_PRINTF("DEFAULT, "))) { + LOG_WARN("BUF_PRINTF fails"); + } else if ((EST_STAT & method) && + OB_FAIL(BUF_PRINTF("OPTIMIZER STATISTICS, "))) { + LOG_WARN("BUF_PRINTF fails"); + } else if ((EST_STORAGE & method) && + 
OB_FAIL(BUF_PRINTF("STORAGE, "))) { + LOG_WARN("BUF_PRINTF fails"); + } else if (((EST_DS_BASIC) & method) && + OB_FAIL(BUF_PRINTF("DYNAMIC SAMPLING BASIC, "))) { + LOG_WARN("BUF_PRINTF fails"); + } else if (((EST_DS_FULL) & method) && + OB_FAIL(BUF_PRINTF("DYNAMIC SAMPLING FULL, "))) { + LOG_WARN("BUF_PRINTF fails"); + } else { + pos -= 2; + if (OB_FAIL(BUF_PRINTF("]"))) { + LOG_WARN("BUF_PRINTF fails"); + } + } + return ret; +} + int ObLogTableScan::get_plan_object_info(PlanText &plan_text, ObSqlPlanItem &plan_item) { @@ -2048,7 +2053,7 @@ bool ObLogTableScan::is_need_feedback() const ret = sel >= SELECTION_THRESHOLD && !is_multi_part_table_scan_; - LOG_TRACE("is_need_feedback", K(estimate_method_), K(table_row_count), + LOG_TRACE("is_need_feedback", K(table_row_count), K(logical_query_range_row_count), K(sel), K(ret)); return ret; } diff --git a/src/sql/optimizer/ob_log_table_scan.h b/src/sql/optimizer/ob_log_table_scan.h index 1df3a8a56e..4f95cb3315 100644 --- a/src/sql/optimizer/ob_log_table_scan.h +++ b/src/sql/optimizer/ob_log_table_scan.h @@ -56,7 +56,6 @@ public: limit_offset_expr_(NULL), sample_info_(), est_cost_info_(NULL), - estimate_method_(INVALID_METHOD), table_opt_info_(NULL), est_records_(), part_expr_(NULL), @@ -366,8 +365,6 @@ public: inline double get_logical_query_range_row_count() const { return est_cost_info_ == NULL ? 0.0 : est_cost_info_->logical_query_range_row_count_; } inline void set_index_back_row_count(double index_back_row_count) { if (est_cost_info_ != NULL) est_cost_info_->index_back_row_count_ = index_back_row_count; } inline double get_index_back_row_count() const { return est_cost_info_ == NULL ? 
0.0 : est_cost_info_->index_back_row_count_; } - inline void set_estimate_method(RowCountEstMethod method) { estimate_method_ = method; } - inline RowCountEstMethod get_estimate_method() const { return estimate_method_; } int is_top_table_scan(bool &is_top_table_scan) { int ret = common::OB_SUCCESS; @@ -459,6 +456,7 @@ public: share::schema::ObTableType get_table_type() const { return table_type_; } virtual int get_plan_item_info(PlanText &plan_text, ObSqlPlanItem &plan_item) override; + int print_est_method(ObBaseTableEstMethod method, char *buf, int64_t &buf_len, int64_t &pos); int get_plan_object_info(PlanText &plan_text, ObSqlPlanItem &plan_item); inline ObTablePartitionInfo *get_global_index_back_table_partition_info() { return global_index_back_table_partition_info_; } @@ -484,7 +482,6 @@ public: private: // member functions //called when index_back_ set int pick_out_query_range_exprs(); - int pick_out_startup_filters(); int filter_before_index_back_set(); virtual int print_outline_data(PlanText &plan_text) override; virtual int print_used_hint(PlanText &plan_text) override; @@ -561,7 +558,6 @@ protected: // memeber variables // 记录该表是否采样、采样方式、比例等信息 SampleInfo sample_info_; ObCostTableScanInfo *est_cost_info_; - RowCountEstMethod estimate_method_; BaseTableOptInfo *table_opt_info_; common::ObSEArray est_records_; diff --git a/src/sql/optimizer/ob_logical_operator.cpp b/src/sql/optimizer/ob_logical_operator.cpp index 8380dc94f6..3e9101074d 100644 --- a/src/sql/optimizer/ob_logical_operator.cpp +++ b/src/sql/optimizer/ob_logical_operator.cpp @@ -1097,6 +1097,7 @@ int ObLogicalOperator::re_est_cost(EstimateCostInfo ¶m, double &card, double param.need_row_count_ = (get_card() <= param.need_row_count_ || 0 > param.need_row_count_) ? 
-1 : param.need_row_count_; double op_cost = 0.0; + bool contain_false_filter = false; card = 0.0; cost = 0.0; if (!param.need_re_est(get_parallel(), get_card())) { // no need to re est cost @@ -1106,6 +1107,10 @@ int ObLogicalOperator::re_est_cost(EstimateCostInfo ¶m, double &card, double LOG_WARN("failed to check need parallel valid", K(ret)); } else if (OB_FAIL(SMART_CALL(do_re_est_cost(param, card, op_cost, cost)))) { LOG_WARN("failed to do re est operator", K(ret)); + } else if (OB_FAIL(check_contain_false_startup_filter(contain_false_filter))) { + LOG_WARN("failed to check startup filter", K(ret)); + } else if (contain_false_filter && FALSE_IT(card = 0.0)) { + // never reach } else if (!param.override_) { /* do nothing */ } else if (OB_ISNULL(get_plan())) { @@ -5657,6 +5662,60 @@ int ObLogicalOperator::collect_batch_exec_param_post(void* ctx) return ret; } +int ObLogicalOperator::pick_out_startup_filters() +{ + int ret = OB_SUCCESS; + ObLogPlan *plan = get_plan(); + const ParamStore *params = NULL; + ObOptimizerContext *opt_ctx = NULL; + ObArray filter_exprs; + if (OB_ISNULL(plan) + || OB_ISNULL(opt_ctx = &plan->get_optimizer_context()) + || OB_ISNULL(params = opt_ctx->get_params())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("NULL pointer error", K(plan), K(opt_ctx), K(ret)); + } else if (OB_FAIL(filter_exprs.assign(filter_exprs_))) { + LOG_WARN("assign filter exprs failed", K(ret)); + } else { + filter_exprs_.reset(); + } + for (int64_t i = 0; OB_SUCC(ret) && i < filter_exprs.count(); ++i) { + ObRawExpr *qual = filter_exprs.at(i); + if (OB_ISNULL(qual)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpect null expr", K(ret)); + } else if (qual->is_static_const_expr()) { + if (OB_FAIL(startup_exprs_.push_back(qual))) { + LOG_WARN("add filter expr failed", K(i), K(ret)); + } else { /* Do nothing */ } + } else if (OB_FAIL(filter_exprs_.push_back(qual))) { + LOG_WARN("add filter expr failed", K(i), K(ret)); + } else { /* Do nothing */ } + } + return ret; +} + +int 
ObLogicalOperator::check_contain_false_startup_filter(bool &contain_false) +{ + int ret = OB_SUCCESS; + contain_false = false; + if (OB_ISNULL(get_plan())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("NULL pointer error", K(get_plan()), K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < startup_exprs_.count(); ++i) { + ObRawExpr *qual = startup_exprs_.at(i); + if (OB_ISNULL(qual)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpect null expr", K(ret)); + } else if (OB_FAIL(ObOptimizerUtil::check_is_static_false_expr( + get_plan()->get_optimizer_context(), *qual, contain_false))) { + LOG_WARN("failed to check is static false", K(ret)); + } else { /* Do nothing */ } + } + return ret; +} + int ObLogicalOperator::collect_batch_exec_param(void* ctx, const ObIArray &exec_params, ObIArray &left_above_params, diff --git a/src/sql/optimizer/ob_logical_operator.h b/src/sql/optimizer/ob_logical_operator.h index 7580d1f111..82d687dbe9 100644 --- a/src/sql/optimizer/ob_logical_operator.h +++ b/src/sql/optimizer/ob_logical_operator.h @@ -1687,6 +1687,8 @@ public: ObIArray &right_above_params); // 生成 partition id 表达式 int generate_pseudo_partition_id_expr(ObOpPseudoColumnRawExpr *&expr); + int pick_out_startup_filters(); + int check_contain_false_startup_filter(bool &contain_false); public: ObSEArray child_; diff --git a/src/sql/optimizer/ob_opt_default_stat.h b/src/sql/optimizer/ob_opt_default_stat.h index 7944ba5287..1f7ceae026 100644 --- a/src/sql/optimizer/ob_opt_default_stat.h +++ b/src/sql/optimizer/ob_opt_default_stat.h @@ -76,12 +76,7 @@ const double DEFAULT_AGG_EQ = 0.01; // clob/blob like "xxx" 的默认选择率 const double DEFAULT_CLOB_LIKE_SEL = 0.05; const double DEFAULT_ANTI_JOIN_SEL = 0.01; - -enum ObEstimateType -{ - OB_DEFAULT_STAT_EST, - OB_CURRENT_STAT_EST, -}; +const double DEFAULT_INEQ_JOIN_SEL = 0.05; } // namespace common } // namespace oceanabse diff --git a/src/sql/optimizer/ob_opt_est_cost_model.cpp b/src/sql/optimizer/ob_opt_est_cost_model.cpp index 
13d164241f..08e0b5c754 100644 --- a/src/sql/optimizer/ob_opt_est_cost_model.cpp +++ b/src/sql/optimizer/ob_opt_est_cost_model.cpp @@ -89,7 +89,7 @@ int ObCostTableScanInfo::assign(const ObCostTableScanInfo &est_cost_info) can_use_batch_nlj_ = est_cost_info.can_use_batch_nlj_; table_metas_ = est_cost_info.table_metas_; sel_ctx_ = est_cost_info.sel_ctx_; - row_est_method_ = est_cost_info.row_est_method_; + est_method_ = est_cost_info.est_method_; prefix_filter_sel_ = est_cost_info.prefix_filter_sel_; pushdown_prefix_filter_sel_ = est_cost_info.pushdown_prefix_filter_sel_; postfix_filter_sel_ = est_cost_info.postfix_filter_sel_; diff --git a/src/sql/optimizer/ob_opt_est_cost_model.h b/src/sql/optimizer/ob_opt_est_cost_model.h index 33c4da8f54..a41222c453 100644 --- a/src/sql/optimizer/ob_opt_est_cost_model.h +++ b/src/sql/optimizer/ob_opt_est_cost_model.h @@ -33,14 +33,17 @@ class OptSelectivityCtx; class ObOptCostModelParameter; class OptSystemStat; -enum RowCountEstMethod +enum RowCountEstMethod { INVALID_METHOD = 0 }; // deprecated +enum ObBaseTableEstBasicMethod { - INVALID_METHOD = 0, - DEFAULT_STAT, - BASIC_STAT, //use min/max/ndv to estimate row count - STORAGE_STAT, //use storage layer to estimate row count - DYNAMIC_SAMPLING_STAT //use dynamic sampling to estimate row count + EST_INVALID = 0, + EST_DEFAULT = 1 << 0, + EST_STAT = 1 << 1, + EST_STORAGE = 1 << 2, + EST_DS_BASIC = 1 << 3, + EST_DS_FULL = 1 << 4, }; +typedef uint64_t ObBaseTableEstMethod; // all the table meta info need to compute cost struct ObTableMetaInfo @@ -216,7 +219,7 @@ struct ObCostTableScanInfo table_filters_(), table_metas_(NULL), sel_ctx_(NULL), - row_est_method_(RowCountEstMethod::INVALID_METHOD), + est_method_(EST_INVALID), prefix_filter_sel_(1.0), pushdown_prefix_filter_sel_(1.0), postfix_filter_sel_(1.0), @@ -242,7 +245,7 @@ struct ObCostTableScanInfo K_(table_meta_info), K_(index_meta_info), K_(access_column_items), K_(is_virtual_table), K_(is_unique), - K_(is_inner_path), 
K_(can_use_batch_nlj), + K_(is_inner_path), K_(can_use_batch_nlj), K_(est_method), K_(prefix_filter_sel), K_(pushdown_prefix_filter_sel), K_(postfix_filter_sel), K_(table_filter_sel), K_(ss_prefix_ndv), K_(ss_postfix_range_filters_sel), @@ -278,7 +281,7 @@ struct ObCostTableScanInfo OptTableMetas *table_metas_; OptSelectivityCtx *sel_ctx_; // the following information are useful when estimating cost - RowCountEstMethod row_est_method_; // row_est_method + ObBaseTableEstMethod est_method_; double prefix_filter_sel_; double pushdown_prefix_filter_sel_; double postfix_filter_sel_; diff --git a/src/sql/optimizer/ob_opt_est_utils.cpp b/src/sql/optimizer/ob_opt_est_utils.cpp index a7f00fabb2..6c19e2a9ca 100644 --- a/src/sql/optimizer/ob_opt_est_utils.cpp +++ b/src/sql/optimizer/ob_opt_est_utils.cpp @@ -198,10 +198,12 @@ int ObOptEstUtils::if_expr_start_with_patten_sign(const ParamStore *params, const ObRawExpr *esp_expr, ObExecContext *exec_ctx, ObIAllocator &allocator, - bool &is_start_with) + bool &is_start_with, + bool &all_is_percent_sign) { int ret = OB_SUCCESS; is_start_with = false; + all_is_percent_sign = false; bool get_value = false; bool empty_escape = false; char escape; @@ -234,6 +236,15 @@ int ObOptEstUtils::if_expr_start_with_patten_sign(const ParamStore *params, } } else { /* do nothing */ } } + if (OB_SUCC(ret) && is_start_with) { + all_is_percent_sign = true; + const ObString &expr_str = value.get_string(); + for (int64_t i = 0; all_is_percent_sign && i < expr_str.length(); i++) { + if (expr_str[i] != '%') { + all_is_percent_sign = false; + } + } + } return ret; } diff --git a/src/sql/optimizer/ob_opt_est_utils.h b/src/sql/optimizer/ob_opt_est_utils.h index cc36776269..4767293bd8 100644 --- a/src/sql/optimizer/ob_opt_est_utils.h +++ b/src/sql/optimizer/ob_opt_est_utils.h @@ -93,7 +93,8 @@ public: const ObRawExpr *esp_expr, ObExecContext *exec_ctx, ObIAllocator &allocator, - bool &is_start_with); + bool &is_start_with, + bool &all_is_percent_sign); 
//whether the value of first_expr and second_expr is equal. diff --git a/src/sql/optimizer/ob_opt_selectivity.cpp b/src/sql/optimizer/ob_opt_selectivity.cpp index 14cf067205..cb1d44ae7f 100644 --- a/src/sql/optimizer/ob_opt_selectivity.cpp +++ b/src/sql/optimizer/ob_opt_selectivity.cpp @@ -30,6 +30,7 @@ #include "common/ob_smart_call.h" #include "share/stat/ob_dbms_stats_utils.h" #include "sql/optimizer/ob_access_path_estimation.h" +#include "sql/optimizer/ob_sel_estimator.h" using namespace oceanbase::common; using namespace oceanbase::share::schema; @@ -81,6 +82,8 @@ int OptTableMeta::assign(const OptTableMeta &other) rows_ = other.rows_; stat_type_ = other.stat_type_; ds_level_ = other.ds_level_; + stat_locked_ = other.stat_locked_; + distinct_rows_ = other.distinct_rows_; if (OB_FAIL(all_used_parts_.assign(other.all_used_parts_))) { LOG_WARN("failed to assign all used parts", K(ret)); @@ -166,14 +169,11 @@ int OptTableMeta::init_column_meta(const OptSelectivityCtx &ctx, int ret = OB_SUCCESS; ObGlobalColumnStat stat; bool is_single_pkey = (1 == pk_ids_.count() && pk_ids_.at(0) == column_id); - int64_t global_ndv = 0; - int64_t num_null = 0; if (is_single_pkey) { - global_ndv = rows_; - num_null = 0; + col_meta.set_ndv(rows_); + col_meta.set_num_null(0); } else if (use_default_stat()) { - global_ndv = std::min(rows_, 100.0); - num_null = rows_ * EST_DEF_COL_NULL_RATIO; + col_meta.set_default_meta(rows_); } else if (OB_ISNULL(ctx.get_opt_stat_manager()) || OB_ISNULL(ctx.get_session_info())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(ret), K(ctx.get_opt_stat_manager()), @@ -188,24 +188,21 @@ int OptTableMeta::init_column_meta(const OptSelectivityCtx &ctx, stat))) { LOG_WARN("failed to get column stats", K(ret)); } else if (0 == stat.ndv_val_ && 0 == stat.null_val_) { - global_ndv = std::min(rows_, 100.0); - num_null = rows_ * EST_DEF_COL_NULL_RATIO; + col_meta.set_default_meta(rows_); } else if (0 == stat.ndv_val_ && stat.null_val_ > 0) { - 
global_ndv = 1; - num_null = stat.null_val_; + col_meta.set_ndv(1); + col_meta.set_num_null(stat.null_val_); } else { - global_ndv = stat.ndv_val_; - num_null = stat.null_val_; + col_meta.set_ndv(stat.ndv_val_); + col_meta.set_num_null(stat.null_val_); } if (OB_SUCC(ret)) { - col_meta.init(column_id, - global_ndv, - num_null, - stat.avglen_val_, - stat.cg_macro_blk_cnt_, - stat.cg_micro_blk_cnt_, - stat.cg_skip_rate_); + col_meta.set_column_id(column_id); + col_meta.set_avg_len(stat.avglen_val_); + col_meta.set_cg_macro_blk_cnt(stat.cg_macro_blk_cnt_); + col_meta.set_cg_micro_blk_cnt(stat.cg_micro_blk_cnt_); + col_meta.set_cg_skip_rate(stat.cg_skip_rate_); } return ret; } @@ -237,6 +234,14 @@ const OptColumnMeta* OptTableMeta::get_column_meta(const uint64_t column_id) con return column_meta; } +void OptTableMeta::set_ndv_for_all_column(double ndv) +{ + for (int64_t i = 0; i < column_metas_.count(); ++i) { + column_metas_.at(i).set_ndv(ndv); + } + return; +} + int OptTableMetas::copy_table_meta_info(const OptTableMeta &src_meta, OptTableMeta *&dst_meta) { int ret = OB_SUCCESS; @@ -274,7 +279,8 @@ int OptTableMetas::add_base_table_meta_info(OptSelectivityCtx &ctx, const OptTableStatType stat_type, ObIArray &all_used_global_parts, const double scale_ratio, - int64_t last_analyzed) + int64_t last_analyzed, + bool is_stat_locked) { int ret = OB_SUCCESS; ObSqlSchemaGuard *schema_guard = ctx.get_sql_schema_guard(); @@ -291,6 +297,7 @@ int OptTableMetas::add_base_table_meta_info(OptSelectivityCtx &ctx, LOG_WARN("failed to init new tstat", K(ret)); } else { table_meta->set_version(last_analyzed); + table_meta->set_stat_locked(is_stat_locked); LOG_TRACE("add base table meta info success", K(*table_meta)); } return ret; @@ -299,7 +306,7 @@ int OptTableMetas::add_base_table_meta_info(OptSelectivityCtx &ctx, // set stmt child is select stmt, not generate table. To mentain meta info for set stmt, // mock a table for set stmt child. e.g. 
first child stmt use table id = 1, second child stmt // use table_id = 2, ... -int OptTableMetas::add_set_child_stmt_meta_info(const ObDMLStmt *parent_stmt, +int OptTableMetas::add_set_child_stmt_meta_info(const ObSelectStmt *parent_stmt, const ObSelectStmt *child_stmt, const uint64_t table_id, const OptTableMetas &child_table_metas, @@ -310,6 +317,7 @@ int OptTableMetas::add_set_child_stmt_meta_info(const ObDMLStmt *parent_stmt, OptTableMeta *table_meta = NULL; OptColumnMeta *column_meta = NULL; ObSEArray exprs; + ObSEArray select_exprs; ObRawExpr *select_expr = NULL; if (OB_ISNULL(parent_stmt) || OB_ISNULL(child_stmt)) { ret = OB_ERR_UNEXPECTED; @@ -318,7 +326,7 @@ int OptTableMetas::add_set_child_stmt_meta_info(const ObDMLStmt *parent_stmt, ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("failed to allocate place holder for table meta", K(ret)); } else { - const double table_rows = child_rows < 1.0 ? 1.0 : child_rows; + const double table_rows = child_rows; table_meta->set_table_id(table_id); table_meta->set_ref_table_id(OB_INVALID_ID); table_meta->set_rows(table_rows); @@ -330,6 +338,8 @@ int OptTableMetas::add_set_child_stmt_meta_info(const ObDMLStmt *parent_stmt, if (OB_ISNULL(select_expr = child_stmt->get_select_items().at(i).expr_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get null select expr", K(ret)); + } else if (OB_FAIL(select_exprs.push_back(select_expr))) { + LOG_WARN("Failed to push back select expr", K(ret)); } else if (select_expr->is_set_op_expr()) { const int64_t set_epxr_idx = static_cast(select_expr)->get_idx(); if (OB_FAIL(get_set_stmt_output_statistics(*child_stmt, child_table_metas, @@ -357,6 +367,24 @@ int OptTableMetas::add_set_child_stmt_meta_info(const ObDMLStmt *parent_stmt, } } } + if (OB_SUCC(ret)) { + double distinct_rows = 0.0; + if (child_stmt->is_set_stmt()) { + if (OB_FAIL(get_set_stmt_output_ndv(*child_stmt, child_table_metas, distinct_rows))) { + LOG_WARN("failed to get set stmt output statistics", K(ret)); + } + } else { + if 
(parent_stmt->is_set_distinct() && + OB_FAIL(ObOptSelectivity::calculate_distinct(child_table_metas, + child_ctx, + select_exprs, + table_rows, + distinct_rows))) { + LOG_WARN("failed to calculate distinct", K(ret)); + } + } + table_meta->set_distinct_rows(distinct_rows); + } if (OB_SUCC(ret)) { LOG_TRACE("succeed add set table meta info", K(child_table_metas), K(*this)); } @@ -428,14 +456,16 @@ int OptTableMetas::add_generate_table_meta_info(const ObDMLStmt *parent_stmt, } else { column_meta->init(column_item.column_id_, revise_ndv(ndv), num_null, avg_len); column_meta->set_min_max_inited(true); - if (select_expr->is_column_ref_expr()) { - ObColumnRefRawExpr *col = static_cast(select_expr); - const OptColumnMeta *child_column_meta = child_table_metas.get_column_meta_by_table_id( - col->get_table_id(), col->get_column_id()); - if (OB_NOT_NULL(child_column_meta) && child_column_meta->get_min_max_inited()) { - column_meta->set_min_value(child_column_meta->get_min_value()); - column_meta->set_max_value(child_column_meta->get_max_value()); - } + ObObj maxobj; + ObObj minobj; + maxobj.set_max_value(); + minobj.set_min_value(); + if (select_expr->is_column_ref_expr() && + OB_FAIL(ObOptSelectivity::get_column_min_max(child_table_metas, child_ctx, *select_expr, minobj, maxobj))) { + LOG_WARN("failed to get column min max", K(ret)); + } else { + column_meta->set_min_value(minobj); + column_meta->set_max_value(maxobj); } } } @@ -469,39 +499,55 @@ int OptTableMetas::get_set_stmt_output_statistics(const ObSelectStmt &stmt, } else if (OB_ISNULL(column_meta = child_table_metas.get_column_meta_by_table_id(i, column_id))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get null column meta info", K(ret)); - } else if (ObSelectStmt::SetOperator::UNION == stmt.get_set_op()) { - // ndv1 + ndv2 - ndv += column_meta->get_ndv(); - num_null += column_meta->get_num_null(); - avg_len = std::max(avg_len, column_meta->get_avg_len()); - } else if (ObSelectStmt::SetOperator::INTERSECT == 
stmt.get_set_op()) { - // min(ndv1, ndv2) + } else { + double cur_ndv = column_meta->get_ndv(); + double cur_num_null = ObSelectStmt::SetOperator::UNION == stmt.get_set_op() && !stmt.is_set_distinct() ? + column_meta->get_num_null() : std::min(column_meta->get_num_null(), 1.0); + double cur_avg_len = column_meta->get_avg_len(); if (0 == i) { - ndv = column_meta->get_ndv(); - num_null = column_meta->get_num_null(); - avg_len = column_meta->get_avg_len(); + ndv = cur_ndv; + num_null = cur_num_null; + avg_len = cur_avg_len; } else { - ndv = std::min(ndv, column_meta->get_ndv()); - num_null = std::min(num_null, column_meta->get_num_null()); - avg_len = std::min(avg_len, column_meta->get_avg_len()); + ObSelectStmt::SetOperator set_type = stmt.is_recursive_union() ? ObSelectStmt::SetOperator::RECURSIVE : stmt.get_set_op(); + ndv = ObOptSelectivity::get_set_stmt_output_count(ndv, cur_ndv, set_type); + num_null = ObOptSelectivity::get_set_stmt_output_count(num_null, cur_num_null, set_type); + if (ObSelectStmt::SetOperator::UNION == set_type) { + avg_len = std::max(avg_len, cur_avg_len); + } else if (ObSelectStmt::SetOperator::INTERSECT == set_type) { + avg_len = std::min(avg_len, cur_avg_len); + } else if (ObSelectStmt::SetOperator::EXCEPT == set_type) { + avg_len = std::min(avg_len, cur_avg_len); + } else if (ObSelectStmt::SetOperator::RECURSIVE == set_type) { + avg_len = std::max(avg_len, cur_avg_len); + } } - } else if (ObSelectStmt::SetOperator::EXCEPT == stmt.get_set_op()) { - // max(ndv1 - ndv2, 1) - if (0 == i) { - ndv = column_meta->get_ndv(); - num_null = column_meta->get_num_null(); - avg_len = column_meta->get_avg_len(); - } else { - ndv = std::max(ndv - column_meta->get_ndv(), 1.0); - num_null = std::max(num_null - column_meta->get_num_null(), 1.0); - avg_len = std::min(avg_len, column_meta->get_avg_len()); - } - } else if (ObSelectStmt::SetOperator::RECURSIVE == stmt.get_set_op()) { - // ndv1 - ndv = column_meta->get_ndv(); - num_null = 
column_meta->get_num_null(); - avg_len = column_meta->get_avg_len(); - break; + } + } + return ret; +} + +int OptTableMetas::get_set_stmt_output_ndv(const ObSelectStmt &stmt, + const OptTableMetas &child_table_metas, + double &ndv) +{ + int ret = OB_SUCCESS; + ndv = 0; + const OptTableMeta *table_meta = NULL; + ObSelectStmt::SetOperator set_type = stmt.is_recursive_union() ? ObSelectStmt::SetOperator::RECURSIVE : stmt.get_set_op(); + for (int64_t i = 0; OB_SUCC(ret) && i < stmt.get_set_query().count(); ++i) { + if (OB_ISNULL(stmt.get_set_query().at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null set query", K(ret)); + } else if (OB_INVALID_ID != stmt.get_set_query().at(i)->get_dblink_id()) { + // skip + } else if (OB_ISNULL(table_meta = child_table_metas.get_table_meta_by_table_id(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null table meta info", K(ret)); + } else if (0 == i) { + ndv = table_meta->get_distinct_rows(); + } else { + ndv = ObOptSelectivity::get_set_stmt_output_count(ndv, table_meta->get_distinct_rows(), set_type); } } return ret; @@ -540,7 +586,6 @@ const OptColumnMeta* OptTableMetas::get_column_meta_by_table_id(const uint64_t t return column_meta; } - int ObOptSelectivity::calculate_selectivity(const OptTableMetas &table_metas, const OptSelectivityCtx &ctx, const ObIArray &predicates, @@ -549,83 +594,53 @@ int ObOptSelectivity::calculate_selectivity(const OptTableMetas &table_metas, { int ret = OB_SUCCESS; selectivity = 1.0; - bool is_calculated = false; - ObSEArray join_conditions; - ObSEArray range_conditions; - ObRawExpr *qual = NULL; - double tmp_selectivity = 1.0; - bool need_skip = false; - //we calc some complex predicates selectivity by dynamic sampling - if (OB_FAIL(calc_complex_predicates_selectivity_by_ds(table_metas, ctx, predicates, - all_predicate_sel))) { - LOG_WARN("failed to calc complex predicates selectivity by ds", K(ret)); - } + ObSEArray sel_estimators; + ObSEArray selectivities; + ObArenaAllocator 
tmp_alloc("ObOptSel"); + ObSelEstimatorFactory factory(tmp_alloc); for (int64_t i = 0; OB_SUCC(ret) && i < predicates.count(); ++i) { - qual = predicates.at(i); - LOG_TRACE("calculate qual selectivity", "expr", PNAME(qual)); - if (OB_ISNULL(qual)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get null expr", K(ret)); - } else if (OB_FAIL(check_qual_later_calculation(table_metas, ctx, *qual, all_predicate_sel, - join_conditions,range_conditions, need_skip))) { - LOG_WARN("failed to check qual later calculation", K(ret)); - } else if (need_skip) { + const ObRawExpr *qual = predicates.at(i); + ObSelEstimator *estimator = NULL; + double single_sel = false; + if (OB_FAIL(factory.create_estimator(ctx, qual, estimator))) { + LOG_WARN("failed to create estimator", KPC(qual)); + } else if (OB_FAIL(ObSelEstimator::append_estimators(sel_estimators, estimator))) { + LOG_WARN("failed to append estimators", KPC(qual)); + } else if (ObOptimizerUtil::find_item(all_predicate_sel, ObExprSelPair(qual, 0))) { // do nothing - } else if (OB_FAIL(calculate_qual_selectivity(table_metas, ctx, *qual, - tmp_selectivity, all_predicate_sel))) { - LOG_WARN("failed to calculate one qual selectivity", K(*qual), K(ret)); + } else if (OB_FAIL(estimator->get_sel(table_metas, ctx, single_sel, all_predicate_sel))) { + LOG_WARN("failed to calculate one qual selectivity", KPC(estimator), K(qual), K(ret)); + } else if (OB_FAIL(add_var_to_array_no_dup(all_predicate_sel, ObExprSelPair(qual, single_sel)))) { + LOG_WARN("fail ed to add selectivity to plan", K(ret), K(qual), K(selectivity)); } else { - tmp_selectivity = revise_between_0_1(tmp_selectivity); - selectivity *= tmp_selectivity; + // We remember each predicate's selectivity in the plan so that we can reorder them + // in the vector of filters according to their selectivity. 
+ LOG_PRINT_EXPR(TRACE, "calculate one qual selectivity", *qual, K(single_sel)); } - LOG_TRACE("after calculate one qual selectivity", K(need_skip), K(tmp_selectivity), K(selectivity)); } - for (int64_t i = 0 ; OB_SUCC(ret) && i < range_conditions.count() ; ++i) { - tmp_selectivity = 1.0; - ObColumnRefRawExpr *col_expr = range_conditions.at(i).column_expr_; - if (OB_ISNULL(col_expr)) { + if (OB_SUCC(ret) && OB_FAIL(selectivities.prepare_allocate(sel_estimators.count()))) { + LOG_WARN("failed to prepare allocate", K(ret), K(selectivities), K(sel_estimators)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < sel_estimators.count(); ++i) { + ObSelEstimator *estimator = sel_estimators.at(i); + double tmp_selectivity = 0.0; + if (OB_ISNULL(sel_estimators.at(i))) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null", K(ret)); - } else if (OB_FAIL(get_column_range_sel(table_metas, ctx, *col_expr, - range_conditions.at(i).range_exprs_, - tmp_selectivity))) { - LOG_WARN("failed to get column range selectivity", K(ret)); + LOG_WARN("estimator is null", K(ret), K(sel_estimators)); + } else if (OB_FAIL(estimator->get_sel(table_metas, ctx, tmp_selectivity, all_predicate_sel))) { + LOG_WARN("failed to get sel", K(ret), KPC(estimator)); } else { - tmp_selectivity = revise_between_0_1(tmp_selectivity); - selectivity *= tmp_selectivity; - if (range_conditions.at(i).range_exprs_.count() > 1) { + selectivities.at(i) = tmp_selectivity; + if (ObSelEstType::RANGE == estimator->get_type()) { + ObRangeSelEstimator *range_estimator = static_cast(estimator); if (OB_FAIL(add_var_to_array_no_dup(all_predicate_sel, - ObExprSelPair(col_expr, tmp_selectivity, true)))) { - LOG_WARN("failed to add selectivity to plan", K(ret), K(qual), K(tmp_selectivity)); + ObExprSelPair(range_estimator->get_column_expr(), tmp_selectivity, true)))) { + LOG_WARN("failed to add selectivity to plan", K(ret), KPC(range_estimator), K(tmp_selectivity)); } } } } - if (OB_SUCC(ret) && ctx.get_left_rel_ids() != NULL && 
ctx.get_right_rel_ids() != NULL) { - tmp_selectivity = 1.0; - if (1 == join_conditions.count()) { - // only one join condition, calculate selectivity directly - if (OB_ISNULL(join_conditions.at(0))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null", K(ret)); - } else if (OB_FAIL(get_equal_sel(table_metas, ctx, *join_conditions.at(0), tmp_selectivity))) { - LOG_WARN("Failed to get equal selectivity", K(ret)); - } else { - LOG_PRINT_EXPR(TRACE, "get single equal expr selectivity", *join_conditions.at(0), K(tmp_selectivity)); - } - } else if (join_conditions.count() > 1) { - // 存在多个连接条件,检查是否涉及联合主键 - if (OB_FAIL(get_equal_sel(table_metas, ctx, join_conditions, tmp_selectivity))) { - LOG_WARN("failed to get equal sel"); - } else { - LOG_TRACE("get multi equal expr selectivity", K(join_conditions), K(tmp_selectivity)); - } - } - if (OB_SUCC(ret)) { - tmp_selectivity = revise_between_0_1(tmp_selectivity); - selectivity *= tmp_selectivity; - } - } + selectivity = ObOptSelectivity::get_filters_selectivity(selectivities, ctx.get_dependency_type()); return ret; } @@ -663,7 +678,6 @@ int ObOptSelectivity::calc_selectivity_by_dynamic_sampling(const OptSelectivityC if (OB_FAIL(ObDynamicSamplingUtils::get_ds_table_param(const_cast(ctx.get_opt_ctx()), ctx.get_plan(), ds_param.table_meta_, - true, ds_table_param, specify_ds))) { LOG_WARN("failed to get ds table param", K(ret), K(ds_table_param)); @@ -808,7 +822,7 @@ int ObOptSelectivity::resursive_extract_valid_predicate_for_ds(const OptTableMet } else if (T_OP_LIKE == qual->get_expr_type()) { bool can_calc_sel = false; double selectivity = 1.0; - if (OB_FAIL(get_like_sel(table_metas, ctx, *qual, selectivity, can_calc_sel))) { + if (OB_FAIL(ObLikeSelEstimator::can_calc_like_sel(ctx, *qual, can_calc_sel))) { LOG_WARN("failed to get like selectivity", K(ret)); } else if (can_calc_sel) { //do nothing @@ -872,77 +886,6 @@ int ObOptSelectivity::add_valid_ds_qual(const ObRawExpr *qual, return ret; } -int 
ObOptSelectivity::check_qual_later_calculation(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - ObRawExpr &qual, - ObIArray &all_pred_sel, - ObIArray &join_conditions, - ObIArray &range_conditions, - bool &need_skip) -{ - int ret = OB_SUCCESS; - need_skip = false; - if (OB_FAIL(is_simple_join_condition(qual, - ctx.get_left_rel_ids(), - ctx.get_right_rel_ids(), - need_skip, - join_conditions))) { - LOG_WARN("failed to check is simple join condition", K(ret)); - } else if (!need_skip && OB_FAIL(ObOptEstUtils::extract_simple_cond_filters(qual, - need_skip, - range_conditions))) { - LOG_WARN("failed to extract simple cond filters", K(ret)); - } else if (need_skip) { - // calculate qual selectivity if qual not in all_predicate_sel - double tmp_sel = 1.0; - if (ObOptimizerUtil::find_item(all_pred_sel, ObExprSelPair(&qual, 0))) { - // do nothing - } else if (OB_FAIL(calculate_qual_selectivity(table_metas, ctx, qual, tmp_sel, all_pred_sel))) { - LOG_WARN("failed to calculate one qual selectivity", K(qual), K(ret)); - } - } - return ret; -} - -/** - * check if qual is a simple join condition. - * This recommend each side of `=` belong to different subtree. 
- */ -int ObOptSelectivity::is_simple_join_condition(ObRawExpr &qual, - const ObRelIds *left_rel_ids, - const ObRelIds *right_rel_ids, - bool &is_valid, - ObIArray &join_conditions) -{ - int ret = OB_SUCCESS; - is_valid = false; - if (NULL == left_rel_ids || NULL == right_rel_ids) { - // do nothing - } else if (T_OP_EQ == qual.get_expr_type() || T_OP_NSEQ == qual.get_expr_type()) { - ObRawExpr *expr0 = qual.get_param_expr(0); - ObRawExpr *expr1 = qual.get_param_expr(1); - if (OB_ISNULL(expr0) || OB_ISNULL(expr1)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get null exprs", K(ret), K(expr0), K(expr1)); - } else if (OB_FAIL(ObOptimizerUtil::get_expr_without_lossless_cast(expr0, expr0)) || - OB_FAIL(ObOptimizerUtil::get_expr_without_lossless_cast(expr1, expr1))) { - LOG_WARN("failed to get expr without lossless cast", K(ret)); - } else if (!expr0->is_column_ref_expr() || !expr1->is_column_ref_expr()) { - // do nothing - } else if ((left_rel_ids->is_superset(expr0->get_relation_ids()) && - right_rel_ids->is_superset(expr1->get_relation_ids())) || - (left_rel_ids->is_superset(expr1->get_relation_ids()) && - right_rel_ids->is_superset(expr0->get_relation_ids()))) { - if (OB_FAIL(join_conditions.push_back(&qual))) { - LOG_WARN("failed to push back expr", K(ret)); - } else { - is_valid = true; - } - } else { /* do nothing */ } - } - return ret; -} - int ObOptSelectivity::calculate_qual_selectivity(const OptTableMetas &table_metas, const OptSelectivityCtx &ctx, const ObRawExpr &qual, @@ -950,109 +893,21 @@ int ObOptSelectivity::calculate_qual_selectivity(const OptTableMetas &table_meta ObIArray &all_predicate_sel) { int ret = OB_SUCCESS; - selectivity = 1.0; - double tmp_sel = 1.0; - int64_t idx = 0; - if (qual.has_flag(CNT_AGG)) { - if (OB_FAIL(get_agg_sel(table_metas, ctx, qual, selectivity))) { - LOG_WARN("failed to get agg expr selectivity", K(ret), K(qual)); - } - } else if (qual.is_const_expr()) { - if (OB_FAIL(get_const_sel(ctx, qual, selectivity))) { - LOG_WARN("failed 
to get const expr selectivity", K(ret), K(qual)); - } - } else if (qual.is_column_ref_expr()) { - if (OB_FAIL(get_column_sel(table_metas, ctx, qual, selectivity))) { - LOG_WARN("failed to get column selectivity", K(ret), K(qual)); - } - } else if (T_OP_EQ == qual.get_expr_type() || T_OP_NSEQ == qual.get_expr_type()) { - if (OB_FAIL(get_equal_sel(table_metas, ctx, qual, selectivity))) { - LOG_WARN("failed to get equal selectivity", K(ret)); - } - } else if (T_OP_IN == qual.get_expr_type() || T_OP_NOT_IN == qual.get_expr_type()) { - if (OB_FAIL(get_in_sel(table_metas, ctx, qual, selectivity))) { - LOG_WARN("failed to get in selectivity", K(ret)); - } - } else if (T_OP_IS == qual.get_expr_type() || T_OP_IS_NOT == qual.get_expr_type()) { - if (OB_FAIL(get_is_sel(table_metas, ctx, qual, selectivity))) { - LOG_WARN("failed to get is selectivity", K(ret)); - } - } else if (IS_RANGE_CMP_OP(qual.get_expr_type())) { - if (OB_FAIL(get_range_cmp_sel(table_metas, ctx, qual, selectivity))) { - LOG_WARN("failed to get range cmp selectivity", K(ret)); - } - } else if (T_OP_LIKE == qual.get_expr_type()) { - bool can_calc_sel = false; - if (OB_FAIL(get_like_sel(table_metas, ctx, qual, selectivity, can_calc_sel))) { - LOG_WARN("failed to get like selectivity", K(ret)); - } else if (can_calc_sel) {//do nothing - //try find the calc sel from dynamic sampling - } else if (ObOptimizerUtil::find_item(all_predicate_sel, ObExprSelPair(&qual, 0), &idx)) { - selectivity = all_predicate_sel.at(idx).sel_; - } - } else if (T_OP_BTW == qual.get_expr_type() || T_OP_NOT_BTW == qual.get_expr_type()) { - if (OB_FAIL(get_btw_sel(table_metas, ctx, qual, selectivity))) { - LOG_WARN("failed to get between selectivity", K(ret)); - } - } else if (T_OP_NOT == qual.get_expr_type()) { - if (OB_FAIL(get_not_sel(table_metas, ctx, qual, selectivity, all_predicate_sel))) { - LOG_WARN("failed to get not selectivity", K(ret)); - } - } else if (T_OP_NE == qual.get_expr_type()) { - if (OB_FAIL(get_ne_sel(table_metas, 
ctx, qual, selectivity))) { - LOG_WARN("failed to get not selectivity", K(ret)); - } - } else if (T_OP_AND == qual.get_expr_type()) { - double tmp_selectivity = 1.0; - for (int64_t i = 0; OB_SUCC(ret) && i < qual.get_param_count(); ++i) { - const ObRawExpr *child_expr = qual.get_param_expr(i); - if (OB_ISNULL(child_expr)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get null expr", K(ret)); - } else if (OB_FAIL(calculate_qual_selectivity(table_metas, ctx, *child_expr, - tmp_selectivity, all_predicate_sel))) { - LOG_WARN("failed to callculate one qual selectivity", K(child_expr), K(ret)); - } else { - selectivity *= tmp_selectivity; - } - } - } else if (T_OP_OR == qual.get_expr_type()) { - double tmp_selectivity = 1.0; - selectivity = 0; - bool is_mutex = false;; - if (OB_FAIL(check_mutex_or(qual, is_mutex))) { - LOG_WARN("failed to check mutex or", K(ret)); - } else { - for (int64_t i = 0; OB_SUCC(ret) && i < qual.get_param_count(); ++i) { - const ObRawExpr *child_expr = qual.get_param_expr(i); - if (OB_ISNULL(child_expr)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get null expr", K(ret)); - } else if (OB_FAIL(calculate_qual_selectivity(table_metas, ctx, *child_expr, - tmp_selectivity, all_predicate_sel))) { - LOG_WARN("failed to callculate one qual selectivity", K(child_expr), K(ret)); - } else if (0 == i || is_mutex) { - selectivity += tmp_selectivity; - } else { - selectivity += tmp_selectivity - tmp_selectivity * selectivity; - } - } - } - } else if (qual.is_spatial_expr()) { - selectivity = DEFAULT_SPATIAL_SEL; - } else if (ObOptimizerUtil::find_item(all_predicate_sel, ObExprSelPair(&qual, 0), &idx)) { - selectivity = all_predicate_sel.at(idx).sel_; - } else { //任何处理不了的表达式,都认为是0.5的选择率 } else { //任何处理不了的表达式,都认为是0.5的选择率 - selectivity = DEFAULT_SEL; - } - selectivity = revise_between_0_1(selectivity); - if (OB_SUCC(ret)) { - LOG_PRINT_EXPR(TRACE, "calculate one qual selectivity", qual, K(selectivity)); + ObArenaAllocator tmp_alloc("ObOptSel"); + ObSelEstimatorFactory 
factory(tmp_alloc); + ObSelEstimator *estimator = NULL; + if (OB_FAIL(factory.create_estimator(ctx, &qual, estimator))) { + LOG_WARN("failed to create estimator", K(qual)); + } else if (OB_FAIL(estimator->get_sel(table_metas, ctx, selectivity, all_predicate_sel))) { + LOG_WARN("failed to calculate one qual selectivity", KPC(estimator), K(qual), K(ret)); + } else if (FALSE_IT(selectivity = revise_between_0_1(selectivity))) { + // never reach + } else if (OB_FAIL(add_var_to_array_no_dup(all_predicate_sel, ObExprSelPair(&qual, selectivity)))) { + LOG_WARN("fail ed to add selectivity to plan", K(ret), K(qual), K(selectivity)); + } else { // We remember each predicate's selectivity in the plan so that we can reorder them // in the vector of filters according to their selectivity. - if (OB_FAIL(add_var_to_array_no_dup(all_predicate_sel, ObExprSelPair(&qual, selectivity)))) { - LOG_WARN("failed to add selectivity to plan", K(ret), K(qual), K(selectivity)); - } + LOG_PRINT_EXPR(TRACE, "calculate one qual selectivity", qual, K(selectivity)); } return ret; } @@ -1081,7 +936,7 @@ int ObOptSelectivity::update_table_meta_info(const OptTableMetas &base_table_met table_meta->set_rows(filtered_rows); if (filtered_rows >= origin_rows) { // only update table rows - } else if (OB_FAIL(classify_quals(quals, all_predicate_sel, column_sel_infos))) { + } else if (OB_FAIL(classify_quals(ctx, quals, all_predicate_sel, column_sel_infos))) { LOG_WARN("failed to classify quals", K(ret)); } else { for (int64_t i = 0; OB_SUCC(ret) && i < table_meta->get_column_metas().count(); ++i) { @@ -1148,6 +1003,23 @@ int ObOptSelectivity::update_table_meta_info(const OptTableMetas &base_table_met column_meta.set_num_null(null_num); column_meta.set_hist_scale(hist_scale); } + + if (OB_SUCC(ret) && OB_NOT_NULL(sel_info)) { + if (sel_info->max_ < sel_info->min_ || + sel_info->max_ < column_meta.get_min_value() || + sel_info->min_ > column_meta.get_max_value()) { + // invalid min max + 
column_meta.get_min_value().set_min_value(); + column_meta.get_max_value().set_max_value(); + } else { + if (!sel_info->min_.is_null() && sel_info->min_ > column_meta.get_min_value()) { + column_meta.set_min_value(sel_info->min_); + } + if (!sel_info->max_.is_null() && sel_info->max_ < column_meta.get_max_value()) { + column_meta.set_max_value(sel_info->max_); + } + } + } } } } @@ -1155,749 +1027,6 @@ int ObOptSelectivity::update_table_meta_info(const OptTableMetas &base_table_met return ret; } -int ObOptSelectivity::get_const_sel(const OptSelectivityCtx &ctx, - const ObRawExpr &qual, - double &selectivity) -{ - int ret = OB_SUCCESS; - const ParamStore *params = ctx.get_params(); - const ObDMLStmt *stmt = ctx.get_stmt(); - if (OB_ISNULL(params) || OB_ISNULL(stmt)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null", K(ret), K(params), K(stmt)); - } else if (ObOptEstUtils::is_calculable_expr(qual, params->count())) { - ObObj const_value; - bool got_result = false; - bool is_true = false; - if (OB_FAIL(ObSQLUtils::calc_const_or_calculable_expr(ctx.get_opt_ctx().get_exec_ctx(), - &qual, - const_value, - got_result, - ctx.get_allocator()))) { - LOG_WARN("failed to calc const or calculable expr", K(ret)); - } else if (!got_result) { - selectivity = DEFAULT_SEL; - } else if (OB_FAIL(ObObjEvaluator::is_true(const_value, is_true))) { - LOG_WARN("failed to check is const value true", K(ret)); - } else { - selectivity = is_true ? 
1.0 : 0.0; - } - } else { - selectivity = DEFAULT_SEL; - } - return ret; -} - -int ObOptSelectivity::get_column_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObRawExpr &qual, - double &selectivity) -{ - int ret = OB_SUCCESS; - selectivity = DEFAULT_SEL; - double distinct_sel = 0.0; - double null_sel = 0.0; - if (!ob_is_string_or_lob_type(qual.get_data_type())) { - if (OB_FAIL(check_column_in_current_level_stmt(ctx.get_stmt(), qual))) { - LOG_WARN("Failed to check column in cur level stmt", K(ret)); - } else if (OB_FAIL(get_column_basic_sel(table_metas, ctx, qual, &distinct_sel, &null_sel))) { - LOG_WARN("Failed to calc basic equal sel", K(ret)); - } else { - selectivity = 1.0 - distinct_sel - null_sel; - } - } - return ret; -} - -int ObOptSelectivity::get_equal_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObRawExpr &qual, - double &selectivity) -{ - int ret = OB_SUCCESS; - const ObRawExpr *left_expr = qual.get_param_expr(0); - const ObRawExpr *right_expr = qual.get_param_expr(1); - if (OB_ISNULL(left_expr) || OB_ISNULL(right_expr)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get null expr", K(ret), K(qual), K(left_expr), K(right_expr)); - } else if (OB_FAIL(get_equal_sel(table_metas, ctx, *left_expr, *right_expr, - T_OP_NSEQ == qual.get_expr_type(), selectivity))) { - LOG_WARN("failed to get equal sel", K(ret)); - } - return ret; -} - -int ObOptSelectivity::get_equal_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObRawExpr &left_expr, - const ObRawExpr &right_expr, - const bool null_safe, - double &selectivity) -{ - int ret = OB_SUCCESS; - if (T_OP_ROW == left_expr.get_expr_type() && T_OP_ROW == right_expr.get_expr_type()) { - // normally row equal row will unnest as `var = var and var = var ...` - selectivity = 1.0; - double tmp_selectivity = 1.0; - const ObRawExpr *l_expr = NULL; - const ObRawExpr *r_expr = NULL; - const ObRawExpr *l_row = &left_expr; - const ObRawExpr 
*r_row = &right_expr; - // (c1, c2) in ((const1, const2)) may transform to (c1, c2) = ((const1, const2)) - if (left_expr.get_param_count() == 1 && OB_NOT_NULL(left_expr.get_param_expr(0)) && - T_OP_ROW == left_expr.get_param_expr(0)->get_expr_type()) { - l_row = left_expr.get_param_expr(0); - } - if (right_expr.get_param_count() == 1 && OB_NOT_NULL(right_expr.get_param_expr(0)) && - T_OP_ROW == right_expr.get_param_expr(0)->get_expr_type()) { - r_row = right_expr.get_param_expr(0); - } - if (OB_UNLIKELY(l_row->get_param_count() != r_row->get_param_count())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected expr", KPC(l_row), KPC(l_row), K(ret)); - } else { - int64_t num = l_row->get_param_count(); - for (int64_t i = 0; OB_SUCC(ret) && i < num; ++i) { - if (OB_ISNULL(l_expr = l_row->get_param_expr(i)) || - OB_ISNULL(r_expr = r_row->get_param_expr(i))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get null expr", K(ret), K(l_expr), K(r_expr), K(i)); - } else if (OB_FAIL(SMART_CALL(get_equal_sel(table_metas, ctx, *l_expr, - *r_expr, null_safe, tmp_selectivity)))) { - LOG_WARN("failed to get equal selectivity", K(ret)); - } else { - selectivity *= tmp_selectivity; - } - } - } - } else if ((left_expr.has_flag(CNT_COLUMN) && !right_expr.has_flag(CNT_COLUMN)) || - (!left_expr.has_flag(CNT_COLUMN) && right_expr.has_flag(CNT_COLUMN))) { - // column = const - const ObRawExpr *cnt_col_expr = left_expr.has_flag(CNT_COLUMN) ? &left_expr : &right_expr; - const ObRawExpr &calc_expr = left_expr.has_flag(CNT_COLUMN) ? 
right_expr : left_expr; - ObOptColumnStatHandle handler; - ObObj expr_value; - bool can_use_hist = false; - if (OB_FAIL(ObOptimizerUtil::get_expr_without_lossless_cast(cnt_col_expr, cnt_col_expr))) { - LOG_WARN("failed to get expr without lossless cast", K(ret)); - } else if (cnt_col_expr->is_column_ref_expr()) { - const ObColumnRefRawExpr* col = static_cast(cnt_col_expr); - if (OB_FAIL(get_histogram_by_column(table_metas, ctx, col->get_table_id(), - col->get_column_id(), handler))) { - LOG_WARN("failed to get histogram by column", K(ret)); - } else if (handler.stat_ == NULL || !handler.stat_->get_histogram().is_valid()) { - // do nothing - } else if (OB_FAIL(get_compare_value(ctx, col, &calc_expr, expr_value, can_use_hist))) { - // cast may failed due to invalid type or value out of range. - // Then use ndv instead of histogram - can_use_hist = false; - ret = OB_SUCCESS; - } - } - if (OB_SUCC(ret)) { - if (can_use_hist) { - double nns = 0; - double hist_scale = 0; - if (OB_FAIL(get_column_hist_scale(table_metas, ctx, *cnt_col_expr, hist_scale))) { - LOG_WARN("failed to get columnn hist sample scale", K(ret)); - } else if (OB_FAIL(get_equal_pred_sel(handler.stat_->get_histogram(), expr_value, - hist_scale, selectivity))) { - LOG_WARN("Failed to get equal density", K(ret)); - } else if (OB_FAIL(get_column_ndv_and_nns(table_metas, ctx, *cnt_col_expr, NULL, &nns))) { - LOG_WARN("failed to get column ndv and nns", K(ret)); - } else { - selectivity *= nns; - } - } else if (OB_FAIL(get_simple_equal_sel(table_metas, ctx, *cnt_col_expr, - &calc_expr, null_safe, selectivity))) { - LOG_WARN("failed to get simple equal selectivity", K(ret)); - } - LOG_TRACE("succeed to get equal predicate sel", K(can_use_hist), K(selectivity)); - } - } else if (left_expr.has_flag(CNT_COLUMN) && right_expr.has_flag(CNT_COLUMN)) { - if (OB_FAIL(get_cntcol_op_cntcol_sel(table_metas, ctx, left_expr, right_expr, - null_safe ? 
T_OP_NSEQ : T_OP_EQ, selectivity))) { - LOG_WARN("failed to get contain column equal contain column selectivity", K(ret)); - } else { - LOG_TRACE("succeed to get contain column equal contain column sel", K(selectivity), K(ret)); - } - } else { - // CONST_PARAM = CONST_PARAM - const ParamStore *params = ctx.get_params(); - if (OB_ISNULL(params)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("Params is NULL", K(ret)); - } else if (ObOptEstUtils::is_calculable_expr(left_expr, params->count()) && - ObOptEstUtils::is_calculable_expr(right_expr, params->count())) { - // 1 in (c1, 2, 3) will reach this branch - bool equal = false; - if (OB_FAIL(ObOptEstUtils::if_expr_value_equal(const_cast(ctx.get_opt_ctx()), - ctx.get_stmt(), - left_expr, right_expr, null_safe, equal))) { - LOG_WARN("Failed to check hvae equal expr", K(ret)); - } else { - selectivity = equal ? 1.0 : 0.0; - } - } else { - selectivity = DEFAULT_EQ_SEL; - } - } - return ret; -} - -int ObOptSelectivity::get_simple_equal_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObRawExpr &cnt_col_expr, - const ObRawExpr *calculable_expr, - const bool null_safe, - double &selectivity) -{ - int ret = OB_SUCCESS; - ObSEArray column_exprs; - bool only_monotonic_op = true; - const ObColumnRefRawExpr *column_expr = NULL; - double distinct_sel = 1.0; - double null_sel = 1.0; - bool is_null_value = false; - if (OB_FAIL(ObOptEstUtils::extract_column_exprs_with_op_check(&cnt_col_expr, - column_exprs, - only_monotonic_op))) { - LOG_WARN("failed to extract column exprs with op check", K(ret)); - } else if (!only_monotonic_op || column_exprs.count() > 1) { - // cnt_col_expr contain not monotonic op OR has more than 1 column - selectivity = DEFAULT_EQ_SEL; - } else if (OB_UNLIKELY(1 != column_exprs.count()) || - OB_ISNULL(column_expr = column_exprs.at(0))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpect column expr", K(column_exprs), K(cnt_col_expr), K(column_expr)); - } else if 
(OB_FAIL(get_column_basic_sel(table_metas, ctx, *column_expr, - &distinct_sel, &null_sel))) { - LOG_WARN("failed to get column basic selelectivity", K(ret)); - } else if (NULL == calculable_expr) { - selectivity = distinct_sel; - } else if (OB_FAIL(ObOptEstUtils::if_expr_value_null(ctx.get_params(), - *calculable_expr, - ctx.get_opt_ctx().get_exec_ctx(), - ctx.get_allocator(), - is_null_value))) { - LOG_WARN("failed to check if expr value null", K(ret)); - } else if (!is_null_value) { - selectivity = distinct_sel; - } else if (null_safe) { - selectivity = null_sel; - } else { - selectivity = 0.0; - } - return ret; -} - -int ObOptSelectivity::get_cntcol_op_cntcol_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObRawExpr &input_left_expr, - const ObRawExpr &input_right_expr, - ObItemType op_type, - double &selectivity) -{ - int ret = OB_SUCCESS; - double left_ndv = 1.0; - double right_ndv = 1.0; - double left_nns = 0.0; - double right_nns = 0.0; - selectivity = DEFAULT_EQ_SEL; - const ObRawExpr* left_expr = &input_left_expr; - const ObRawExpr* right_expr = &input_right_expr; - if (OB_FAIL(ObOptimizerUtil::get_expr_without_lossless_cast(left_expr, left_expr)) || - OB_FAIL(ObOptimizerUtil::get_expr_without_lossless_cast(right_expr, right_expr))) { - LOG_WARN("failed to check is lossless column cast", K(ret)); - } else if (OB_ISNULL(left_expr) || OB_ISNULL(right_expr)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null", K(ret), K(left_expr), K(right_expr)); - } else if (left_expr->is_column_ref_expr() && right_expr->is_column_ref_expr()) { - const ObColumnRefRawExpr* left_col = NULL; - const ObColumnRefRawExpr* right_col = NULL; - if (OB_FAIL(filter_one_column_by_equal_set(table_metas, ctx, left_expr, left_expr))) { - LOG_WARN("failed filter column by equal set", K(ret)); - } else if (OB_FAIL(filter_one_column_by_equal_set(table_metas, ctx, right_expr, right_expr))) { - LOG_WARN("failed filter column by equal set", K(ret)); - } 
else if (OB_FAIL(get_column_ndv_and_nns(table_metas, ctx, *left_expr, &left_ndv, &left_nns))) { - LOG_WARN("failed to get column basic sel", K(ret)); - } else if (OB_FAIL(get_column_ndv_and_nns(table_metas, ctx, *right_expr, - &right_ndv, &right_nns))) { - LOG_WARN("failed to get column basic sel", K(ret)); - } else if (FALSE_IT(left_col = static_cast(left_expr)) || - FALSE_IT(right_col = static_cast(right_expr))) { - // never reach - } else if (left_expr->get_relation_ids() == right_expr->get_relation_ids()) { - if (left_col->get_column_id() == right_col->get_column_id()) { - // same table same column - if (T_OP_NSEQ == op_type) { - selectivity = 1.0; - } else if (T_OP_EQ == op_type) { - selectivity = left_nns; - } else if (T_OP_NE == op_type) { - selectivity = 0.0; - } - } else { - //same table different column - if (T_OP_NSEQ == op_type) { - selectivity = left_nns * right_nns / std::max(left_ndv, right_ndv) - + (1 - left_nns) * (1 - right_nns); - } else if (T_OP_EQ == op_type) { - selectivity = left_nns * right_nns / std::max(left_ndv, right_ndv); - } else if (T_OP_NE == op_type) { - selectivity = left_nns * right_nns * (1 - 1/std::max(left_ndv, right_ndv)); - } - } - } else { - // different table - ObOptColumnStatHandle left_handler; - ObOptColumnStatHandle right_handler; - obj_cmp_func cmp_func = NULL; - bool calc_with_hist = false; - if (!ObObjCmpFuncs::can_cmp_without_cast(left_col->get_result_type(), - right_col->get_result_type(), - CO_EQ, cmp_func)) { - // do nothing - } else if (OB_FAIL(get_histogram_by_column(table_metas, ctx, left_col->get_table_id(), - left_col->get_column_id(), left_handler))) { - LOG_WARN("failed to get histogram by column", K(ret)); - } else if (OB_FAIL(get_histogram_by_column(table_metas, ctx, right_col->get_table_id(), - right_col->get_column_id(), right_handler))) { - LOG_WARN("failed to get histogram by column", K(ret)); - } else if (left_handler.stat_ != NULL && right_handler.stat_ != NULL && - 
left_handler.stat_->get_histogram().is_frequency() && - right_handler.stat_->get_histogram().is_frequency()) { - calc_with_hist = true; - } - if (OB_FAIL(ret)) { - } else if (IS_SEMI_ANTI_JOIN(ctx.get_join_type())) { - if (OB_ISNULL(ctx.get_left_rel_ids()) || OB_ISNULL(ctx.get_right_rel_ids())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null", K(ctx.get_left_rel_ids()), K(ctx.get_right_rel_ids())); - } else if (left_expr->get_relation_ids().overlap(*ctx.get_right_rel_ids()) || - right_expr->get_relation_ids().overlap(*ctx.get_left_rel_ids())) { - std::swap(left_ndv, right_ndv); - std::swap(left_nns, right_nns); - } - if (OB_SUCC(ret)) { - if (calc_with_hist) { - double total_rows = 0; - double left_rows = 0; - double left_null = 0; - double right_rows = 0; - double right_null = 0; - if (OB_FAIL(get_join_pred_rows(left_handler.stat_->get_histogram(), - right_handler.stat_->get_histogram(), - true, total_rows))) { - LOG_WARN("failed to get join pred rows", K(ret)); - } else if (OB_FAIL(get_column_basic_info(ctx.get_plan()->get_basic_table_metas(), ctx, - *left_expr, NULL, &left_null, NULL, &left_rows))) { - LOG_WARN("failed to get column basic info", K(ret)); - } else if (OB_FAIL(get_column_basic_info(ctx.get_plan()->get_basic_table_metas(), ctx, - *right_expr, NULL, &right_null, NULL, &right_rows))) { - LOG_WARN("failed to get column basic info", K(ret)); - } else if (T_OP_NSEQ == op_type) { - total_rows += right_null > 0 ? left_null : 0; - selectivity = total_rows / left_rows; - } else if (T_OP_EQ == op_type) { - selectivity = total_rows / left_rows; - } else if (T_OP_NE == op_type) { - selectivity = ((left_rows - left_null) * (right_rows - right_null) - total_rows) - / left_rows / right_rows; - } - } else { - /** - * ## non NULL safe - * a) semi: `(min(ndv1, ndv2) / ndv1) * (1.0 - nullfrac1)` - * ## NULL safe - * a) semi: `(min(ndv1, ndv2) / ndv1) * (1.0 - nullfrac1) + nullfrac2 > 0 && nullsafe ? 
nullfrac1: 0` - */ - if (IS_LEFT_SEMI_ANTI_JOIN(ctx.get_join_type())) { - if (T_OP_NSEQ == op_type) { - selectivity = (std::min(left_ndv, right_ndv) / left_ndv) * left_nns; - if (1 - right_nns > 0) { - selectivity += (1 - left_nns); - } - } else if (T_OP_EQ == op_type) { - selectivity = (std::min(left_ndv, right_ndv) / left_ndv) * left_nns; - } else if (T_OP_NE == op_type) { - if (right_ndv > 1.0) { - // if right ndv > 1.0, then there must exist one value not equal to left value - selectivity = left_nns; - } else { - selectivity = (1 - 1 / left_ndv) * left_nns; - } - } - } else { - if (T_OP_NSEQ == op_type) { - selectivity = (std::min(left_ndv, right_ndv) / right_ndv) * right_nns; - if (1 - left_nns > 0) { - selectivity += (1 - right_nns); - } - } else if (T_OP_EQ == op_type) { - selectivity = (std::min(left_ndv, right_ndv) / right_ndv) * right_nns; - } else if (T_OP_NE == op_type) { - if (left_ndv > 1.0) { - // if left ndv > 1.0, then there must exist one value not equal to right value - selectivity = right_nns; - } else { - selectivity = (1 - 1 / right_ndv) * right_nns; - } - } - } - } - } - if (OB_SUCC(ret) && selectivity >= 1.0 && IS_ANTI_JOIN(ctx.get_join_type())) { - selectivity = 1 - DEFAULT_ANTI_JOIN_SEL; - } - } else { - // inner join, outer join - if (calc_with_hist) { - // use frequency histogram calculate selectivity - double total_rows = 0; - double left_rows = 0; - double left_null = 0; - double right_rows = 0; - double right_null = 0; - if (OB_FAIL(get_join_pred_rows(left_handler.stat_->get_histogram(), - right_handler.stat_->get_histogram(), - false, total_rows))) { - LOG_WARN("failed to get join pred rows", K(ret)); - } else if (OB_FAIL(get_column_basic_info(ctx.get_plan()->get_basic_table_metas(), ctx, - *left_expr, NULL, &left_null, NULL, &left_rows))) { - LOG_WARN("failed to get column basic info", K(ret)); - } else if (OB_FAIL(get_column_basic_info(ctx.get_plan()->get_basic_table_metas(), ctx, - *right_expr, NULL, &right_null, NULL, 
&right_rows))) { - LOG_WARN("failed to get column basic info", K(ret)); - } else if (T_OP_NSEQ == op_type) { - selectivity = (total_rows + left_null * right_null) / left_rows / right_rows; - } else if (T_OP_EQ == op_type) { - selectivity = total_rows / left_rows / right_rows; - } else if (T_OP_NE == op_type) { - selectivity = ((left_rows - left_null) * (right_rows - right_null) - total_rows) - / left_rows / right_rows; - } - } else { - /** - * ## non NULL safe - * (1.0 - nullfrac1) * (1.0 - nullfrac2) / MAX(nd1, nd2) - * ## NULL safe - * (1.0 - nullfrac1) * (1.0 - nullfrac2) / MAX(nd1, nd2) + nullfraf1 * nullfrac2 - * 目前不会特殊考虑 outer join 的选择率, 而是在外层对行数进行 revise. - */ - if (T_OP_NSEQ == op_type) { - selectivity = left_nns * right_nns / std::max(left_ndv, right_ndv) - + (1 - left_nns) * (1 - right_nns); - } else if (T_OP_EQ == op_type) { - selectivity = left_nns * right_nns / std::max(left_ndv, right_ndv); - } else if (T_OP_NE == op_type) { - selectivity = left_nns * right_nns * (1 - 1/std::max(left_ndv, right_ndv)); - } - } - } - } - } else if (left_expr->is_column_ref_expr() || right_expr->is_column_ref_expr()) { - // col1 = func(col2), selectivity is 1 / ndv(col1) - // inner join and semi join use same formula - /** - * some test with generated table in oracle: - * select * from t1,t2,(select c1 as agg from t3) v where t1.c1 + t2.c1 = v.agg; - * => sel(t1.c1 + t2.c1 = v.agg) = 1/ndv(v.agg) - * select * from t1,t2,(select c1+c2 as agg from t3) v where t1.c1 + t2.c1 = v.agg; - * => sel(t1.c1 + t2.c1 = v.agg) = 1/100 - * select * from t1,t2,(select max(c1) as agg from t3) v where t1.c1 + t2.c1 = v.agg; - * => sel(t1.c1 + t2.c1 = v.agg) = 1/100 - * it seems like in oracle, if an equal condition is `col1 = fun(cols)` and col1 is from a - * generated table. oracle will check whether col1 is refered a basic column. If not, use - * default - */ - const ObRawExpr* column_expr = left_expr->is_column_ref_expr() ? 
left_expr : right_expr; - if (OB_FAIL(get_column_basic_sel(table_metas, ctx, *column_expr, &selectivity))) { - LOG_WARN("failed to get column basic selelectivity", K(ret)); - } else if (T_OP_NE == op_type) { - selectivity = 1 - selectivity; - } - } else { - // func(col) = func(col) - double left_sel = 0.0; - double right_sel = 0.0; - if (OB_FAIL(get_simple_equal_sel(table_metas, ctx, *left_expr, NULL, - T_OP_NSEQ == op_type, left_sel))) { - LOG_WARN("Failed to get simple predicate sel", K(ret)); - } else if (OB_FAIL(get_simple_equal_sel(table_metas, ctx, *right_expr, NULL, - T_OP_NSEQ == op_type, right_sel))) { - LOG_WARN("Failed to get simple predicate sel", K(ret)); - } else { - selectivity = std::min(left_sel, right_sel); - if (T_OP_NE == op_type) { - selectivity = 1 - selectivity; - } - } - } - return ret; -} - -int ObOptSelectivity::get_equal_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - ObIArray &quals, - double &selectivity) -{ - int ret = OB_SUCCESS; - ObSEArray left_exprs; - ObSEArray right_exprs; - ObSEArray null_safes; - bool is_valid; - if (OB_ISNULL(ctx.get_left_rel_ids()) || OB_ISNULL(ctx.get_right_rel_ids())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("failed get unexpected null", K(ret), K(ctx)); - } else if (OB_FAIL(is_valid_multi_join(quals, is_valid))) { - LOG_WARN("failed to check is valid multi join", K(ret)); - } else if (!is_valid) { - // multi join condition related to more than two table. Calculate selectivity for each join - // condition independently. 
- for (int64_t i = 0; OB_SUCC(ret) && i < quals.count(); ++i) { - ObRawExpr *cur_expr = quals.at(i); - double tmp_sel = 1.0; - if (OB_ISNULL(cur_expr)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null", K(ret)); - } else if (OB_FAIL(get_equal_sel(table_metas, ctx, *cur_expr, tmp_sel))) { - LOG_WARN("failed to get equal selectivity", K(ret)); - } else { - selectivity *= tmp_sel; - } - } - } else if (OB_FAIL(extract_join_exprs(quals, *ctx.get_left_rel_ids(), *ctx.get_right_rel_ids(), - left_exprs, right_exprs, null_safes))) { - LOG_WARN("failed to extract join exprs", K(ret)); - } else if (OB_FAIL(get_cntcols_eq_cntcols_sel(table_metas, ctx, left_exprs, right_exprs, - null_safes, selectivity))) { - LOG_WARN("Failed to get equal sel", K(ret)); - } else { /* do nothing */ } - return ret; -} - -int ObOptSelectivity::extract_join_exprs(ObIArray &quals, - const ObRelIds &left_rel_ids, - const ObRelIds &right_rel_ids, - ObIArray &left_exprs, - ObIArray &right_exprs, - ObIArray &null_safes) -{ - int ret = OB_SUCCESS; - ObRawExpr *left_expr = NULL; - ObRawExpr *right_expr = NULL; - for (int64_t i = 0; OB_SUCC(ret) && i < quals.count(); ++i) { - ObRawExpr *cur_expr = quals.at(i); - if (OB_ISNULL(cur_expr) || - OB_ISNULL(left_expr = cur_expr->get_param_expr(0)) || - OB_ISNULL(right_expr = cur_expr->get_param_expr(1))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null", K(ret), K(cur_expr), K(left_expr), K(right_expr)); - } else if (OB_FAIL(ObOptimizerUtil::get_expr_without_lossless_cast(left_expr, left_expr)) || - OB_FAIL(ObOptimizerUtil::get_expr_without_lossless_cast(right_expr, right_expr))) { - LOG_WARN("failed to get expr without lossless cast", K(ret)); - } else if (OB_UNLIKELY(!left_expr->is_column_ref_expr() || !right_expr->is_column_ref_expr())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("all expr should be column ref", K(ret), K(*cur_expr)); - } else if (left_rel_ids.is_superset(left_expr->get_relation_ids()) && - 
right_rel_ids.is_superset(right_expr->get_relation_ids())) { - // do nothing - } else if (left_rel_ids.is_superset(right_expr->get_relation_ids()) && - right_rel_ids.is_superset(left_expr->get_relation_ids())) { - std::swap(left_expr, right_expr); - } else { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected expr", K(ret), K(left_expr), K(right_expr)); - } - - if (OB_SUCC(ret)) { - if (OB_FAIL(left_exprs.push_back(left_expr))) { - LOG_WARN("failed to push back expr", K(ret)); - } else if (OB_FAIL(right_exprs.push_back(right_expr))) { - LOG_WARN("failed to push back expr", K(ret)); - } else if (OB_FAIL(null_safes.push_back(T_OP_NSEQ == cur_expr->get_expr_type()))) { - LOG_WARN("failed to push back null safe", K(ret)); - } - } - } - return ret; -} - -int ObOptSelectivity::get_cntcols_eq_cntcols_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObIArray &left_exprs, - const ObIArray &right_exprs, - const ObIArray &null_safes, - double &selectivity) -{ - int ret = OB_SUCCESS; - selectivity = DEFAULT_EQ_SEL; - ObSEArray left_ndvs; - ObSEArray right_ndvs; - ObSEArray left_not_null_sels; - ObSEArray right_not_null_sels; - double left_ndv = 1.0; - double right_ndv = 1.0; - double left_nns = 1.0; - double right_nns = 1.0; - double left_rows = 1.0; - double right_rows = 1.0; - double left_origin_rows = 1.0; - double right_origin_rows = 1.0; - bool left_contain_pk = false; - bool right_contain_pk = false; - bool is_union_pk = false; - bool refine_right_ndv = false; - bool refine_left_ndv = false; - - if (OB_ISNULL(ctx.get_plan())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null", K(ret)); - } else if (OB_FAIL(is_columns_contain_pkey(table_metas, left_exprs, - left_contain_pk, is_union_pk))) { - LOG_WARN("failed to check is columns contain pkey", K(ret)); - } else if (OB_FALSE_IT(refine_right_ndv = left_contain_pk && is_union_pk)) { - } else if (OB_FAIL(is_columns_contain_pkey(table_metas, right_exprs, - right_contain_pk, 
is_union_pk))) { - LOG_WARN("failed to check is columns contain pkey", K(ret)); - } else if (OB_FALSE_IT(refine_left_ndv = right_contain_pk && is_union_pk)) { - } else { - for (int64_t i = 0; OB_SUCC(ret) && i < left_exprs.count(); ++i) { - if (OB_ISNULL(left_exprs.at(i)) || OB_ISNULL(right_exprs.at(i))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null", K(ret)); - } else if (OB_FAIL(get_column_ndv_and_nns(table_metas, ctx, *left_exprs.at(i), - &left_ndv, &left_nns))) { - LOG_WARN("failed to get left ndv and nns", K(ret)); - } else if (OB_FAIL(get_column_ndv_and_nns(table_metas, ctx, *right_exprs.at(i), - &right_ndv, &right_nns))) { - LOG_WARN("failed to get left ndv and nns", K(ret)); - } else if (OB_FAIL(left_not_null_sels.push_back(left_nns))) { - LOG_WARN("failed to push back not null sel", K(ret)); - } else if (OB_FAIL(right_not_null_sels.push_back(right_nns))) { - LOG_WARN("failed to push back not null sel", K(ret)); - } else if (OB_FAIL(left_ndvs.push_back(left_ndv))) { - LOG_WARN("failed to push back ndv", K(ret)); - } else if (OB_FAIL(right_ndvs.push_back(right_ndv))) { - LOG_WARN("failed to push back ndv", K(ret)); - } else if (0 == i) { - if (OB_FAIL(get_column_basic_info(table_metas, ctx, *left_exprs.at(i), - NULL, NULL, NULL, &left_rows))) { - LOG_WARN("failed to get column basic info", K(ret)); - } else if (OB_FAIL(get_column_basic_info(table_metas, ctx, *right_exprs.at(i), - NULL, NULL, NULL, &right_rows))) { - LOG_WARN("failed to get column basic info", K(ret)); - } else if (refine_right_ndv && - OB_FAIL(get_column_basic_info(ctx.get_plan()->get_basic_table_metas(), - ctx, *left_exprs.at(i), - NULL, NULL, NULL, &left_origin_rows))) { - LOG_WARN("failed to get column basic info", K(ret)); - } else if (refine_left_ndv && - OB_FAIL(get_column_basic_info(ctx.get_plan()->get_basic_table_metas(), - ctx, *right_exprs.at(i), - NULL, NULL, NULL, &right_origin_rows))) { - LOG_WARN("failed to get column basic info", K(ret)); - } - } - } - } - - if 
(OB_FAIL(ret)) { - } else if (OB_FAIL(calculate_distinct(table_metas, ctx, left_exprs, left_rows, left_ndv))) { - LOG_WARN("Failed to calculate distinct", K(ret)); - } else if (OB_FAIL(calculate_distinct(table_metas, ctx, right_exprs, right_rows, right_ndv))) { - LOG_WARN("Failed to calculate distinct", K(ret)); - } else if (IS_SEMI_ANTI_JOIN(ctx.get_join_type())) { - /** - * 对于 semi anti join, 选择率描述的是外表行数为基础的选择率 - * # FORMULA - * ## non NULL safe - * a) semi: `(min(left_ndv, right_ndv) / left_ndv) * left_not_null_sel(i)` - * ## NULL safe - * a) semi: non NULL safe selectivity + `nullsafe(i) && left_not_null_sel(i) < 1.0 ? null_sel(i) * selectivity(j) [where j != i]: 0` - */ - if (IS_LEFT_SEMI_ANTI_JOIN(ctx.get_join_type())) { - selectivity = std::min(left_ndv, right_ndv) / left_ndv; - for (int64_t i = 0; i < left_not_null_sels.count(); ++i) { - selectivity *= left_not_null_sels.at(i); - } - // 处理 null safe,这里假设多列上同时为null即小概率事件,只考虑特定列上为null的情况 - for (int64_t i = 0; i < null_safes.count(); ++i) { - if (OB_UNLIKELY(null_safes.at(i) && right_not_null_sels.at(i) < 1.0)) { - double factor = 1.0; - for (int64_t j = 0; j < null_safes.count(); ++j) { - if (i == j) { - factor *= (1 - left_not_null_sels.at(j)); - } else { - factor *= left_not_null_sels.at(j) * std::min(left_ndvs.at(j), right_ndvs.at(j)) / left_ndvs.at(j); - } - } - selectivity += factor; - } - } - } else { - selectivity = std::min(left_ndv, right_ndv) / right_ndv; - for (int64_t i = 0; i < right_not_null_sels.count(); ++i) { - selectivity *= right_not_null_sels.at(i); - } - // 处理 null safe,这里假设多列上同时为null即小概率事件,只考虑特定列上为null的情况 - for (int64_t i = 0; i < null_safes.count(); ++i) { - if (OB_UNLIKELY(null_safes.at(i) && right_not_null_sels.at(i) < 1.0)) { - double factor = 1.0; - for (int64_t j = 0; j < null_safes.count(); ++j) { - if (i == j) { - factor *= (1 - right_not_null_sels.at(j)); - } else { - factor *= right_not_null_sels.at(j) * std::min(left_ndvs.at(j), right_ndvs.at(j)) / right_ndvs.at(j); - } - } - 
selectivity += factor; - } - } - } - } else { - /** - * # FORMULA - * ## non NULL safe - * 1 / MAX(ndv1, ndv2) * not_null_frac1_col1 * not_null_frac2_col1 * not_null_frac1_col2 * not_null_frac2_col2 * ... - * ## NULL safe - * non NULL safe selectivity + `nullsafe(i) ? (1 - not_null_frac1_col(i)) * (1 - not_null_frac2_col(i)) * selectivity(col(j)) [where j != i]: 0` - * 目前不会特殊考虑 outer join 的选择率, 而是在外层对行数进行 revise. - */ - if (left_contain_pk == right_contain_pk) { - // 两侧都不是主键或都是主键, 不做修正 - } else if (refine_right_ndv) { - // 一侧有主键时, 认为是主外键连接, 外键上最大的ndv为即为主键的原始ndv - right_ndv = std::min(right_ndv, left_origin_rows); - } else if (refine_left_ndv) { - left_ndv = std::min(left_ndv, right_origin_rows); - } else { - // do nothing - } - selectivity = 1.0 / std::max(left_ndv, right_ndv); - for (int64_t i = 0; i < left_not_null_sels.count(); ++i) { - selectivity *= left_not_null_sels.at(i) * right_not_null_sels.at(i); - } - // 处理null safe, 这里假设多列上同时为null即小概率事件,只考虑特定列上为null的情况 - for (int64_t i = 0; i < null_safes.count(); ++i) { - if (null_safes.at(i)) { - double factor = 1.0; - for (int64_t j = 0; j < null_safes.count(); ++j) { - if (i == j) { - factor *= (1 - left_not_null_sels.at(j)) * (1 - right_not_null_sels.at(j)); - } else { - factor *= left_not_null_sels.at(j) * right_not_null_sels.at(j) / std::max(left_ndvs.at(j), right_ndvs.at(j)); - } - } - selectivity += factor; - } else {/* do nothing */} - } - } - LOG_TRACE("selectivity of `col_ref1 =|<=> col_ref1 and col_ref2 =|<=> col_ref2`", K(selectivity)); - return ret; -} - /** * 计算equal join condition的左右表选择率 * left_selectivity = right_ndv / left_ndv @@ -1969,264 +1098,6 @@ int ObOptSelectivity::calc_sel_for_equal_join_cond(const OptTableMetas &table_me return ret; } -int ObOptSelectivity::get_in_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObRawExpr &qual, - double &selectivity) -{ - int ret = OB_SUCCESS; - selectivity = 0.0; - double tmp_selectivity = 1.0; - double distinct_sel = 1.0; - 
double null_sel = 0.0; - const ObRawExpr *left_expr = NULL; - const ObRawExpr *right_expr = NULL; - const ObRawExpr *param_expr = NULL; - bool contain_null = false; - if (OB_UNLIKELY(2 != qual.get_param_count()) || - OB_ISNULL(left_expr = qual.get_param_expr(0)) || - OB_ISNULL(right_expr = qual.get_param_expr(1)) || - T_OP_ROW != right_expr->get_expr_type()) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpect expr", K(ret), K(qual), K(left_expr), K(right_expr)); - } else if (OB_FAIL(ObOptimizerUtil::get_expr_without_lossless_cast(left_expr, left_expr))) { - LOG_WARN("failed to get expr without lossless cast", K(ret)); - } else if (OB_LIKELY(left_expr->is_column_ref_expr() && !right_expr->has_flag(CNT_COLUMN))) { - ObOptColumnStatHandle handler; - ObObj expr_value; - bool histogram_valid = false; - const ObColumnRefRawExpr *col = static_cast(left_expr); - hash::ObHashSet obj_set; - double hist_scale = 0; - if (OB_FAIL(obj_set.create(hash::cal_next_prime(right_expr->get_param_count()), - "OptSelHashSet", "OptSelHashSet"))) { - LOG_WARN("failed to create hash set", K(ret), K(right_expr->get_param_count())); - } else if (OB_FAIL(get_column_basic_sel(table_metas, ctx, *left_expr, &distinct_sel, &null_sel))) { - LOG_WARN("failed to get column basic selectivity", K(ret)); - } else if (OB_FAIL(get_column_hist_scale(table_metas, ctx, *left_expr, hist_scale))) { - LOG_WARN("failed to get columnn hist sample scale", K(ret)); - } else if (OB_FAIL(get_histogram_by_column(table_metas, ctx, - col->get_table_id(), - col->get_column_id(), - handler))) { - LOG_WARN("failed to get histogram by column", K(ret)); - } else if (handler.stat_ != NULL && handler.stat_->get_histogram().is_valid()) { - histogram_valid = true; - } - for (int64_t i = 0; OB_SUCC(ret) && i < right_expr->get_param_count(); ++i) { - // bool can_use_hist = false; - bool get_value = false; - if (OB_ISNULL(param_expr = right_expr->get_param_expr(i))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get null expr", K(ret)); 
- } else if (OB_FAIL(get_compare_value(ctx, col, param_expr, expr_value, get_value))) { - // cast may failed due to invalid type or value out of range. - // Then use ndv instead of histogram - get_value = false; - ret = OB_SUCCESS; - } - if (OB_SUCC(ret)) { - if (histogram_valid && get_value) { - double null_sel = 0; - if (OB_HASH_EXIST == obj_set.exist_refactored(expr_value)) { - // duplicate value, do nothing - } else if (OB_FAIL(obj_set.set_refactored(expr_value))) { - LOG_WARN("failed to set refactorcd", K(ret), K(expr_value)); - } else if (OB_FAIL(get_equal_pred_sel(handler.stat_->get_histogram(), - expr_value, - hist_scale, - tmp_selectivity))) { - LOG_WARN("failed to get equal density", K(ret)); - } else { - selectivity += tmp_selectivity * (1 - null_sel); - } - } else if (!get_value) { - // invalid value, for example c1 in (exec_param). Do not check obj exists. - if (param_expr->get_result_type().is_null()) { - contain_null = true; - } else { - selectivity += distinct_sel; - } - } else if (OB_HASH_EXIST == obj_set.exist_refactored(expr_value)) { - // do nothing - } else if (OB_FAIL(obj_set.set_refactored(expr_value))) { - LOG_WARN("failed to set refactorcd", K(ret), K(expr_value)); - } else if (expr_value.is_null()) { - contain_null = true; - } else { - selectivity += distinct_sel; - } - } - } - if (obj_set.created()) { - int tmp_ret = OB_SUCCESS; - if (OB_SUCCESS != (tmp_ret = obj_set.destroy())) { - LOG_WARN("failed to destroy hash set", K(tmp_ret), K(ret)); - ret = COVER_SUCC(tmp_ret); - } - } - } else { - for (int64_t i = 0; OB_SUCC(ret) && i < right_expr->get_param_count(); ++i) { - if (OB_ISNULL(param_expr = right_expr->get_param_expr(i))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get null expr", K(ret)); - } else if (OB_FAIL(get_equal_sel(table_metas, ctx, *left_expr, *param_expr, - false, tmp_selectivity))) { - LOG_WARN("Failed to get equal sel", K(ret), KPC(left_expr)); - } else { - selectivity += tmp_selectivity; - } - } - } - - selectivity = 
revise_between_0_1(selectivity); - if (OB_SUCC(ret) && T_OP_NOT_IN == qual.get_expr_type()) { - selectivity = 1.0 - selectivity; - if (contain_null) { - selectivity = 0.0; - } else if (left_expr->has_flag(CNT_COLUMN) && !right_expr->has_flag(CNT_COLUMN)) { - ObSEArray cur_vars; - if (OB_FAIL(ObRawExprUtils::extract_column_exprs(left_expr, cur_vars))) { - LOG_WARN("failed to extract column exprs", K(ret)); - } else if (1 == cur_vars.count()) { // only one column, consider null_sel - if (OB_ISNULL(cur_vars.at(0))) { - LOG_WARN("expr is null", K(ret)); - } else if (OB_FAIL(get_column_basic_sel(table_metas, ctx, *cur_vars.at(0), - &distinct_sel, &null_sel))) { - LOG_WARN("failed to get column basic sel", K(ret)); - } else if (distinct_sel > ((1.0 - null_sel) / 2.0)) { - // ndv < 2 - // TODO: @yibo 这个refine过程不太理解 - selectivity = distinct_sel / 2.0; - } else { - selectivity -= null_sel; - selectivity = std::max(distinct_sel, selectivity); // at least one distinct_sel - } - } else { }//do nothing - } - } - return ret; -} - -int ObOptSelectivity::get_is_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObRawExpr &qual, - double &selectivity) -{ - int ret = OB_SUCCESS; - selectivity = DEFAULT_SEL; - const ParamStore *params = ctx.get_params(); - const ObDMLStmt *stmt = ctx.get_stmt(); - const ObRawExpr *left_expr = qual.get_param_expr(0); - const ObRawExpr *right_expr = qual.get_param_expr(1); - ObObj result; - bool got_result = false; - if (OB_ISNULL(params) || OB_ISNULL(stmt) || OB_ISNULL(left_expr) || OB_ISNULL(right_expr)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpect null", K(ret), K(params), K(stmt), K(left_expr), K(right_expr)); - } else if (OB_UNLIKELY(!ObOptEstUtils::is_calculable_expr(*right_expr, params->count()))) { - // do nothing - } else if (OB_FAIL(ObSQLUtils::calc_const_or_calculable_expr(ctx.get_opt_ctx().get_exec_ctx(), - right_expr, - result, - got_result, - ctx.get_allocator()))) { - LOG_WARN("failed to calculate 
const or calculable expr", K(ret)); - } else if (!got_result) { - // do nothing - } else if (OB_FAIL(ObOptimizerUtil::get_expr_without_lossless_cast(left_expr, left_expr))) { - LOG_WARN("failed to get expr without lossless cast", K(ret)); - } else if (left_expr->is_column_ref_expr()) { - if (OB_FAIL(check_column_in_current_level_stmt(stmt, *left_expr))) { - LOG_WARN("Failed to check column whether is in current stmt", K(ret)); - } else if (OB_LIKELY(result.is_null())) { - if (OB_FAIL(get_column_basic_sel(table_metas, ctx, *left_expr, NULL, &selectivity))) { - LOG_WARN("Failed to get var distinct sel", K(ret)); - } - } else if (result.is_tinyint() && - !ob_is_string_or_lob_type(left_expr->get_data_type())) { - double distinct_sel = 0.0; - double null_sel = 0.0; - if (OB_FAIL(get_column_basic_sel(table_metas, ctx, *left_expr, &distinct_sel, &null_sel))) { - LOG_WARN("Failed to get var distinct sel", K(ret)); - } else { - //distinct_num < 2. That is distinct_num only 1,(As double and statistics not completely accurate, - //use (1 - null_sel)/ 2.0 to check) - if (distinct_sel > (1 - null_sel) / 2.0) { - //Ihe formula to calc sel of 'c1 is true' is (1 - distinct_sel(var = 0) - null_sel). - //If distinct_num is 1, the sel would be 0.0. - //But we don't kown whether distinct value is 0. So gess the selectivity: (1 - null_sel)/2.0 - distinct_sel = (1- null_sel) / 2.0;//don't kow the value, just get half. - } - selectivity = (result.is_true()) ? 
(1 - distinct_sel - null_sel) : distinct_sel; - } - } else { }//default sel - } else { - //TODO func(cnt_column) - } - - if (T_OP_IS_NOT == qual.get_expr_type()) { - selectivity = 1.0 - selectivity; - } - return ret; -} - -int ObOptSelectivity::get_range_cmp_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObRawExpr &qual, - double &selectivity) -{ - int ret = OB_SUCCESS; - selectivity = DEFAULT_INEQ_SEL; - const ObRawExpr *left_expr = qual.get_param_expr(0); - const ObRawExpr *right_expr = qual.get_param_expr(1); - if (OB_ISNULL(left_expr) || OB_ISNULL(right_expr)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get null expr", K(ret), K(left_expr), K(right_expr)); - } else if (OB_FAIL(ObOptimizerUtil::get_expr_without_lossless_cast(left_expr, left_expr)) || - OB_FAIL(ObOptimizerUtil::get_expr_without_lossless_cast(right_expr, right_expr))) { - LOG_WARN("failed to get expr without lossless cast", K(ret)); - } else if ((left_expr->is_column_ref_expr() && right_expr->is_const_expr()) || - (left_expr->is_const_expr() && right_expr->is_column_ref_expr())) { - const ObRawExpr *col_expr = left_expr->is_column_ref_expr() ? 
left_expr : right_expr; - if (OB_FAIL(get_column_range_sel(table_metas, ctx, - static_cast(*col_expr), - qual, selectivity))) { - LOG_WARN("Failed to get column range sel", K(qual), K(ret)); - } - } else if (T_OP_ROW == left_expr->get_expr_type() && T_OP_ROW == right_expr->get_expr_type()) { - //only deal (col1, xx, xx) CMP (const, xx, xx) - if (left_expr->get_param_count() == 1 && OB_NOT_NULL(left_expr->get_param_expr(0)) && - T_OP_ROW == left_expr->get_param_expr(0)->get_expr_type()) { - left_expr = left_expr->get_param_expr(0); - } - if (right_expr->get_param_count() == 1 && OB_NOT_NULL(right_expr->get_param_expr(0)) && - T_OP_ROW == right_expr->get_param_expr(0)->get_expr_type()) { - right_expr = right_expr->get_param_expr(0); - } - if (left_expr->get_param_count() != right_expr->get_param_count()) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("param count should be equal", - K(left_expr->get_param_count()), K(right_expr->get_param_count())); - } else if (left_expr->get_param_count() <= 1) { - // do nothing - } else if (OB_ISNULL(left_expr = left_expr->get_param_expr(0)) || - OB_ISNULL(right_expr = right_expr->get_param_expr(0))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null", K(ret), K(left_expr), K(right_expr)); - } else if ((left_expr->is_column_ref_expr() && right_expr->is_const_expr()) || - (left_expr->is_const_expr() && right_expr->is_column_ref_expr())) { - const ObRawExpr *col_expr = (left_expr->is_column_ref_expr()) ? 
(left_expr) : (right_expr); - if (OB_FAIL(get_column_range_sel(table_metas, ctx, - static_cast(*col_expr), - qual, selectivity))) { - LOG_WARN("failed to get column range sel", K(ret)); - } - } else { /* no dothing */ } - } - return ret; -} - int ObOptSelectivity::get_column_range_sel(const OptTableMetas &table_metas, const OptSelectivityCtx &ctx, const ObColumnRefRawExpr &col_expr, @@ -2316,6 +1187,67 @@ int ObOptSelectivity::get_column_range_sel(const OptTableMetas &table_metas, return ret; } +int ObOptSelectivity::get_column_range_min_max(const OptSelectivityCtx &ctx, + const ObColumnRefRawExpr *col_expr, + const ObIArray &quals, + ObObj &obj_min, + ObObj &obj_max) +{ + int ret = OB_SUCCESS; + const ObDMLStmt *stmt = ctx.get_stmt(); + uint64_t tid = 0; + uint64_t cid = 0; + ObQueryRange query_range; + ObQueryRangeArray ranges; + ObSEArray column_items; + if (OB_ISNULL(stmt) || OB_ISNULL(col_expr) || + FALSE_IT(tid = col_expr->get_table_id()) || + FALSE_IT(cid = col_expr->get_column_id())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret), K(stmt), KPC(col_expr)); + } else if (OB_FAIL(check_column_in_current_level_stmt(stmt, *col_expr))) { + LOG_WARN("failed to check if column is in current level stmt", KPC(col_expr), K(ret)); + } else if (OB_FAIL(get_column_query_range(ctx, tid, cid, quals, + column_items, query_range, ranges))) { + LOG_WARN("failed to get column query range", K(ret)); + } else if (OB_ISNULL(column_items.at(0).expr_) || + OB_UNLIKELY(ranges.empty())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected range", K(ret), K(column_items), K(ranges)); + } else if (ranges.at(0)->is_whole_range() || + ranges.at(0)->empty()) { + // do nothing + } else { + bool is_valid = true; + ObObj tmp_min, tmp_max; + tmp_min.set_max_value(); + tmp_max.set_min_value(); + for (int64_t i = 0; OB_SUCC(ret) && is_valid && i < ranges.count(); ++i) { + if (OB_ISNULL(ranges.at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null range", K(ret), 
K(i)); + } else if (ranges.at(i)->is_whole_range() || + ranges.at(i)->empty()) { + is_valid = false; + } else { + const ObRowkey &startkey = ranges.at(i)->get_start_key(); + const ObRowkey &endkey = ranges.at(i)->get_end_key(); + tmp_min = std::min(tmp_min, startkey.get_obj_ptr()[0]); + tmp_max = std::max(tmp_max, endkey.get_obj_ptr()[0]); + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(ob_write_obj(ctx.get_allocator(), tmp_min, obj_min))) { + LOG_WARN("fail to deep copy ObObj", K(ret), K(obj_min)); + } else if (OB_FAIL(ob_write_obj(ctx.get_allocator(), tmp_max, obj_max))) { + LOG_WARN("fail to deep copy ObObj", K(ret), K(obj_min)); + } + LOG_TRACE("Get column range min max", K(obj_min), K(obj_max), K(quals)); + } + + return ret; +} + int ObOptSelectivity::get_single_newrange_selectivity(const OptTableMetas &table_metas, const OptSelectivityCtx &ctx, const ObIArray &range_columns, @@ -2561,702 +1493,6 @@ double ObOptSelectivity::revise_range_sel(double selectivity, return revise_between_0_1(selectivity); } -int ObOptSelectivity::get_like_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObRawExpr &qual, - double &selectivity, - bool &can_calc_sel) -{ - int ret = OB_SUCCESS; - selectivity = DEFAULT_INEQ_SEL; - const ObRawExpr *variable = NULL; - const ObRawExpr *pattern = NULL; - const ObRawExpr *escape = NULL; - const ParamStore *params = ctx.get_params(); - can_calc_sel = false; - if (3 != qual.get_param_count()) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("like expr should have 3 param", K(ret), K(qual)); - } else if (OB_ISNULL(params) || - OB_ISNULL(variable = qual.get_param_expr(0)) || - OB_ISNULL(pattern = qual.get_param_expr(1)) || - OB_ISNULL(escape = qual.get_param_expr(2))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get null params", K(ret), K(params), K(variable), K(pattern), K(escape)); - } else if (OB_FAIL(ObOptimizerUtil::get_expr_without_lossless_cast(variable, variable))) { - LOG_WARN("failed to get expr without lossless 
cast", K(ret)); - } else if (variable->is_column_ref_expr() && - ObOptEstUtils::is_calculable_expr(*pattern, params->count()) && - ObOptEstUtils::is_calculable_expr(*escape, params->count())) { - bool is_start_with = false; - if (is_lob_storage(variable->get_data_type())) { - // no statistics for lob type, use default selectivity - selectivity = DEFAULT_CLOB_LIKE_SEL; - } else if (OB_FAIL(ObOptEstUtils::if_expr_start_with_patten_sign(params, pattern, escape, - ctx.get_opt_ctx().get_exec_ctx(), - ctx.get_allocator(), - is_start_with))) { - LOG_WARN("failed to check if expr start with percent sign", K(ret)); - } else if (is_start_with) { - // do nothing - } else if (OB_FAIL(get_column_range_sel(table_metas, ctx, - static_cast(*variable), - qual, selectivity))) { - LOG_WARN("Failed to get column range selectivity", K(ret)); - } else { - can_calc_sel = true; - } - } - return ret; -} - -int ObOptSelectivity::get_btw_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObRawExpr &qual, - double &selectivity) -{ - int ret = OB_SUCCESS; - selectivity = DEFAULT_SEL; - const ObRawExpr *cmp_expr = NULL; - const ObRawExpr *l_expr = NULL; - const ObRawExpr *r_expr = NULL; - const ObRawExpr *col_expr = NULL; - const ParamStore *params = ctx.get_params(); - if (3 != qual.get_param_count()) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("between expr should have 3 param", K(ret), K(qual)); - } else if (OB_ISNULL(params) || - OB_ISNULL(cmp_expr = qual.get_param_expr(0)) || - OB_ISNULL(l_expr = qual.get_param_expr(1)) || - OB_ISNULL(r_expr = qual.get_param_expr(2))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get null params", K(ret), K(params), K(cmp_expr), K(l_expr), K(r_expr)); - } else if (OB_FAIL(ObOptimizerUtil::get_expr_without_lossless_cast(cmp_expr, cmp_expr))) { - LOG_WARN("failed to get expr without lossless cast", K(ret)); - } else if (cmp_expr->is_column_ref_expr() && - ObOptEstUtils::is_calculable_expr(*l_expr, params->count()) && - 
ObOptEstUtils::is_calculable_expr(*r_expr, params->count())) { - col_expr = cmp_expr; - } else if (ObOptEstUtils::is_calculable_expr(*cmp_expr, params->count()) && - l_expr->is_column_ref_expr() && - ObOptEstUtils::is_calculable_expr(*r_expr, params->count())) { - col_expr = l_expr; - } else if (ObOptEstUtils::is_calculable_expr(*cmp_expr, params->count()) && - ObOptEstUtils::is_calculable_expr(*l_expr, params->count()) && - r_expr->is_column_ref_expr()) { - col_expr = r_expr; - } - if (NULL != col_expr) { - if (OB_FAIL(get_column_range_sel(table_metas, ctx, - static_cast(*col_expr), - qual, selectivity))) { - LOG_WARN("failed to get column range sel", K(ret)); - } - } - return ret; -} - -int ObOptSelectivity::get_not_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObRawExpr &qual, - double &selectivity, - ObIArray &all_predicate_sel) -{ - int ret = OB_SUCCESS; - selectivity = DEFAULT_SEL; - const ObRawExpr *child_expr = qual.get_param_expr(0); - double tmp_selectivity = 1.0; - ObSEArray cur_vars; - if (OB_ISNULL(child_expr)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get null expr", K(ret)); - } else if (OB_FAIL(calculate_qual_selectivity(table_metas, ctx, *child_expr, - tmp_selectivity, all_predicate_sel))) { - LOG_WARN("failed to calculate one qual selectivity", K(child_expr), K(ret)); - } else if (OB_FAIL(ObRawExprUtils::extract_column_exprs(child_expr, cur_vars))) { - LOG_WARN("failed to extract column exprs", K(ret)); - } else if (1 == cur_vars.count() && - T_OP_IS != child_expr->get_expr_type() && - T_OP_IS_NOT != child_expr->get_expr_type() && - T_OP_NSEQ != child_expr->get_expr_type()) { // for only one column, consider null_sel - double null_sel = 1.0; - if (OB_ISNULL(cur_vars.at(0))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get null expr", K(ret)); - } else if (OB_FAIL(get_column_basic_sel(table_metas, ctx, *cur_vars.at(0), NULL, &null_sel))) { - LOG_WARN("failed to get column basic sel", K(ret)); - } else { - selectivity = 
1.0 - null_sel - tmp_selectivity; - } - } else { - // for other condition, it's is too hard to consider null_sel, so ignore it. - // t_op_is, t_op_nseq , they are null safe exprs, don't consider null_sel. - selectivity = 1.0 - tmp_selectivity; - } - return ret; -} - -int ObOptSelectivity::get_ne_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObRawExpr &qual, - double &selectivity) -{ - int ret = OB_SUCCESS; - const ObRawExpr *left_expr = qual.get_param_expr(0); - const ObRawExpr *right_expr = qual.get_param_expr(1); - if (OB_ISNULL(left_expr) || OB_ISNULL(right_expr)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get null expr", K(ret), K(qual), K(left_expr), K(right_expr)); - } else if (OB_FAIL(get_ne_sel(table_metas, ctx, *left_expr, *right_expr, selectivity))) { - LOG_WARN("failed to get equal sel", K(ret)); - } - return ret; -} - -int ObOptSelectivity::get_ne_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObRawExpr &l_expr, - const ObRawExpr &r_expr, - double &selectivity) -{ - int ret = OB_SUCCESS; - selectivity = DEFAULT_SEL; - if (T_OP_ROW == l_expr.get_expr_type() && T_OP_ROW == r_expr.get_expr_type()) { - // (var1, var2) != (var3, var4) => var1 != var3 or var2 != var4 - selectivity = 0; - double tmp_selectivity = 1.0; - const ObRawExpr *l_param = NULL; - const ObRawExpr *r_param = NULL; - const ObRawExpr *l_row = &l_expr; - const ObRawExpr *r_row = &r_expr; - if (l_expr.get_param_count() == 1 && OB_NOT_NULL(l_expr.get_param_expr(0)) && - T_OP_ROW == l_expr.get_param_expr(0)->get_expr_type()) { - l_row = l_expr.get_param_expr(0); - } - if (r_expr.get_param_count() == 1 && OB_NOT_NULL(r_expr.get_param_expr(0)) && - T_OP_ROW == r_expr.get_param_expr(0)->get_expr_type()) { - r_row = r_expr.get_param_expr(0); - } - if (OB_UNLIKELY(l_row->get_param_count() != r_row->get_param_count())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected expr", KPC(l_row), KPC(r_row), K(ret)); - } else { - int64_t num = 
l_row->get_param_count(); - for (int64_t i = 0; OB_SUCC(ret) && i < num; ++i) { - if (OB_ISNULL(l_param = l_row->get_param_expr(i)) || - OB_ISNULL(r_param = r_row->get_param_expr(i))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get null expr", K(ret), K(l_row), K(r_row), K(i)); - } else if (OB_FAIL(SMART_CALL(get_ne_sel(table_metas, ctx, *l_param, - *r_param, tmp_selectivity)))) { - LOG_WARN("failed to get equal selectivity", K(ret)); - } else { - selectivity += tmp_selectivity - selectivity * tmp_selectivity; - } - } - } - } else if (l_expr.has_flag(CNT_COLUMN) && r_expr.has_flag(CNT_COLUMN)) { - if (OB_FAIL(get_cntcol_op_cntcol_sel(table_metas, ctx, l_expr, r_expr, T_OP_NE, selectivity))) { - LOG_WARN("failed to get cntcol op cntcol sel", K(ret)); - } - } else if ((l_expr.has_flag(CNT_COLUMN) && !r_expr.has_flag(CNT_COLUMN)) || - (!l_expr.has_flag(CNT_COLUMN) && r_expr.has_flag(CNT_COLUMN))) { - const ObRawExpr *cnt_col_expr = l_expr.has_flag(CNT_COLUMN) ? &l_expr : &r_expr; - const ObRawExpr *const_expr = l_expr.has_flag(CNT_COLUMN) ? &r_expr : &l_expr; - ObSEArray column_exprs; - bool only_monotonic_op = true; - bool null_const = false; - double ndv = 1.0; - double nns = 0; - bool can_use_hist = false; - ObObj expr_value; - ObOptColumnStatHandle handler; - if (OB_FAIL(ObOptimizerUtil::get_expr_without_lossless_cast(cnt_col_expr, cnt_col_expr))) { - LOG_WARN("failed to check is lossless column cast", K(ret)); - } else if (cnt_col_expr->is_column_ref_expr()) { - // column != const - const ObColumnRefRawExpr *col = static_cast(cnt_col_expr); - if (OB_FAIL(get_histogram_by_column(table_metas, ctx, col->get_table_id(), - col->get_column_id(), handler))) { - LOG_WARN("failed to get histogram by column", K(ret)); - } else if (handler.stat_ == NULL || !handler.stat_->get_histogram().is_valid()) { - // do nothing - } else if (OB_FAIL(get_compare_value(ctx, col, const_expr, expr_value, can_use_hist))) { - // cast may failed due to invalid type or value out of range. 
- // Then use ndv instead of histogram - can_use_hist = false; - ret = OB_SUCCESS; - } - } - if (OB_SUCC(ret)) { - if (can_use_hist) { - double hist_scale = 0; - if (OB_FAIL(get_column_hist_scale(table_metas, ctx, *cnt_col_expr, hist_scale))) { - LOG_WARN("failed to get columnn hist sample scale", K(ret)); - } else if (OB_FAIL(get_equal_pred_sel(handler.stat_->get_histogram(), expr_value, hist_scale, selectivity))) { - LOG_WARN("Failed to get equal density", K(ret)); - } else if (OB_FAIL(get_column_ndv_and_nns(table_metas, ctx, *cnt_col_expr, NULL, &nns))) { - LOG_WARN("failed to get column ndv and nns", K(ret)); - } else { - selectivity = (1.0 - selectivity) * nns; - } - } else if (OB_FAIL(ObOptEstUtils::extract_column_exprs_with_op_check(cnt_col_expr, - column_exprs, - only_monotonic_op))) { - LOG_WARN("failed to extract column exprs with op check", K(ret)); - } else if (!only_monotonic_op || column_exprs.count() > 1) { - selectivity = DEFAULT_SEL; //cnt_col_expr contain not monotonic op OR has more than 1 var - } else if (OB_UNLIKELY(1 != column_exprs.count()) || OB_ISNULL(column_exprs.at(0))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected contain column expr", K(ret), K(*cnt_col_expr)); - } else if (OB_FAIL(ObOptEstUtils::if_expr_value_null(ctx.get_params(), - *const_expr, - ctx.get_opt_ctx().get_exec_ctx(), - ctx.get_allocator(), - null_const))) { - LOG_WARN("Failed to check whether expr null value", K(ret)); - } else if (null_const) { - selectivity = 0.0; - } else if (OB_FAIL(get_column_ndv_and_nns(table_metas, ctx, *column_exprs.at(0), &ndv, &nns))) { - LOG_WARN("failed to get column ndv and nns", K(ret)); - } else if (ndv < 2.0) { - //The reason doing this is similar as get_is_sel function. - //If distinct_num is 1, As formula, selectivity of 'c1 != 1' would be 0.0. - //But we don't know the distinct value, so just get the half selectivity. 
- selectivity = nns / ndv / 2.0; - } else { - selectivity = nns * (1.0 - 1 / ndv); - } - } - } else { }//do nothing - return ret; -} - -int ObOptSelectivity::get_agg_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObRawExpr &qual, - double &selectivity) -{ - int ret = OB_SUCCESS; - const double origin_rows = ctx.get_row_count_1(); // rows before group by - const double grouped_rows = ctx.get_row_count_2();// rows after group by - bool is_valid = false; - const ObRawExpr *aggr_expr = NULL; - const ObRawExpr *const_expr1 = NULL; - const ObRawExpr *const_expr2 = NULL; - selectivity = DEFAULT_AGG_RANGE; - ObItemType type = qual.get_expr_type(); - // for aggregate function in having clause, only support - // = <=> != > >= < <= [not] btw [not] in - if (-1.0 == origin_rows || -1.0 == grouped_rows) { - // 不是在group by层计算的having filter,使用默认选择率 - // e.g. select * from t7 group by c1 having count(*) > (select c1 from t8 limit 1); - // 该sql中having filter需要在subplan filter中计算 - } else if ((type >= T_OP_EQ && type <= T_OP_NE) || - T_OP_IN == type || T_OP_NOT_IN == type || - T_OP_BTW == type || T_OP_NOT_BTW == type) { - if (OB_FAIL(is_valid_agg_qual(qual, is_valid, aggr_expr, const_expr1, const_expr2))) { - LOG_WARN("failed to check is valid agg qual", K(ret)); - } else if (!is_valid) { - /* use default selectivity */ - } else if (OB_ISNULL(aggr_expr)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null", K(ret)); - } else if (T_FUN_MAX == aggr_expr->get_expr_type() || - T_FUN_MIN == aggr_expr->get_expr_type() || - T_FUN_COUNT == aggr_expr->get_expr_type()) { - if (T_OP_EQ == type || T_OP_NSEQ == type) { - selectivity = DEFAULT_AGG_EQ; - } else if (T_OP_NE == type || IS_RANGE_CMP_OP(type)) { - selectivity = DEFAULT_AGG_RANGE; - } else if (T_OP_BTW == type) { - // agg(col) btw const1 and const2 <=> agg(col) > const1 AND agg(col) < const2 - selectivity = DEFAULT_AGG_RANGE * DEFAULT_AGG_RANGE; - } else if (T_OP_NOT_BTW == type) { - // agg(col) 
not btw const1 and const2 <=> agg(col) < const1 OR agg(col) > const2 - // 计算方式参考OR - selectivity = DEFAULT_AGG_RANGE + DEFAULT_AGG_RANGE; - } else if (T_OP_IN == type) { - /** - * oracle 对 max/min/count(col) in (const1, const2, const3, ...)的选择率估计 - * 当const的数量小于等于5时,每增加一个const值,选择率增加 DEFAULT_AGG_EQ(0.01) - * 当const的数量大于5时,每增加一个const值,选择率增加 - * DEFAULT_AGG_EQ - 0.001 * (const_num - 5) - * # 这里的选择率增加量采用线性下降其实并不是很精确,oracle的选择率增加量可能采用了了指数下降, - * 在测试过程中测试了1-30列递增的情况,线性下降和指数下降区别不大。 - */ - int64_t N; - if(OB_ISNULL(const_expr1)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null"); - } else if (FALSE_IT(N = const_expr1->get_param_count())) { - } else if (N < 6) { - selectivity = DEFAULT_AGG_EQ * N; - } else { - N = std::min(N, 15L); - selectivity = DEFAULT_AGG_EQ * 5 + (DEFAULT_AGG_EQ - 0.0005 * (N - 4)) * (N - 5); - } - } else if (T_OP_NOT_IN == type) { - // agg(col) not in (const1, const2, ...) <=> agg(col) != const1 and agg(col) != const2 and ... - if(OB_ISNULL(const_expr1)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null"); - } else { - selectivity = std::pow(DEFAULT_AGG_RANGE, const_expr1->get_param_count()); - } - } else { /* use default selectivity */ } - } else if (T_FUN_SUM == aggr_expr->get_expr_type() || T_FUN_AVG == aggr_expr->get_expr_type()) { - LOG_TRACE("show group by origen rows and grouped rows", K(origin_rows), K(grouped_rows)); - double rows_per_group = grouped_rows == 0.0 ? 
origin_rows : origin_rows / grouped_rows; - if (OB_FAIL(get_agg_sel_with_minmax(table_metas, ctx, *aggr_expr, const_expr1, - const_expr2, type, selectivity, rows_per_group))) { - LOG_WARN("failed to get agg sel with minmax", K(ret)); - } - } else { /* not max/min/count/sum/avg, use default selectivity */ } - } else { /* use default selectivity */ } - return ret; -} - -int ObOptSelectivity::get_agg_sel_with_minmax(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObRawExpr &aggr_expr, - const ObRawExpr *const_expr1, - const ObRawExpr *const_expr2, - const ObItemType type, - double &selectivity, - const double rows_per_group) -{ - int ret = OB_SUCCESS; - selectivity = DEFAULT_AGG_RANGE; - const ParamStore *params = ctx.get_params(); - const ObDMLStmt *stmt = ctx.get_stmt(); - ObExecContext *exec_ctx = ctx.get_opt_ctx().get_exec_ctx(); - ObIAllocator &alloc = ctx.get_allocator(); - ObObj result1; - ObObj result2; - bool got_result; - double distinct_sel = 1.0; - ObObj maxobj; - ObObj minobj; - maxobj.set_max_value(); - minobj.set_min_value(); - if (OB_ISNULL(aggr_expr.get_param_expr(0)) || OB_ISNULL(params) || - OB_ISNULL(stmt) || OB_ISNULL(const_expr1)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null", K(ret), K(aggr_expr.get_param_expr(0)), - K(params), K(stmt), K(const_expr1)); - } else if (!aggr_expr.get_param_expr(0)->is_column_ref_expr()) { - // 只处理sum(column)的形式,sum(column + 1)/sum(column1 + column2)都是用默认选择率 - } else if (OB_FAIL(get_column_basic_sel(table_metas, ctx, *aggr_expr.get_param_expr(0), - &distinct_sel, NULL))) { - LOG_WARN("failed to get column basic sel", K(ret)); - } else if (OB_FAIL(get_column_min_max(table_metas, ctx, *aggr_expr.get_param_expr(0), - minobj, maxobj))) { - LOG_WARN("failed to get column min max", K(ret)); - } else if (minobj.is_min_value() || maxobj.is_max_value()) { - // do nothing - } else if (T_OP_IN == type || T_OP_NOT_IN == type) { - if (OB_UNLIKELY(T_OP_ROW != 
const_expr1->get_expr_type())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("expr should be row", K(ret), K(*const_expr1)); - } else { - // 如果row超过5列,则计算5列上的选择率,再按比例放大 - int64_t N = const_expr1->get_param_count() > 5 ? 5 :const_expr1->get_param_count(); - selectivity = T_OP_IN == type ? 0.0 : 1.0; - for (int64_t i = 0; OB_SUCC(ret) && i < N; ++i) { - double tmp_sel = T_OP_IN == type ? DEFAULT_AGG_EQ : DEFAULT_AGG_RANGE; - const ObRawExpr *sub_expr = NULL; - ObObj tmp_result; - if (OB_ISNULL(sub_expr = const_expr1->get_param_expr(i))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null", K(ret)); - } else if (!ObOptEstUtils::is_calculable_expr(*sub_expr, params->count())) { - } else if (OB_FAIL(ObSQLUtils::calc_const_or_calculable_expr(exec_ctx, - sub_expr, - tmp_result, - got_result, - alloc))) { - LOG_WARN("failed to calc const or calculable expr", K(ret)); - } else if (!got_result) { - // do nothing - } else { - tmp_sel = get_agg_eq_sel(maxobj, minobj, tmp_result, distinct_sel, rows_per_group, - T_OP_IN == type, T_FUN_SUM == aggr_expr.get_expr_type()); - } - if (T_OP_IN == type) { - selectivity += tmp_sel; - } else { - selectivity *= tmp_sel; - } - } - if (OB_SUCC(ret)) { - selectivity *= static_cast(const_expr1->get_param_count()) - / static_cast(N); - } - } - } else if (!ObOptEstUtils::is_calculable_expr(*const_expr1, params->count())) { - } else if (OB_FAIL(ObSQLUtils::calc_const_or_calculable_expr(exec_ctx, - const_expr1, - result1, - got_result, - alloc))) { - LOG_WARN("failed to calc const or calculable expr", K(ret)); - } else if (!got_result) { - // do nothing - } else if (T_OP_EQ == type || T_OP_NSEQ == type) { - selectivity = get_agg_eq_sel(maxobj, minobj, result1, distinct_sel, rows_per_group, - true, T_FUN_SUM == aggr_expr.get_expr_type()); - } else if (T_OP_NE == type) { - selectivity = get_agg_eq_sel(maxobj, minobj, result1, distinct_sel, rows_per_group, - false, T_FUN_SUM == aggr_expr.get_expr_type()); - } else if (IS_RANGE_CMP_OP(type)) { - 
selectivity = get_agg_range_sel(maxobj, minobj, result1, rows_per_group, - type, T_FUN_SUM == aggr_expr.get_expr_type()); - } else if (T_OP_BTW == type || T_OP_NOT_BTW == type) { - if (OB_ISNULL(const_expr2)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null", K(ret)); - } else if (!ObOptEstUtils::is_calculable_expr(*const_expr2, params->count())) { - } else if (OB_FAIL(ObSQLUtils::calc_const_or_calculable_expr(exec_ctx, - const_expr2, - result2, - got_result, - alloc))) { - LOG_WARN("Failed to calc const or calculable expr", K(ret)); - } else if (!got_result) { - // do nothing - } else { - selectivity = get_agg_btw_sel(maxobj, minobj, result1, result2, rows_per_group, - type, T_FUN_SUM == aggr_expr.get_expr_type()); - } - } else { /* do nothing */ } - return ret; -} - -// 计算sum/avg(col) =/<=>/!= const的选择率 -double ObOptSelectivity::get_agg_eq_sel(const ObObj &maxobj, - const ObObj &minobj, - const ObObj &constobj, - const double distinct_sel, - const double rows_per_group, - const bool is_eq, - const bool is_sum) -{ - int ret = OB_SUCCESS; - double sel_ret = DEFAULT_AGG_EQ; - if (constobj.is_null()) { - // sum/avg(col)的结果中不会存在null,即使是null safe equal选择率依然为0 - sel_ret = 0.0; - } else if (minobj.is_integer_type() || - (minobj.is_number() && minobj.get_meta().get_obj_meta().get_scale() == 0) || - (minobj.is_unumber() && minobj.get_meta().get_obj_meta().get_scale() == 0)) { - double const_val; - double min_val; - double max_val; - // 如果转化的时候出错,就使用默认的选择率 - if (OB_FAIL(ObOptEstObjToScalar::convert_obj_to_double(&constobj, const_val)) || - OB_FAIL(ObOptEstObjToScalar::convert_obj_to_double(&minobj, min_val)) || - OB_FAIL(ObOptEstObjToScalar::convert_obj_to_double(&maxobj, max_val))) { - LOG_WARN("failed to convert obj to double", K(ret)); - } else { - LOG_TRACE("get values for agg eq sel", K(max_val), K(min_val), K(const_val)); - if (is_sum) { - min_val *= rows_per_group; - max_val *= rows_per_group; - } - int64_t length = max_val - min_val + 1; - if (is_eq) { - 
sel_ret = 1.0 / length; - if (const_val < min_val) { - sel_ret -= sel_ret * (min_val - const_val) / length; - } else if (const_val > max_val) { - sel_ret -= sel_ret * (const_val - max_val) / length; - } else {} - } else { - sel_ret = 1.0 - 1.0 / length; - } - } - } else { - // 对于非整数的类型,认为sum/avg(col)后 ndv 不会发生显著变化,直接使用该列原有的ndv计算 - sel_ret = is_eq ? distinct_sel : 1.0 - distinct_sel; - } - sel_ret = revise_between_0_1(sel_ret); - return sel_ret; -} - -// 计算sum/avg(col) >/>=/ 1 <=> c1 >= 2, 对非int类型的列并不精确 - const_val += 1.0; - } - if (const_val <= min_val) { - sel_ret = 1.0; - } else if (const_val <= max_val) { - sel_ret = (max_val - const_val + 1.0) / length; - } else { - sel_ret = 1.0 / length; - sel_ret -= sel_ret * (const_val - max_val) / length; - } - } else if (T_OP_LE == type || T_OP_LT == type) { - if (T_OP_LT == type) { - // c1 < 1 <=> c1 <= 0, 对非int类型的列并不精确 - const_val -= 1.0; - } - if (const_val >= max_val) { - sel_ret = 1.0; - } else if (const_val >= min_val) { - sel_ret = (const_val - min_val + 1.0) / length; - } else { - sel_ret = 1.0 / length; - sel_ret -= sel_ret * (min_val - const_val) / length; - } - } else { /* do nothing */ } - } - } - sel_ret = revise_between_0_1(sel_ret); - return sel_ret; -} - -// 计算sum/avg(col) [not] between const1 and const2的选择率 -double ObOptSelectivity::get_agg_btw_sel(const ObObj &maxobj, - const ObObj &minobj, - const ObObj &constobj1, - const ObObj &constobj2, - const double rows_per_group, - const ObItemType type, - const bool is_sum) -{ - int ret = OB_SUCCESS; - double sel_ret = DEFAULT_AGG_RANGE; - if (constobj1.is_null() || constobj2.is_null()) { - sel_ret= 0.0; - } else { - double min_val; - double max_val; - double const_val1; - double const_val2; - // 如果转化的时候出错,就使用默认的选择率 - if (OB_FAIL(ObOptEstObjToScalar::convert_obj_to_double(&minobj, min_val))) { - LOG_WARN("failed to convert obj to double", K(ret)); - } else if (OB_FAIL(ObOptEstObjToScalar::convert_obj_to_double(&maxobj, max_val))) { - LOG_WARN("failed to convert 
obj to double", K(ret)); - } else if (OB_FAIL(ObOptEstObjToScalar::convert_obj_to_double(&constobj1, const_val1))) { - LOG_WARN("failed to convert obj to double", K(ret)); - } else if (OB_FAIL(ObOptEstObjToScalar::convert_obj_to_double(&constobj2, const_val2))) { - LOG_WARN("failed to convert obj to double", K(ret)); - } else { - LOG_TRACE("get values for agg between sel", K(max_val), K(min_val), K(const_val1), K(const_val2)); - if (is_sum) { - min_val *= rows_per_group; - max_val *= rows_per_group; - } - double length = max_val - min_val + 1.0; - if (T_OP_BTW == type) { - if (const_val1 > const_val2) { - sel_ret = 0.0; - } else { - double tmp_min = std::max(const_val1, min_val); - double tmp_max = std::min(const_val2, max_val); - sel_ret = (tmp_max - tmp_min + 1.0) / length; - } - } else if (T_OP_NOT_BTW == type){ - if (const_val1 > const_val2) { - sel_ret = 1.0; - } else { - double tmp_min = std::max(const_val1, min_val); - double tmp_max = std::min(const_val2, max_val); - sel_ret = 1 - (tmp_max - tmp_min + 1.0) / length; - } - } else { /* do nothing */ } - } - } - sel_ret = revise_between_0_1(sel_ret); - return sel_ret; -} - -int ObOptSelectivity::is_valid_agg_qual(const ObRawExpr &qual, - bool &is_valid, - const ObRawExpr *&aggr_expr, - const ObRawExpr *&const_expr1, - const ObRawExpr *&const_expr2) -{ - int ret = OB_SUCCESS; - is_valid = false; - const ObRawExpr *expr0 = NULL; - const ObRawExpr *expr1 = NULL; - const ObRawExpr *expr2 = NULL; - if (T_OP_BTW == qual.get_expr_type() || T_OP_NOT_BTW == qual.get_expr_type()) { - if (OB_ISNULL(expr0 = qual.get_param_expr(0)) || - OB_ISNULL(expr1 = qual.get_param_expr(1)) || - OB_ISNULL(expr2 = qual.get_param_expr(2))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null", K(ret)); - } else if (expr0->has_flag(IS_AGG) && - expr1->is_const_expr() && - expr2->is_const_expr()) { - is_valid = true; - aggr_expr = expr0; - const_expr1 = expr1; - const_expr2 = expr2; - } else { /* do nothing */ } - } else { - if 
(OB_ISNULL(expr0 = qual.get_param_expr(0)) || OB_ISNULL(expr1 = qual.get_param_expr(1))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null", K(ret)); - } else if (T_OP_IN == qual.get_expr_type() || T_OP_NOT_IN == qual.get_expr_type()) { - if (!qual.has_flag(CNT_SUB_QUERY) && - expr0->has_flag(IS_AGG) && - T_OP_ROW == expr1->get_expr_type()) { - is_valid = true; - aggr_expr = expr0; - const_expr1 = expr1; - } else { /* do nothing */ } - } else if (expr0->has_flag(IS_AGG) && - expr1->is_const_expr()) { - is_valid = true; - aggr_expr = expr0; - const_expr1 = expr1; - } else if (expr0->is_const_expr() && - expr1->has_flag(IS_AGG)) { - is_valid = true; - aggr_expr = expr1; - const_expr1 = expr0; - } else { /* do nothing */ } - } - return ret; -} - int ObOptSelectivity::check_column_in_current_level_stmt(const ObDMLStmt *stmt, const ObRawExpr &expr) { @@ -3337,7 +1573,7 @@ int ObOptSelectivity::get_column_ndv_and_nns(const OptTableMetas &table_metas, double not_null_sel = row_count <= OB_DOUBLE_EPSINON ? 1.0 : 1 - revise_between_0_1(num_null / row_count); assign_value(ndv, ndv_ptr); assign_value(not_null_sel, not_null_sel_ptr); - LOG_TRACE("column ndv and not null sel", K(ndv), K(not_null_sel)); + LOG_TRACE("column ndv and not null sel", K(ndv), K(not_null_sel), K(row_count), K(num_null)); } return ret; } @@ -3953,12 +2189,6 @@ int ObOptSelectivity::get_simple_mutex_column(const ObRawExpr *qual, const ObRaw return ret; } -/** - * 算法流程: - * 1. 简化所有的group by表达式, 取出group by中所有的列 - * 2. 对于多个列存在于同一个 Equal Set, 只保留 ndv 最小的一个. - * 3. distinct number = ndv(column1) * (ndv(column2) / sqrt(2)) * (ndv(column3) / sqrt(2)) * ... 
- */ int ObOptSelectivity::calculate_distinct(const OptTableMetas &table_metas, const OptSelectivityCtx &ctx, const ObIArray& exprs, @@ -3968,31 +2198,29 @@ int ObOptSelectivity::calculate_distinct(const OptTableMetas &table_metas, { int ret = OB_SUCCESS; rows = 1; - // 记录各个列的ndv中的最大值 - ObSEArray column_exprs; - ObSEArray filtered_exprs; - if (OB_FAIL(ObRawExprUtils::extract_column_exprs(exprs, column_exprs))) { - LOG_WARN("failed to extract all column", K(ret)); + ObSEArray column_exprs; + ObSEArray special_exprs; + ObSEArray expr_ndv; + ObSEArray filtered_exprs; + //classify expr and get ndv + if (OB_FAIL(classify_exprs(exprs, column_exprs, special_exprs, table_metas, ctx))) { + LOG_WARN("failed to classify_exprs", K(ret)); } else if (OB_FAIL(filter_column_by_equal_set(table_metas, ctx, column_exprs, filtered_exprs))) { LOG_WARN("failed filter column by equal set", K(ret)); + } else if (OB_FAIL(calculate_expr_ndv(filtered_exprs, expr_ndv, table_metas, ctx, origin_rows))) { + LOG_WARN("fail to calculate expr ndv", K(ret)); + } else if (OB_FAIL(calculate_expr_ndv(special_exprs, expr_ndv, table_metas, ctx, origin_rows))) { + LOG_WARN("fail to calculate special expr ndv", K(ret)); } - - for (int64_t i = 0; OB_SUCC(ret) && i < filtered_exprs.count(); ++i) { - ObRawExpr *column_expr = filtered_exprs.at(i); - double ndv = 0.0; - if (OB_ISNULL(column_expr)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get null expr", K(ret), K(i)); - } else if (OB_FAIL(check_column_in_current_level_stmt(ctx.get_stmt(), *column_expr))) { - LOG_WARN("failed to check column in current level stmt", K(ret)); - } else if (OB_FAIL(get_column_basic_info(table_metas, ctx, *column_expr, &ndv, NULL, NULL, NULL))) { - LOG_WARN("failed to get column basic info", K(ret), K(*column_expr)); - } else if (0 == i) { - rows *= ndv; + //calculate rows + for (int64_t i = 0; OB_SUCC(ret) && i < expr_ndv.count(); ++i) { + if (0 == i) { + rows *= expr_ndv.at(i); } else { - rows *= ndv / std::sqrt(2); + rows *= 
expr_ndv.at(i) / std::sqrt(2); } } + //refine if (OB_SUCC(ret) && need_refine) { rows = std::min(rows, origin_rows); LOG_TRACE("succeed to calculate distinct", K(origin_rows), K(rows), K(exprs)); @@ -4000,6 +2228,211 @@ int ObOptSelectivity::calculate_distinct(const OptTableMetas &table_metas, return ret; } +int ObOptSelectivity::classify_exprs(const ObIArray& exprs, + ObIArray& column_exprs, + ObIArray& special_exprs, + const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx) +{ + int ret = OB_SUCCESS; + for (int64_t i = 0; OB_SUCC(ret) && i < exprs.count(); ++i) { + ObRawExpr *child_expr = NULL; + if (OB_ISNULL(child_expr = exprs.at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null expr", K(ret), K(i)); + } else if (OB_FAIL(classify_exprs(child_expr, column_exprs, special_exprs, table_metas, ctx))) { + LOG_WARN("failed to classify_exprs", K(ret)); + } + } + return ret; +} + +int ObOptSelectivity::classify_exprs(ObRawExpr* expr, + ObIArray& column_exprs, + ObIArray& special_exprs, + const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null pointer", K(expr), K(ret)); + } else if (is_special_expr(*expr)) { + if (OB_FAIL(add_var_to_array_no_dup(special_exprs, expr))) { + LOG_WARN("fail to add expr to array", K(ret)); + } + } else if (expr->is_column_ref_expr()) { + if (OB_FAIL(add_var_to_array_no_dup(column_exprs, expr))) { + LOG_WARN("fail to add expr to array", K(ret)); + } + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < expr->get_param_count(); ++i) { + ObRawExpr *child_expr = NULL; + if (OB_ISNULL(child_expr = expr->get_param_expr(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null expr", K(ret), K(i)); + } else if (OB_FAIL(classify_exprs(child_expr, column_exprs, special_exprs, table_metas, ctx))) { + LOG_WARN("failed to classify_exprs", K(ret)); + } + } + } + return ret; +} + +bool ObOptSelectivity::is_special_expr(const 
ObRawExpr &expr) { + bool is_special = false; + if (expr.is_win_func_expr()) { + is_special = true; + } + return is_special; +} + +int ObOptSelectivity::calculate_expr_ndv(const ObIArray& exprs, + ObIArray& expr_ndv, + const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const double origin_rows) +{ + int ret = OB_SUCCESS; + for (int64_t i = 0; OB_SUCC(ret) && i < exprs.count(); ++i) { + ObRawExpr *expr = exprs.at(i); + double ndv = 0.0; + if (OB_ISNULL(expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null expr", K(ret), K(i)); + } else if (expr->is_column_ref_expr()) { + if (OB_FAIL(check_column_in_current_level_stmt(ctx.get_stmt(), *expr))) { + LOG_WARN("failed to check column in current level stmt", K(ret)); + } else if (OB_FAIL(get_column_basic_info(table_metas, ctx, *expr, &ndv, NULL, NULL, NULL))) { + LOG_WARN("failed to get column basic info", K(ret), K(*expr)); + } else if (OB_FAIL(expr_ndv.push_back(ndv))) { + LOG_WARN("failed to push back expr", K(ret), K(ndv)); + } + } else if (OB_FAIL(calculate_special_ndv(table_metas, expr, ctx, ndv, origin_rows))) { + LOG_WARN("failed to calculate special expr ndv", K(ret), K(ndv)); + } else if (OB_FAIL(expr_ndv.push_back(ndv))) { + LOG_WARN("failed to push back", K(ret), K(ndv)); + } + } + return ret; +} + +int ObOptSelectivity::calculate_special_ndv(const OptTableMetas &table_metas, + const ObRawExpr* expr, + const OptSelectivityCtx &ctx, + double &special_ndv, + const double origin_rows) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null pointer", K(expr), K(ret)); + } else if (expr->is_win_func_expr()) { + double part_order_ndv = 1.0; + double order_ndv = 1.0; + double part_ndv = 1.0; + ObSEArray part_exprs; + ObSEArray order_exprs; + ObSEArray part_order_exprs; + const ObWinFunRawExpr *win_expr = reinterpret_cast(expr); + const ObIArray &order_items = win_expr->get_order_items(); + for (int64_t i = 0; OB_SUCC(ret) && i < 
order_items.count(); ++i) { + const OrderItem &order_item = order_items.at(i); + ObRawExpr *order_expr = order_item.expr_; + if (OB_ISNULL(order_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null pointer", K(ret)); + } else if (OB_FAIL(order_exprs.push_back(order_expr))) { + LOG_WARN("fail to push back expr", K(ret)); + } + } + + if (OB_FAIL(ret)) { + //do nothing + } else if (OB_FAIL(part_exprs.assign(win_expr->get_partition_exprs()))) { + LOG_WARN("fail to assign exprs", K(ret)); + } else if (OB_FAIL(part_order_exprs.assign(part_exprs))) { + LOG_WARN("fail to assign exprs", K(ret)); + } else if (OB_FAIL(append(part_order_exprs, order_exprs))) { + LOG_WARN("failed to append exprs", K(ret)); + } else if (OB_FAIL(SMART_CALL(calculate_distinct(table_metas, ctx, part_order_exprs, origin_rows, part_order_ndv, false)))) { + LOG_WARN("failed to calculate_distinct", K(ret)); + } else if (OB_FAIL(SMART_CALL(calculate_distinct(table_metas, ctx, order_exprs, origin_rows, order_ndv, false)))) { + LOG_WARN("failed to calculate_distinct", K(ret)); + } else if (OB_FAIL(SMART_CALL(calculate_distinct(table_metas, ctx, part_exprs, origin_rows, part_ndv, false)))) { + LOG_WARN("failed to calculate_distinct", K(ret)); + } + + if (OB_FAIL(ret)) { + //do nothing + } else if (T_WIN_FUN_ROW_NUMBER == win_expr->get_func_type()) { + special_ndv = origin_rows/part_ndv; + } else if ((T_FUN_COUNT == win_expr->get_func_type() && order_exprs.count() != 0) || + T_WIN_FUN_RANK == win_expr->get_func_type() || + T_WIN_FUN_DENSE_RANK == win_expr->get_func_type() || + T_WIN_FUN_PERCENT_RANK == win_expr->get_func_type() || + T_WIN_FUN_CUME_DIST == win_expr->get_func_type()) { + special_ndv = scale_distinct(origin_rows/part_ndv, origin_rows, order_ndv); + } else if (T_WIN_FUN_NTILE == win_expr->get_func_type()) { + ObSEArray param_exprs; + ObRawExpr* const_expr = NULL; + ObObj result; + bool got_result = false; + const ParamStore *params = ctx.get_params(); + if 
(OB_FAIL(param_exprs.assign(win_expr->get_func_params()))) { + LOG_WARN("fail to assign exprs", K(ret)); + } else if (param_exprs.count() == 0|| OB_ISNULL(const_expr = param_exprs.at(0))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error", K(param_exprs.count()), K(const_expr), K(ret)); + } else if (ObOptEstUtils::is_calculable_expr(*const_expr, params->count())) { + if (OB_FAIL(ObSQLUtils::calc_const_or_calculable_expr(ctx.get_opt_ctx().get_exec_ctx(), + const_expr, + result, + got_result, + ctx.get_allocator()))) { + LOG_WARN("fail to calc_const_or_calculable_expr", K(ret)); + } else if (!got_result || result.is_null() || !ob_is_numeric_type(result.get_type())) { + special_ndv = origin_rows/part_ndv; + } else { + double n = (double)ObOptEstObjToScalar::convert_obj_to_scalar(&result); + special_ndv = std::min(origin_rows/part_ndv, n); + } + } + } else if (T_FUN_MIN == win_expr->get_func_type()|| + T_FUN_MEDIAN == win_expr->get_func_type()|| + T_WIN_FUN_MAX == win_expr->get_func_type() || + T_WIN_FUN_NTH_VALUE == win_expr->get_func_type() || + T_WIN_FUN_FIRST_VALUE == win_expr->get_func_type() || + T_WIN_FUN_LAST_VALUE == win_expr->get_func_type()) { + ObSEArray param_exprs; + double param_ndv = 1.0; + if (OB_FAIL(param_exprs.assign(win_expr->get_func_params()))) { + LOG_WARN("fail to assign exprs", K(ret)); + } else if (OB_FAIL(SMART_CALL(calculate_distinct(table_metas, ctx, param_exprs, origin_rows, param_ndv, false)))) { + LOG_WARN("failed to calculate_distinct", K(ret)); + } else { + special_ndv = std::min(part_order_ndv, param_ndv); + } + } else if (T_WIN_FUN_LEAD == win_expr->get_func_type() || + T_WIN_FUN_LAG == win_expr->get_func_type()) { + ObSEArray param_exprs; + double param_ndv = 1.0; + if (OB_FAIL(param_exprs.assign(win_expr->get_func_params()))) { + LOG_WARN("fail to assign exprs", K(ret)); + } else if (OB_FAIL(SMART_CALL(calculate_distinct(table_metas, ctx, param_exprs, origin_rows, param_ndv, false)))) { + LOG_WARN("failed to 
calculate_distinct", K(ret)); + } else { + special_ndv = param_ndv; + } + } else { + special_ndv = part_order_ndv; + } + LOG_TRACE("calculate win expr ndv", K(win_expr->get_func_type()), K(part_exprs.count()), K(order_exprs.count())); + } + special_ndv = revise_ndv(special_ndv); + return ret; +} + // 仅保留一个 ndv 最小的 distinct expr, 加入到 filtered_exprs 中; // 再把不在 equal set 中的列加入到 filtered_exprs 中, int ObOptSelectivity::filter_column_by_equal_set(const OptTableMetas &table_metas, @@ -4134,36 +2567,6 @@ int ObOptSelectivity::get_min_ndv_by_equal_set(const OptTableMetas &table_metas, return ret; } -/** - * check if multi join condition only related to two table - */ -int ObOptSelectivity::is_valid_multi_join(ObIArray &quals, - bool &is_valid) -{ - int ret = OB_SUCCESS; - is_valid = false; - if (OB_UNLIKELY(quals.count() < 2)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("quals should have more than 1 exprs", K(ret)); - } else if (OB_ISNULL(quals.at(0))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null", K(ret)); - } else { - const ObRelIds &rel_ids = quals.at(0)->get_relation_ids(); - is_valid = rel_ids.num_members() == 2; - for (int64_t i = 1; OB_SUCC(ret) && is_valid && i < quals.count(); ++i) { - ObRawExpr *cur_expr = quals.at(i); - if (OB_ISNULL(cur_expr)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null", K(ret)); - } else if (!rel_ids.equal(cur_expr->get_relation_ids())) { - is_valid = false; - } - } - } - return ret; -} - int ObOptSelectivity::is_columns_contain_pkey(const OptTableMetas &table_metas, const ObIArray &col_exprs, bool &is_pkey, @@ -4236,7 +2639,8 @@ int ObOptSelectivity::extract_column_ids(const ObIArray &col_exprs, return ret; } -int ObOptSelectivity::classify_quals(const ObIArray &quals, +int ObOptSelectivity::classify_quals(const OptSelectivityCtx &ctx, + const ObIArray &quals, ObIArray &all_predicate_sel, ObIArray &column_sel_infos) { @@ -4245,10 +2649,14 @@ int ObOptSelectivity::classify_quals(const ObIArray &quals, ObSEArray 
column_exprs; OptSelInfo *sel_info = NULL; double tmp_selectivity = 1.0; + ObArenaAllocator tmp_alloc("ObOptSel"); + ObSelEstimatorFactory factory(tmp_alloc); + ObSEArray range_estimators; for (int64_t i = 0; OB_SUCC(ret) && i < quals.count(); ++i) { column_exprs.reset(); uint64_t column_id = OB_INVALID_ID; ObColumnRefRawExpr *column_expr = NULL; + ObSelEstimator *range_estimator = NULL; if (OB_ISNULL(qual = quals.at(i))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected expr", K(ret)); @@ -4257,32 +2665,29 @@ int ObOptSelectivity::classify_quals(const ObIArray &quals, } else if (1 == column_exprs.count()) { column_expr = static_cast(column_exprs.at(0)); column_id = column_expr->get_column_id(); + if (!qual->has_flag(CNT_DYNAMIC_PARAM) && + OB_FAIL(ObRangeSelEstimator::create_estimator(factory, ctx, *qual, range_estimator))) { + LOG_WARN("failed to create estimator", K(ret)); + } else if (NULL != range_estimator && + OB_FAIL(ObSelEstimator::append_estimators(range_estimators, range_estimator))) { + LOG_WARN("failed to append estimators", K(ret)); + } } else { // use OB_INVALID_ID represent qual contain more than one column } if (OB_SUCC(ret) && OB_INVALID_ID != column_id && OB_NOT_NULL(column_expr)) { sel_info = NULL; - for (int64_t j = 0; j < column_sel_infos.count(); ++j) { - if (column_sel_infos.at(j).column_id_ == column_id) { - sel_info = &column_sel_infos.at(j); - break; - } - } - if (NULL == sel_info) { - if (OB_ISNULL(sel_info = column_sel_infos.alloc_place_holder())) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("failed to allocate place holder for sel info", K(ret)); - } else { - sel_info->column_id_ = column_id; - int64_t offset = 0; - if (ObOptimizerUtil::find_item(all_predicate_sel, - ObExprSelPair(column_expr, 0, true), - &offset)) { - sel_info->range_selectivity_ = all_predicate_sel.at(offset).sel_; - sel_info->has_range_exprs_ = true; - } - } + int64_t offset = 0; + if (OB_FAIL(get_opt_sel_info(column_sel_infos, column_expr->get_column_id(), 
sel_info))) { + LOG_WARN("failed to get opt sel info", K(ret)); + } else if (sel_info->has_range_exprs_) { + // do nothing + } else if (ObOptimizerUtil::find_item(all_predicate_sel, + ObExprSelPair(column_expr, 0, true), + &offset)) { + sel_info->range_selectivity_ = all_predicate_sel.at(offset).sel_; + sel_info->has_range_exprs_ = true; } } @@ -4303,6 +2708,57 @@ int ObOptSelectivity::classify_quals(const ObIArray &quals, } } } + + for (int64_t i = 0; OB_SUCC(ret) && i < range_estimators.count(); ++i) { + column_exprs.reset(); + uint64_t column_id = OB_INVALID_ID; + const ObColumnRefRawExpr *column_expr = NULL; + ObRangeSelEstimator *range_estimator = NULL; + ObObj obj_min; + ObObj obj_max; + if (OB_ISNULL(range_estimator = static_cast(range_estimators.at(i))) || + OB_UNLIKELY(ObSelEstType::RANGE != range_estimator->get_type()) || + OB_ISNULL(column_expr = range_estimator->get_column_expr())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected expr", K(ret)); + } else if (OB_FAIL(ObOptSelectivity::get_column_range_min_max( + ctx, column_expr, range_estimator->get_range_exprs(), obj_min, obj_max))) { + LOG_WARN("failed to get min max", K(ret)); + } else if (OB_FAIL(get_opt_sel_info(column_sel_infos, column_expr->get_column_id(), sel_info))) { + LOG_WARN("failed to get opt sel info", K(ret)); + } else { + if (!obj_min.is_null()) { + sel_info->min_ = obj_min; + } + if (!obj_max.is_null()) { + sel_info->max_ = obj_max; + } + } + } + return ret; +} + +int ObOptSelectivity::get_opt_sel_info(ObIArray &column_sel_infos, + const uint64_t column_id, + OptSelInfo *&sel_info) +{ + int ret = OB_SUCCESS; + sel_info = NULL; + bool found = false; + for (int64_t j = 0; !found && j < column_sel_infos.count(); ++j) { + if (column_sel_infos.at(j).column_id_ == column_id) { + sel_info = &column_sel_infos.at(j); + found = true; + } + } + if (NULL == sel_info) { + if (OB_ISNULL(sel_info = column_sel_infos.alloc_place_holder())) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed 
to allocate place holder for sel info", K(ret)); + } else { + sel_info->column_id_ = column_id; + } + } return ret; } @@ -4549,6 +3005,108 @@ int ObOptSelectivity::get_join_pred_rows(const ObHistogram &left_hist, // return ret; // } +double ObOptSelectivity::get_filters_selectivity(ObIArray &selectivities, FilterDependencyType type) +{ + double selectivity = 0.0; + if (FilterDependencyType::INDEPENDENT == type) { + selectivity = 1.0; + for (int64_t i = 0; i < selectivities.count(); i ++) { + selectivity *= selectivities.at(i); + } + } else if (FilterDependencyType::MUTEX_OR == type) { + selectivity = 0.0; + for (int64_t i = 0; i < selectivities.count(); i ++) { + selectivity += selectivities.at(i); + } + } else if (FilterDependencyType::EXPONENTIAL_BACKOFF == type) { + selectivity = 1.0; + if (!selectivities.empty()) { + double exp = 1.0; + std::sort(&selectivities.at(0), &selectivities.at(0) + selectivities.count()); + for (int64_t i = 0; i < selectivities.count(); i ++) { + selectivity *= std::pow(selectivities.at(i), exp); + exp /= 2; + } + } + } + selectivity = revise_between_0_1(selectivity); + return selectivity; +} + +int ObOptSelectivity::remove_ignorable_func_for_est_sel(const ObRawExpr *&expr) +{ + int ret = OB_SUCCESS; + bool is_ignorable = true; + while(OB_SUCC(ret) && is_ignorable) { + if (OB_ISNULL(expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("expr is NULL", K(ret)); + } else if (T_FUN_SYS_CAST == expr->get_expr_type() || + T_FUN_SYS_CONVERT == expr->get_expr_type() || + T_FUN_SYS_TO_DATE == expr->get_expr_type() || + T_FUN_SYS_TO_CHAR == expr->get_expr_type() || + T_FUN_SYS_TO_NCHAR == expr->get_expr_type() || + T_FUN_SYS_TO_NUMBER == expr->get_expr_type() || + T_FUN_SYS_TO_BINARY_FLOAT == expr->get_expr_type() || + T_FUN_SYS_TO_BINARY_DOUBLE == expr->get_expr_type() || + T_FUN_SYS_SET_COLLATION == expr->get_expr_type() || + T_FUN_SYS_TO_TIMESTAMP == expr->get_expr_type() || + T_FUN_SYS_TO_TIMESTAMP_TZ == expr->get_expr_type()) { + if 
(OB_UNLIKELY(1 > expr->get_param_count())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected param count", K(ret), KPC(expr)); + } else { + expr = expr->get_param_expr(0); + } + } else { + is_ignorable = false; + } + } + if (OB_ISNULL(expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("expr is NULL", K(ret)); + } + return ret; +} + +int ObOptSelectivity::remove_ignorable_func_for_est_sel(ObRawExpr *&expr) +{ + int ret = OB_SUCCESS; + const ObRawExpr *const_expr = expr; + ret = remove_ignorable_func_for_est_sel(const_expr); + expr = const_cast(const_expr); + return ret; +} + +double ObOptSelectivity::get_set_stmt_output_count(double count1, double count2, ObSelectStmt::SetOperator set_type) +{ + double output_count = 0.0; + // we consider the worst-case scenario + switch (set_type) { + // Assuming there are no identical values in both branches. + case ObSelectStmt::SetOperator::UNION: output_count = count1 + count2; break; + // Assuming that all values appear as much as possible in both branches + case ObSelectStmt::SetOperator::INTERSECT: output_count = std::min(count1, count2); break; + // Assuming that none of the values in the right branch appear in the left branch + case ObSelectStmt::SetOperator::EXCEPT: output_count = count1; break; + // Assuming the ratio between the output rowcount in each iteration and the previous iteration remains constant. + // And the recursion branch continues until the number of rows exceeds 100 times the number of rows in the non-recursive branch and does not exceed 7 iterations. 
+ case ObSelectStmt::SetOperator::RECURSIVE: { + count1 = std::max(1.0, count1); + output_count = count1; + double recursive_count = count2; + int64_t i = 0; + do { + output_count += recursive_count; + recursive_count *= count2 / count1; + i ++; + } while (i < 7 && output_count <= 100 * count1); + break; + } + default: output_count = count1 + count2; break; + } + return output_count; +} }//end of namespace sql }//end of namespace oceanbase diff --git a/src/sql/optimizer/ob_opt_selectivity.h b/src/sql/optimizer/ob_opt_selectivity.h index b9a5834a99..2f2143afa4 100644 --- a/src/sql/optimizer/ob_opt_selectivity.h +++ b/src/sql/optimizer/ob_opt_selectivity.h @@ -51,6 +51,13 @@ struct ColumnItem; struct RangeExprs; struct ObExprSelPair; +enum class FilterDependencyType +{ + INDEPENDENT, + MUTEX_OR, + EXPONENTIAL_BACKOFF, +}; + class OptSelectivityCtx { public: @@ -64,7 +71,8 @@ class OptSelectivityCtx right_rel_ids_(NULL), row_count_1_(-1.0), row_count_2_(-1.0), - current_rows_(-1.0) + current_rows_(-1.0), + dependency_type_(FilterDependencyType::INDEPENDENT) { } ObOptimizerContext &get_opt_ctx() { return opt_ctx_; } @@ -104,16 +112,21 @@ class OptSelectivityCtx double get_current_rows() const { return current_rows_; } void set_current_rows(const double current_rows) { current_rows_ = current_rows; } + FilterDependencyType get_dependency_type() const { return dependency_type_; } + void set_dependency_type(FilterDependencyType type) { dependency_type_ = type; } - void init_op_ctx(const EqualSets *equal_sets, const double current_rows) + void init_op_ctx(const EqualSets *equal_sets, const double current_rows, + FilterDependencyType dependency_type = FilterDependencyType::INDEPENDENT) { equal_sets_ = equal_sets; current_rows_ = current_rows; + dependency_type_ = dependency_type; } void init_row_count(const double row_count1, const double row_count2) { row_count_1_ = row_count1; row_count_2_ = row_count2; + dependency_type_ = FilterDependencyType::INDEPENDENT; } void 
init_join_ctx(const ObJoinType join_type, const ObRelIds *left_rel_ids, @@ -127,12 +140,13 @@ class OptSelectivityCtx row_count_2_ = rc2; current_rows_ = -1.0; equal_sets_ = equal_sets; + dependency_type_ = FilterDependencyType::INDEPENDENT; } void clear_equal_sets() { equal_sets_ = NULL; } TO_STRING_KV(KP_(stmt), KP_(equal_sets), K_(join_type), KP_(left_rel_ids), KP_(right_rel_ids), - K_(row_count_1), K_(row_count_2), K_(current_rows)); + K_(row_count_1), K_(row_count_2), K_(current_rows), K_(dependency_type)); private: ObOptimizerContext &opt_ctx_; @@ -153,6 +167,7 @@ class OptSelectivityCtx double row_count_1_; double row_count_2_; double current_rows_; + FilterDependencyType dependency_type_; }; class OptColumnMeta @@ -207,6 +222,11 @@ public: void set_cg_skip_rate(const double skip_rate) { cg_skip_rate_ = skip_rate; } + void set_default_meta(double rows) + { + ndv_ = std::min(rows, std::max(100.0, rows / 100.0)); + num_null_ = rows * EST_DEF_COL_NULL_RATIO; + } TO_STRING_KV(K_(column_id), K_(ndv), K_(num_null), K_(avg_len), K_(hist_scale), K_(min_val), K_(max_val) , K_(min_max_inited), K_(cg_macro_blk_cnt), @@ -244,13 +264,15 @@ public: rows_(0), stat_type_(OptTableStatType::DEFAULT_TABLE_STAT), last_analyzed_(0), + stat_locked_(false), all_used_parts_(), all_used_tablets_(), pk_ids_(), column_metas_(), ds_level_(ObDynamicSamplingLevel::NO_DYNAMIC_SAMPLING), all_used_global_parts_(), - scale_ratio_(1.0) + scale_ratio_(1.0), + distinct_rows_(0.0) {} int assign(const OptTableMeta &other); @@ -306,12 +328,17 @@ public: bool use_opt_global_stat() const { return stat_type_ == OptTableStatType::OPT_TABLE_GLOBAL_STAT; } bool use_ds_stat() const { return stat_type_ == OptTableStatType::DS_TABLE_STAT; } void set_use_ds_stat() { stat_type_ = OptTableStatType::DS_TABLE_STAT; } + bool is_stat_locked() const { return stat_locked_; } + void set_stat_locked(bool locked) { stat_locked_ = locked; } + double get_distinct_rows() const { return distinct_rows_; } + void 
set_distinct_rows(double rows) { distinct_rows_ = rows; } + void set_ndv_for_all_column(double ndv); share::schema::ObTableType get_table_type() const { return table_type_; } TO_STRING_KV(K_(table_id), K_(ref_table_id), K_(table_type), K_(rows), K_(stat_type), K_(ds_level), K_(all_used_parts), K_(all_used_tablets), K_(pk_ids), K_(column_metas), - K_(all_used_global_parts), K_(scale_ratio)); + K_(all_used_global_parts), K_(scale_ratio), K_(stat_locked), K_(distinct_rows)); private: uint64_t table_id_; uint64_t ref_table_id_; @@ -319,6 +346,7 @@ private: double rows_; OptTableStatType stat_type_; int64_t last_analyzed_; + bool stat_locked_; int64_t micro_block_count_; @@ -329,6 +357,9 @@ private: int64_t ds_level_;//dynamic sampling level ObSEArray all_used_global_parts_; double scale_ratio_; + + // only valid for child stmt meta of set distinct stmt + double distinct_rows_; }; struct OptSelectivityDSParam { @@ -360,9 +391,10 @@ public: const OptTableStatType stat_type, ObIArray &all_used_global_parts, const double scale_ratio, - int64_t last_analyzed); + int64_t last_analyzed, + bool is_stat_locked); - int add_set_child_stmt_meta_info(const ObDMLStmt *parent_stmt, + int add_set_child_stmt_meta_info(const ObSelectStmt *parent_stmt, const ObSelectStmt *child_stmt, const uint64_t table_id, const OptTableMetas &child_table_metas, @@ -382,6 +414,9 @@ public: double &ndv, double &num_null, double &avg_len); + int get_set_stmt_output_ndv(const ObSelectStmt &stmt, + const OptTableMetas &child_table_metas, + double &ndv); common::ObIArray& get_table_metas() { return table_metas_; } const OptTableMeta* get_table_meta_by_table_id(const uint64_t table_id) const; @@ -402,7 +437,11 @@ struct OptSelInfo selectivity_(1.0), equal_count_(0), range_selectivity_(1.0), - has_range_exprs_(false) {} + has_range_exprs_(false) + { + min_.set_min_value(); + max_.set_max_value(); + } TO_STRING_KV(K_(column_id), K_(selectivity), K_(equal_count), K_(range_selectivity), K_(has_range_exprs)); @@ 
-412,29 +451,11 @@ struct OptSelInfo uint64_t equal_count_; double range_selectivity_; bool has_range_exprs_; + ObObj min_; + ObObj max_; }; -struct ObEstColRangeInfo -{ - ObEstColRangeInfo(double min, - double max, - const common::ObObj *startobj, - const common::ObObj *endobj, - double distinct, - bool discrete, - common::ObBorderFlag border_flag) - : min_(min), max_(max), startobj_(startobj), endobj_(endobj), - distinct_(distinct), discrete_(discrete), border_flag_(border_flag) - { } - double min_; - double max_; - const common::ObObj *startobj_; - const common::ObObj *endobj_; - double distinct_; - bool discrete_; - common::ObBorderFlag border_flag_; -}; - +class ObSelEstimator; class ObOptSelectivity { @@ -487,148 +508,6 @@ public: static inline double revise_between_0_1(double num) { return num < 0 ? 0 : (num > 1 ? 1 : num); } -private: - static int check_qual_later_calculation(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - ObRawExpr &qual, - ObIArray &all_pred_sel, - ObIArray &join_conditions, - ObIArray &range_conditions, - bool &need_skip); - - static int is_simple_join_condition(ObRawExpr &qual, - const ObRelIds *left_rel_ids, - const ObRelIds *right_rel_ids, - bool &is_valid, - ObIArray &join_conditions); - - /** - * calculate const or calculable expr selectivity. - * e.g. `1`, `1 = 1`, `1 + 1`, `1 = 0` - * if expr is always true, selectivity = 1.0 - * if expr is always false, selectivity = 0.0 - * if expr can't get actual value, like exec_param, selectivity = 0.5 - */ - static int get_const_sel(const OptSelectivityCtx &ctx, - const ObRawExpr &qual, - double &selectivity); - - /** - * calculate column expr selectivity. - * e.g. `c1`, `t1.c1` - * selectity = 1.0 - sel(t1.c1 = 0) - sel(t1.c1 is NULL) - */ - static int get_column_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObRawExpr &qual, - double &selectivity); - - //1. var = | <=> const, get_simple_predicate_sel - //2. 
func(var) = | <=> const, - // only simple op(+,-,*,/), get_simple_predicate_sel, - // mod(cnt_var, mod_num), distinct_sel * mod_num - // else sqrt(distinct_sel) - //3. cnt(var) = |<=> cnt(var) get_cntcol_eq_cntcol_sel - static int get_equal_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObRawExpr &qual, - double &selectivity); - - static int get_equal_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObRawExpr &left_expr, - const ObRawExpr &right_expr, - const bool null_safe, - double &selectivity); - - - // Get simple predicate selectivity - // (col) | (col +-* num) = const, sel = distinct_sel - // (col) | (col +-* num) = null, sel = 0 - // (col) | (col +-* num) <=> const, sel = distinct_sel - // (col) | (col +-* num) <=> null, sel = null_sel - // multi_col | func(col) =|<=> null, sel DEFAULT_EQ_SEL 0.005 - // @param partition_id only used in base table - /** - * calculate equal predicate with format `contain_column_expr = not_contain_column_expr` by ndv - * e.g. 
`c1 = 1`, `c1 + 1 = 2`, `c1 + c2 = 10` - * if contain_column_expr contain not monotonic operator or has more than one column, - * selectivity = DEFAULT_EQ_SEL - * if contain_column_expr contain only one column and contain only monotonic operator, - * selectivity = 1 / ndv - */ - static int get_simple_equal_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObRawExpr &cnt_col_expr, - const ObRawExpr *calculable_expr, - const bool null_safe, - double &selectivity); - - static int get_cntcol_op_cntcol_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObRawExpr &input_left_expr, - const ObRawExpr &input_right_expr, - ObItemType op_type, - double &selectivity); - - static int get_equal_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - ObIArray &quals, - double &selectivity); - - static int extract_join_exprs(ObIArray &quals, - const ObRelIds &left_rel_ids, - const ObRelIds &right_rel_ids, - ObIArray &left_exprs, - ObIArray &right_exprs, - ObIArray &null_safes); - - static int get_cntcols_eq_cntcols_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObIArray &left_exprs, - const ObIArray &right_exprs, - const ObIArray &null_safes, - double &selectivity); - - /** - * calculate [not] in predicate selectivity - * e.g. 
`c1 in (1, 2, 3)`, `1 in (c1, c2, c3)` - * The most commonly format `column in (const1, const2, const3)` - * selectivity = sum(selectivity(column = const_i)) - * otherwise, `var in (var1, var2, var3) - * selectivity = sum(selectivity(var = var_i)) - * not_in_selectivity = 1.0 - in_selectivity - */ - static int get_in_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObRawExpr &qual, - double &selectivity); - - // get var is[not] NULL\true\false selectivity - // for var is column: - // var is NULL: selectivity = null_sel(get_var_basic_sel) - // var is true: selectivity = 1 - distinct_sel(var = 0) - null_sel - // var is false: selectivity = distinct_sel(var = 0) - // others: - // DEFAULT_SEL - // for var is not NULL\true\false: selectivity = 1.0 - is_sel - /** - * calculate is [not] predicate selectivity - * e.g. `c1 is null`, `c1 is ture`(mysql only) - */ - static int get_is_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObRawExpr &qual, - double &selectivity); - - //col RANGE_CMP const, column_range_sel - //func(col) RANGE_CMP const, DEFAULT_INEQ_SEL - //col1 RANGE_CMP col2, DEFAULT_INEQ_SEL - static int get_range_cmp_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObRawExpr &qual, - double &selectivity); - static int get_column_range_sel(const OptTableMetas &table_metas, const OptSelectivityCtx &ctx, const ObColumnRefRawExpr &col_expr, @@ -644,6 +523,12 @@ private: const ObIArray &quals, double &selectivity); + static int get_column_range_min_max(const OptSelectivityCtx &ctx, + const ObColumnRefRawExpr *col_expr, + const ObIArray &quals, + ObObj &obj_min, + ObObj &obj_max); + static int calc_column_range_selectivity(const OptTableMetas &table_metas, const OptSelectivityCtx &ctx, const ObRawExpr &column_expr, @@ -672,92 +557,6 @@ private: bool include_start, bool include_end); - /** - * calculate like predicate selectivity. - * e.g. 
`c1 like 'xx%'`, `c1 like '%xx'` - * c1 like 'xx%', use query range selectivity - * c1 like '%xx', use DEFAULT_INEQ_SEL 1.0 / 3.0 - */ - static int get_like_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObRawExpr &qual, - double &selectivity, - bool &can_calc_sel); - - //c1 between $val1 and $val2 -> equal with [$val2 - $val1] range sel - //c1 not between $val1 and $val2 -> equal with (min, $val1) or ($val2, max) range sel - static int get_btw_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObRawExpr &qual, - double &selectivity); - - // not c1 in (a,b); not c1 > 100... - // not op. - // if can calculate null_sel, sel = 1.0 - null_sel - op_sel - // else sel = 1.0 - op_sel - static int get_not_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObRawExpr &qual, - double &selectivity, - common::ObIArray &all_predicate_sel); - - // col or (col +-* 2) != 1, 1.0 - distinct_sel - null_sel - // col or (col +-* 2) != NULL -> 0.0 - // otherwise DEFAULT_SEL; - static int get_ne_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObRawExpr &qual, - double &selectivity); - - static int get_ne_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObRawExpr &l_expr, - const ObRawExpr &r_expr, - double &selectivity); - - static int get_agg_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObRawExpr &qual, - double &selectivity); - - static int get_agg_sel_with_minmax(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObRawExpr &aggr_expr, - const ObRawExpr *const_expr1, - const ObRawExpr *const_expr2, - const ObItemType type, - double &selectivity, - const double rows_per_group); - - static double get_agg_eq_sel(const ObObj &maxobj, - const ObObj &minobj, - const ObObj &constobj, - const double distinct_sel, - const double rows_per_group, - const bool is_eq, - const bool is_sum); - - static 
double get_agg_range_sel(const ObObj &maxobj, - const ObObj &minobj, - const ObObj &constobj, - const double rows_per_group, - const ObItemType type, - const bool is_sum); - - static double get_agg_btw_sel(const ObObj &maxobj, - const ObObj &minobj, - const ObObj &constobj1, - const ObObj &constobj2, - const double rows_per_group, - const ObItemType type, - const bool is_sum); - - static int is_valid_agg_qual(const ObRawExpr &qual, - bool &is_valid, - const ObRawExpr *&aggr_expr, - const ObRawExpr *&const_expr1, - const ObRawExpr *&const_expr2); - static int check_column_in_current_level_stmt(const ObDMLStmt *stmt, const ObRawExpr &expr); static int column_in_current_level_stmt(const ObDMLStmt *stmt, @@ -894,12 +693,6 @@ private: ObRawExpr *&expr, double &ndv); - /** - * 判断多列连接是否只涉及到两个表 - */ - static int is_valid_multi_join(ObIArray &quals, - bool &is_valid); - /** * 检查一组expr是否包含所在表的主键 */ @@ -921,7 +714,8 @@ private: ObIArray &col_ids, uint64_t &table_id); - static int classify_quals(const ObIArray &quals, + static int classify_quals(const OptSelectivityCtx &ctx, + const ObIArray &quals, ObIArray &all_predicate_sel, ObIArray &column_sel_infos); @@ -977,9 +771,40 @@ private: // const ObIArray &predicates, // ObOptDSJoinParam &ds_join_param); + static double get_filters_selectivity(ObIArray &selectivities, FilterDependencyType type); + + static int get_column_min_max(ObRawExpr *expr, OptSelInfo &sel_info); + + static int calculate_special_ndv(const OptTableMetas &table_meta, + const ObRawExpr* expr, + const OptSelectivityCtx &ctx, + double &special_ndv, + const double origin_rows); + static int calculate_expr_ndv(const ObIArray& exprs, + ObIArray& expr_ndv, + const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const double origin_rows); + static bool is_special_expr(const ObRawExpr &expr); + static int classify_exprs(const ObIArray& exprs, + ObIArray& column_exprs, + ObIArray& special_exprs, + const OptTableMetas &table_metas, + const 
OptSelectivityCtx &ctx); + static int classify_exprs(ObRawExpr* expr, + ObIArray& column_exprs, + ObIArray& special_exprs, + const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx); + + static int remove_ignorable_func_for_est_sel(const ObRawExpr *&expr); + static int remove_ignorable_func_for_est_sel(ObRawExpr *&expr); + static double get_set_stmt_output_count(double count1, double count2, ObSelectStmt::SetOperator set_type); + private: DISALLOW_COPY_AND_ASSIGN(ObOptSelectivity); }; + } } diff --git a/src/sql/optimizer/ob_optimizer.cpp b/src/sql/optimizer/ob_optimizer.cpp index a3ebd36970..8959edce59 100644 --- a/src/sql/optimizer/ob_optimizer.cpp +++ b/src/sql/optimizer/ob_optimizer.cpp @@ -550,6 +550,7 @@ int ObOptimizer::extract_opt_ctx_basic_flags(const ObDMLStmt &stmt, ObSQLSession bool has_subquery_in_function_table = false; bool has_dblink = false; bool force_serial_set_order = false; + bool storage_estimation_enabled = false; bool has_cursor_expr = false; int64_t link_stmt_count = 0; omt::ObTenantConfigGuard tenant_config(TENANT_CONF(session.get_effective_tenant_id())); @@ -575,7 +576,10 @@ int ObOptimizer::extract_opt_ctx_basic_flags(const ObDMLStmt &stmt, ObSQLSession LOG_WARN("fail to check rowsets enabled", K(ret)); } else if (OB_FAIL(stmt.check_has_cursor_expression(has_cursor_expr))) { LOG_WARN("fail to check cursor expression info", K(ret)); + } else if (OB_FAIL(session.is_storage_estimation_enabled(storage_estimation_enabled))) { + LOG_WARN("fail to get storage_estimation_enabled", K(ret)); } else { + ctx_.set_storage_estimation_enabled(storage_estimation_enabled); ctx_.set_serial_set_order(force_serial_set_order); ctx_.set_has_multiple_link_stmt(link_stmt_count > 1); ctx_.set_has_var_assign(has_var_assign); diff --git a/src/sql/optimizer/ob_optimizer_context.h b/src/sql/optimizer/ob_optimizer_context.h index 4d6436e416..2753b32508 100644 --- a/src/sql/optimizer/ob_optimizer_context.h +++ b/src/sql/optimizer/ob_optimizer_context.h @@ 
-232,7 +232,8 @@ ObOptimizerContext(ObSQLSessionInfo *session_info, hash_join_enabled_(true), optimizer_sortmerge_join_enabled_(true), nested_loop_join_enabled_(true), - system_stat_() + system_stat_(), + storage_estimation_enabled_(false) { } inline common::ObOptStatManager *get_opt_stat_manager() { return opt_stat_manager_; } inline void set_opt_stat_manager(common::ObOptStatManager *sm) { opt_stat_manager_ = sm; } @@ -306,6 +307,9 @@ ObOptimizerContext(ObSQLSessionInfo *session_info, inline bool is_pdml_heap_table() const { return is_pdml_heap_table_; } inline bool force_serial_set_order() const { return force_serial_set_order_; } void set_serial_set_order(bool force_serial_set_order) { force_serial_set_order_ = force_serial_set_order; } + + inline bool is_storage_estimation_enabled() const { return storage_estimation_enabled_; } + void set_storage_estimation_enabled(bool storage_estimation_enabled) { storage_estimation_enabled_ = storage_estimation_enabled; } inline int64_t get_parallel() const { return parallel_; } inline int64_t get_max_parallel() const { return max_parallel_; } inline int64_t get_parallel_degree_limit(const int64_t server_cnt) const { return auto_dop_params_.get_parallel_degree_limit(server_cnt); } @@ -682,6 +686,7 @@ private: bool optimizer_sortmerge_join_enabled_; bool nested_loop_join_enabled_; OptSystemStat system_stat_; + bool storage_estimation_enabled_; }; } } diff --git a/src/sql/optimizer/ob_optimizer_util.cpp b/src/sql/optimizer/ob_optimizer_util.cpp index e65c1370a2..a3ddf89af4 100644 --- a/src/sql/optimizer/ob_optimizer_util.cpp +++ b/src/sql/optimizer/ob_optimizer_util.cpp @@ -9494,6 +9494,29 @@ bool ObOptimizerUtil::find_superset(const ObRelIds &rel_ids, } return bret; } +int ObOptimizerUtil::check_is_static_false_expr(ObOptimizerContext &opt_ctx, ObRawExpr &expr, bool &is_static_false) +{ + int ret = OB_SUCCESS; + ObObj const_value; + bool got_result = false; + bool is_result_true = false; + if (!expr.is_static_const_expr()) { 
+ // do nothing + } else if (OB_FAIL(ObSQLUtils::calc_const_or_calculable_expr(opt_ctx.get_exec_ctx(), + &expr, + const_value, + got_result, + opt_ctx.get_allocator()))) { + LOG_WARN("failed to calc const or calculable expr", K(ret)); + } else if (!got_result) { + // do nothing + } else if (OB_FAIL(ObObjEvaluator::is_true(const_value, is_result_true))) { + LOG_WARN("failed to check is const value true", K(ret)); + } else { + is_static_false = !is_result_true; + } + return ret; +} int ObOptimizerUtil::check_ancestor_node_support_skip_scan(ObLogicalOperator* op, bool &can_use_batch_nlj) { diff --git a/src/sql/optimizer/ob_optimizer_util.h b/src/sql/optimizer/ob_optimizer_util.h index a97c24933e..73588df570 100644 --- a/src/sql/optimizer/ob_optimizer_util.h +++ b/src/sql/optimizer/ob_optimizer_util.h @@ -1535,6 +1535,8 @@ public: ObRawExpr *&temp_table_filter, ObSelectStmt *temp_table_query = NULL); + static int check_is_static_false_expr(ObOptimizerContext &opt_ctx, ObRawExpr &expr, bool &is_static_false); + static int check_ancestor_node_support_skip_scan(ObLogicalOperator* op, bool &can_use_batch_nlj); private: //disallow construct diff --git a/src/sql/optimizer/ob_sel_estimator.cpp b/src/sql/optimizer/ob_sel_estimator.cpp new file mode 100644 index 0000000000..8d9b1208a6 --- /dev/null +++ b/src/sql/optimizer/ob_sel_estimator.cpp @@ -0,0 +1,2790 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX SQL_OPT +#include "sql/optimizer/ob_opt_selectivity.h" +#include +#include "common/object/ob_obj_compare.h" +#include "sql/session/ob_sql_session_info.h" +#include "sql/session/ob_basic_session_info.h" +#include "share/schema/ob_part_mgr_util.h" +#include "sql/resolver/expr/ob_raw_expr_util.h" +#include "sql/rewrite/ob_query_range.h" +#include "sql/optimizer/ob_opt_est_utils.h" +#include "sql/optimizer/ob_optimizer.h" +#include "sql/optimizer/ob_optimizer_util.h" +#include "sql/rewrite/ob_transform_utils.h" +#include "sql/optimizer/ob_logical_operator.h" +#include "sql/optimizer/ob_join_order.h" +#include "common/ob_smart_call.h" +#include "share/stat/ob_dbms_stats_utils.h" +#include "sql/optimizer/ob_access_path_estimation.h" +#include "sql/optimizer/ob_sel_estimator.h" + +using namespace oceanbase::common; +using namespace oceanbase::share::schema; +namespace oceanbase +{ +namespace sql +{ +inline double revise_ndv(double ndv) { return ndv < 1.0 ? 1.0 : ndv; } + +int ObSelEstimator::append_estimators(ObIArray &sel_estimators, ObSelEstimator *new_estimator) +{ + int ret = OB_SUCCESS; + bool find_same_class = false; + if (OB_ISNULL(new_estimator)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("estimator is null", K(new_estimator)); + } else if (new_estimator->is_independent()) { + // do nothing + } else { + for (int64_t i = 0; OB_SUCC(ret) && !find_same_class && i < sel_estimators.count(); i ++) { + if (OB_ISNULL(sel_estimators.at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("estimator is null", K(ret), K(sel_estimators)); + } else if (OB_FAIL(sel_estimators.at(i)->merge(*new_estimator, find_same_class))) { + LOG_WARN("failed to merge same class", K(ret), KPC(sel_estimators.at(i)), KPC(new_estimator)); + } + } + } + if (OB_SUCC(ret) && !find_same_class) { + if (OB_FAIL(sel_estimators.push_back(new_estimator))) { + LOG_WARN("failed to push back", K(ret), K(sel_estimators)); + } + } + return ret; +} + +int 
ObDefaultSelEstimator::get_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + double &selectivity, + ObIArray &all_predicate_sel) +{ + int ret = OB_SUCCESS; + // expr_ must not be dereferenced before the null check below + double tmp_sel = 1.0; + int64_t idx = 0; + if (OB_ISNULL(expr_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null expr", KPC(this)); + } else if (expr_->is_spatial_expr()) { + selectivity = DEFAULT_SPATIAL_SEL; + } else if (ObOptimizerUtil::find_item(all_predicate_sel, ObExprSelPair(expr_, 0), &idx)) { + selectivity = all_predicate_sel.at(idx).sel_; + } else { + selectivity = DEFAULT_SEL; + } + return ret; +} + +int ObConstSelEstimator::get_const_sel(const OptSelectivityCtx &ctx, + const ObRawExpr &qual, + double &selectivity) +{ + int ret = OB_SUCCESS; + const ParamStore *params = ctx.get_params(); + const ObDMLStmt *stmt = ctx.get_stmt(); + if (OB_ISNULL(params) || OB_ISNULL(stmt)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret), K(params), K(stmt)); + } else if (ObOptEstUtils::is_calculable_expr(qual, params->count())) { + ObObj const_value; + bool got_result = false; + bool is_true = false; + if (OB_FAIL(ObSQLUtils::calc_const_or_calculable_expr(ctx.get_opt_ctx().get_exec_ctx(), + &qual, + const_value, + got_result, + ctx.get_allocator()))) { + LOG_WARN("failed to calc const or calculable expr", K(ret)); + } else if (!got_result) { + selectivity = DEFAULT_SEL; + } else if (OB_FAIL(ObObjEvaluator::is_true(const_value, is_true))) { + LOG_WARN("failed to check is const value true", K(ret)); + } else { + selectivity = is_true ? 
1.0 : 0.0; + } + } else { + selectivity = DEFAULT_SEL; + } + return ret; +} + +int ObColumnSelEstimator::get_column_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr &qual, + double &selectivity) +{ + int ret = OB_SUCCESS; + selectivity = DEFAULT_SEL; + double distinct_sel = 0.0; + double null_sel = 0.0; + if (!ob_is_string_or_lob_type(qual.get_data_type())) { + if (OB_FAIL(ObOptSelectivity::check_column_in_current_level_stmt(ctx.get_stmt(), qual))) { + LOG_WARN("Failed to check column in cur level stmt", K(ret)); + } else if (OB_FAIL(ObOptSelectivity::get_column_basic_sel(table_metas, ctx, qual, &distinct_sel, &null_sel))) { + LOG_WARN("Failed to calc basic equal sel", K(ret)); + } else { + selectivity = 1.0 - distinct_sel - null_sel; + } + } + return ret; +} + +int ObInSelEstimator::get_in_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr &qual, + double &selectivity) +{ + int ret = OB_SUCCESS; + selectivity = 0.0; + double tmp_selectivity = 1.0; + double distinct_sel = 1.0; + double null_sel = 0.0; + const ObRawExpr *left_expr = NULL; + const ObRawExpr *right_expr = NULL; + const ObRawExpr *param_expr = NULL; + bool contain_null = false; + if (OB_UNLIKELY(2 != qual.get_param_count()) || + OB_ISNULL(left_expr = qual.get_param_expr(0)) || + OB_ISNULL(right_expr = qual.get_param_expr(1)) || + T_OP_ROW != right_expr->get_expr_type()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpect expr", K(ret), K(qual), K(left_expr), K(right_expr)); + } else if (OB_FAIL(ObOptSelectivity::remove_ignorable_func_for_est_sel(left_expr))) { + LOG_WARN("failed to remove ignorable function", K(ret)); + } else if (OB_LIKELY(left_expr->is_column_ref_expr() && !right_expr->has_flag(CNT_COLUMN))) { + ObOptColumnStatHandle handler; + ObObj expr_value; + bool histogram_valid = false; + const ObColumnRefRawExpr *col = static_cast(left_expr); + hash::ObHashSet obj_set; + double hist_scale = 0; + if 
(OB_FAIL(obj_set.create(hash::cal_next_prime(right_expr->get_param_count()), + "OptSelHashSet", "OptSelHashSet"))) { + LOG_WARN("failed to create hash set", K(ret), K(right_expr->get_param_count())); + } else if (OB_FAIL(ObOptSelectivity::get_column_basic_sel(table_metas, ctx, *left_expr, &distinct_sel, &null_sel))) { + LOG_WARN("failed to get column basic selectivity", K(ret)); + } else if (OB_FAIL(ObOptSelectivity::get_column_hist_scale(table_metas, ctx, *left_expr, hist_scale))) { + LOG_WARN("failed to get columnn hist sample scale", K(ret)); + } else if (OB_FAIL(ObOptSelectivity::get_histogram_by_column(table_metas, ctx, + col->get_table_id(), + col->get_column_id(), + handler))) { + LOG_WARN("failed to get histogram by column", K(ret)); + } else if (handler.stat_ != NULL && handler.stat_->get_histogram().is_valid()) { + histogram_valid = true; + } + for (int64_t i = 0; OB_SUCC(ret) && i < right_expr->get_param_count(); ++i) { + // bool can_use_hist = false; + bool get_value = false; + if (OB_ISNULL(param_expr = right_expr->get_param_expr(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null expr", K(ret)); + } else if (OB_FAIL(ObOptSelectivity::get_compare_value(ctx, col, param_expr, expr_value, get_value))) { + // cast may failed due to invalid type or value out of range. 
+ // Then use ndv instead of histogram + get_value = false; + ret = OB_SUCCESS; + } + if (OB_SUCC(ret)) { + if (histogram_valid && get_value) { + double null_sel = 0; + if (OB_HASH_EXIST == obj_set.exist_refactored(expr_value)) { + // duplicate value, do nothing + } else if (OB_FAIL(obj_set.set_refactored(expr_value))) { + LOG_WARN("failed to set refactorcd", K(ret), K(expr_value)); + } else if (OB_FAIL(ObOptSelectivity::get_equal_pred_sel(handler.stat_->get_histogram(), + expr_value, + hist_scale, + tmp_selectivity))) { + LOG_WARN("failed to get equal density", K(ret)); + } else { + selectivity += tmp_selectivity * (1 - null_sel); + } + } else if (!get_value) { + // invalid value, for example c1 in (exec_param). Do not check obj exists. + if (param_expr->get_result_type().is_null()) { + contain_null = true; + } else { + selectivity += distinct_sel; + } + } else if (OB_HASH_EXIST == obj_set.exist_refactored(expr_value)) { + // do nothing + } else if (OB_FAIL(obj_set.set_refactored(expr_value))) { + LOG_WARN("failed to set refactorcd", K(ret), K(expr_value)); + } else if (expr_value.is_null()) { + contain_null = true; + } else { + selectivity += distinct_sel; + } + } + } + if (obj_set.created()) { + int tmp_ret = OB_SUCCESS; + if (OB_SUCCESS != (tmp_ret = obj_set.destroy())) { + LOG_WARN("failed to destroy hash set", K(tmp_ret), K(ret)); + ret = COVER_SUCC(tmp_ret); + } + } + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < right_expr->get_param_count(); ++i) { + if (OB_ISNULL(param_expr = right_expr->get_param_expr(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null expr", K(ret)); + } else if (OB_FAIL(ObEqualSelEstimator::get_equal_sel(table_metas, ctx, *left_expr, *param_expr, + false, tmp_selectivity))) { + LOG_WARN("Failed to get equal sel", K(ret), KPC(left_expr)); + } else { + selectivity += tmp_selectivity; + } + } + } + + selectivity = ObOptSelectivity::revise_between_0_1(selectivity); + if (OB_SUCC(ret) && T_OP_NOT_IN == qual.get_expr_type()) { + 
selectivity = 1.0 - selectivity; + if (contain_null) { + selectivity = 0.0; + } else if (left_expr->has_flag(CNT_COLUMN) && !right_expr->has_flag(CNT_COLUMN)) { + ObSEArray cur_vars; + if (OB_FAIL(ObRawExprUtils::extract_column_exprs(left_expr, cur_vars))) { + LOG_WARN("failed to extract column exprs", K(ret)); + } else if (1 == cur_vars.count()) { // only one column, consider null_sel + if (OB_ISNULL(cur_vars.at(0))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("expr is null", K(ret)); + } else if (OB_FAIL(ObOptSelectivity::get_column_basic_sel(table_metas, ctx, *cur_vars.at(0), + &distinct_sel, &null_sel))) { + LOG_WARN("failed to get column basic sel", K(ret)); + } else if (distinct_sel > ((1.0 - null_sel) / 2.0)) { + // ndv < 2 + // TODO: @yibo the reasoning behind this refinement is unclear + selectivity = distinct_sel / 2.0; + } else { + selectivity -= null_sel; + selectivity = std::max(distinct_sel, selectivity); // at least one distinct_sel + } + } else { }//do nothing + } + } + return ret; +} + +int ObIsSelEstimator::get_is_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr &qual, + double &selectivity) +{ + int ret = OB_SUCCESS; + selectivity = DEFAULT_SEL; + const ParamStore *params = ctx.get_params(); + const ObDMLStmt *stmt = ctx.get_stmt(); + const ObRawExpr *left_expr = qual.get_param_expr(0); + const ObRawExpr *right_expr = qual.get_param_expr(1); + ObObj result; + bool got_result = false; + if (OB_ISNULL(params) || OB_ISNULL(stmt) || OB_ISNULL(left_expr) || OB_ISNULL(right_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpect null", K(ret), K(params), K(stmt), K(left_expr), K(right_expr)); + } else if (OB_UNLIKELY(!ObOptEstUtils::is_calculable_expr(*right_expr, params->count()))) { + // do nothing + } else if (OB_FAIL(ObSQLUtils::calc_const_or_calculable_expr(ctx.get_opt_ctx().get_exec_ctx(), + right_expr, + result, + got_result, + ctx.get_allocator()))) { + LOG_WARN("failed to calculate const or calculable expr", K(ret)); + } else if 
(!got_result) { + // do nothing + } else if (OB_FAIL(ObOptSelectivity::remove_ignorable_func_for_est_sel(left_expr))) { + LOG_WARN("failed to remove ignorable func", K(ret), KPC(left_expr)); + } else if (left_expr->is_column_ref_expr()) { + if (OB_FAIL(ObOptSelectivity::check_column_in_current_level_stmt(stmt, *left_expr))) { + LOG_WARN("Failed to check column whether is in current stmt", K(ret)); + } else if (OB_LIKELY(result.is_null())) { + if (OB_FAIL(ObOptSelectivity::get_column_basic_sel(table_metas, ctx, *left_expr, NULL, &selectivity))) { + LOG_WARN("Failed to get var distinct sel", K(ret)); + } + } else if (result.is_tinyint() && + !ob_is_string_or_lob_type(left_expr->get_data_type())) { + double distinct_sel = 0.0; + double null_sel = 0.0; + if (OB_FAIL(ObOptSelectivity::get_column_basic_sel(table_metas, ctx, *left_expr, &distinct_sel, &null_sel))) { + LOG_WARN("Failed to get var distinct sel", K(ret)); + } else { + //distinct_num < 2. That is distinct_num only 1,(As double and statistics not completely accurate, + //use (1 - null_sel)/ 2.0 to check) + if (distinct_sel > (1 - null_sel) / 2.0) { + //The formula to calc sel of 'c1 is true' is (1 - distinct_sel(var = 0) - null_sel). + //If distinct_num is 1, the sel would be 0.0. + //But we don't know whether distinct value is 0. So guess the selectivity: (1 - null_sel)/2.0 + distinct_sel = (1- null_sel) / 2.0;//don't know the value, just get half. + } + selectivity = (result.is_true()) ? 
(1 - distinct_sel - null_sel) : distinct_sel; + } + } else { }//default sel + } else { + //TODO func(cnt_column) + } + + if (T_OP_IS_NOT == qual.get_expr_type()) { + selectivity = 1.0 - selectivity; + } + return ret; +} + +int ObCmpSelEstimator::get_range_cmp_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr &qual, + double &selectivity) +{ + int ret = OB_SUCCESS; + selectivity = DEFAULT_INEQ_SEL; + const ObRawExpr *left_expr = qual.get_param_expr(0); + const ObRawExpr *right_expr = qual.get_param_expr(1); + if (OB_ISNULL(left_expr) || OB_ISNULL(right_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null expr", K(ret), K(left_expr), K(right_expr)); + } else if (OB_FAIL(ObOptimizerUtil::get_expr_without_lossless_cast(left_expr, left_expr)) || + OB_FAIL(ObOptimizerUtil::get_expr_without_lossless_cast(right_expr, right_expr))) { + LOG_WARN("failed to get expr without lossless cast", K(ret)); + } else if ((left_expr->is_column_ref_expr() && right_expr->is_const_expr()) || + (left_expr->is_const_expr() && right_expr->is_column_ref_expr())) { + const ObRawExpr *col_expr = left_expr->is_column_ref_expr() ? 
left_expr : right_expr; + if (OB_FAIL(ObOptSelectivity::get_column_range_sel(table_metas, ctx, + static_cast(*col_expr), + qual, selectivity))) { + LOG_WARN("Failed to get column range sel", K(qual), K(ret)); + } + } else if (T_OP_ROW == left_expr->get_expr_type() && T_OP_ROW == right_expr->get_expr_type()) { + //only deal (col1, xx, xx) CMP (const, xx, xx) + if (left_expr->get_param_count() == 1 && OB_NOT_NULL(left_expr->get_param_expr(0)) && + T_OP_ROW == left_expr->get_param_expr(0)->get_expr_type()) { + left_expr = left_expr->get_param_expr(0); + } + if (right_expr->get_param_count() == 1 && OB_NOT_NULL(right_expr->get_param_expr(0)) && + T_OP_ROW == right_expr->get_param_expr(0)->get_expr_type()) { + right_expr = right_expr->get_param_expr(0); + } + if (left_expr->get_param_count() != right_expr->get_param_count()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("param count should be equal", + K(left_expr->get_param_count()), K(right_expr->get_param_count())); + } else if (left_expr->get_param_count() <= 1) { + // do nothing + } else if (OB_ISNULL(left_expr = left_expr->get_param_expr(0)) || + OB_ISNULL(right_expr = right_expr->get_param_expr(0))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret), K(left_expr), K(right_expr)); + } else if ((left_expr->is_column_ref_expr() && right_expr->is_const_expr()) || + (left_expr->is_const_expr() && right_expr->is_column_ref_expr())) { + const ObRawExpr *col_expr = (left_expr->is_column_ref_expr()) ? 
(left_expr) : (right_expr); + if (OB_FAIL(ObOptSelectivity::get_column_range_sel(table_metas, ctx, + static_cast(*col_expr), + qual, selectivity))) { + LOG_WARN("failed to get column range sel", K(ret)); + } + } else { /* no dothing */ } + } + return ret; +} + +int ObBtwSelEstimator::get_btw_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr &qual, + double &selectivity) +{ + int ret = OB_SUCCESS; + selectivity = DEFAULT_SEL; + const ObRawExpr *cmp_expr = NULL; + const ObRawExpr *l_expr = NULL; + const ObRawExpr *r_expr = NULL; + const ObRawExpr *col_expr = NULL; + const ParamStore *params = ctx.get_params(); + if (3 != qual.get_param_count()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("between expr should have 3 param", K(ret), K(qual)); + } else if (OB_ISNULL(params) || + OB_ISNULL(cmp_expr = qual.get_param_expr(0)) || + OB_ISNULL(l_expr = qual.get_param_expr(1)) || + OB_ISNULL(r_expr = qual.get_param_expr(2))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null params", K(ret), K(params), K(cmp_expr), K(l_expr), K(r_expr)); + } else if (OB_FAIL(ObOptimizerUtil::get_expr_without_lossless_cast(cmp_expr, cmp_expr))) { + LOG_WARN("failed to get expr without lossless cast", K(ret)); + } else if (cmp_expr->is_column_ref_expr() && + ObOptEstUtils::is_calculable_expr(*l_expr, params->count()) && + ObOptEstUtils::is_calculable_expr(*r_expr, params->count())) { + col_expr = cmp_expr; + } else if (ObOptEstUtils::is_calculable_expr(*cmp_expr, params->count()) && + l_expr->is_column_ref_expr() && + ObOptEstUtils::is_calculable_expr(*r_expr, params->count())) { + col_expr = l_expr; + } else if (ObOptEstUtils::is_calculable_expr(*cmp_expr, params->count()) && + ObOptEstUtils::is_calculable_expr(*l_expr, params->count()) && + r_expr->is_column_ref_expr()) { + col_expr = r_expr; + } + if (NULL != col_expr) { + if (OB_FAIL(ObOptSelectivity::get_column_range_sel(table_metas, ctx, + static_cast(*col_expr), + qual, selectivity))) { + LOG_WARN("failed 
to get column range sel", K(ret)); + } + } + return ret; +} + +int ObEqualSelEstimator::get_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + double &selectivity, + ObIArray &all_predicate_sel) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(expr_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null expr", KPC(this)); + } else { + const ObRawExpr &qual = *expr_; + const ObRawExpr *left_expr = qual.get_param_expr(0); + const ObRawExpr *right_expr = qual.get_param_expr(1); + if (OB_ISNULL(left_expr) || OB_ISNULL(right_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null expr", K(ret), K(qual), K(left_expr), K(right_expr)); + } else if (T_OP_NE == qual.get_expr_type()) { + if (OB_FAIL(get_ne_sel(table_metas, ctx, *left_expr, *right_expr, selectivity))) { + LOG_WARN("failed to get equal sel", K(ret)); + } + } else if (T_OP_EQ == qual.get_expr_type() || + T_OP_NSEQ == qual.get_expr_type()) { + if (OB_FAIL(get_equal_sel(table_metas, ctx, *left_expr, *right_expr, + T_OP_NSEQ == qual.get_expr_type(), selectivity))) { + LOG_WARN("failed to get equal sel", K(ret)); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected expr", KPC(this)); + } + } + return ret; +} + +int ObEqualSelEstimator::get_ne_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr &l_expr, + const ObRawExpr &r_expr, + double &selectivity) +{ + int ret = OB_SUCCESS; + selectivity = DEFAULT_SEL; + if (T_OP_ROW == l_expr.get_expr_type() && T_OP_ROW == r_expr.get_expr_type()) { + // (var1, var2) != (var3, var4) => var1 != var3 or var2 != var4 + selectivity = 0; + double tmp_selectivity = 1.0; + const ObRawExpr *l_param = NULL; + const ObRawExpr *r_param = NULL; + const ObRawExpr *l_row = &l_expr; + const ObRawExpr *r_row = &r_expr; + if (l_expr.get_param_count() == 1 && OB_NOT_NULL(l_expr.get_param_expr(0)) && + T_OP_ROW == l_expr.get_param_expr(0)->get_expr_type()) { + l_row = l_expr.get_param_expr(0); + } + if (r_expr.get_param_count() == 
1 && OB_NOT_NULL(r_expr.get_param_expr(0)) && + T_OP_ROW == r_expr.get_param_expr(0)->get_expr_type()) { + r_row = r_expr.get_param_expr(0); + } + if (OB_UNLIKELY(l_row->get_param_count() != r_row->get_param_count())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected expr", KPC(l_row), KPC(r_row), K(ret)); + } else { + int64_t num = l_row->get_param_count(); + for (int64_t i = 0; OB_SUCC(ret) && i < num; ++i) { + if (OB_ISNULL(l_param = l_row->get_param_expr(i)) || + OB_ISNULL(r_param = r_row->get_param_expr(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null expr", K(ret), K(l_row), K(r_row), K(i)); + } else if (OB_FAIL(SMART_CALL(get_ne_sel(table_metas, ctx, *l_param, + *r_param, tmp_selectivity)))) { + LOG_WARN("failed to get equal selectivity", K(ret)); + } else { + selectivity += tmp_selectivity - selectivity * tmp_selectivity; + } + } + } + } else if (l_expr.has_flag(CNT_COLUMN) && r_expr.has_flag(CNT_COLUMN)) { + if (OB_FAIL(get_cntcol_op_cntcol_sel(table_metas, ctx, l_expr, r_expr, T_OP_NE, selectivity))) { + LOG_WARN("failed to get cntcol op cntcol sel", K(ret)); + } + } else if ((l_expr.has_flag(CNT_COLUMN) && !r_expr.has_flag(CNT_COLUMN)) || + (!l_expr.has_flag(CNT_COLUMN) && r_expr.has_flag(CNT_COLUMN))) { + const ObRawExpr *cnt_col_expr = l_expr.has_flag(CNT_COLUMN) ? &l_expr : &r_expr; + const ObRawExpr *const_expr = l_expr.has_flag(CNT_COLUMN) ? 
&r_expr : &l_expr; + ObSEArray column_exprs; + bool only_monotonic_op = true; + bool null_const = false; + double ndv = 1.0; + double nns = 0; + bool can_use_hist = false; + ObObj expr_value; + ObOptColumnStatHandle handler; + if (OB_FAIL(ObOptSelectivity::remove_ignorable_func_for_est_sel(cnt_col_expr))) { + LOG_WARN("failed to remove ignorable function", K(ret)); + } else if (cnt_col_expr->is_column_ref_expr()) { + // column != const + const ObColumnRefRawExpr *col = static_cast(cnt_col_expr); + if (OB_FAIL(ObOptSelectivity::get_histogram_by_column(table_metas, ctx, col->get_table_id(), + col->get_column_id(), handler))) { + LOG_WARN("failed to get histogram by column", K(ret)); + } else if (handler.stat_ == NULL || !handler.stat_->get_histogram().is_valid()) { + // do nothing + } else if (OB_FAIL(ObOptSelectivity::get_compare_value(ctx, col, const_expr, expr_value, can_use_hist))) { + // cast may failed due to invalid type or value out of range. + // Then use ndv instead of histogram + can_use_hist = false; + ret = OB_SUCCESS; + } + } + if (OB_SUCC(ret)) { + if (can_use_hist) { + double hist_scale = 0; + if (OB_FAIL(ObOptSelectivity::get_column_hist_scale(table_metas, ctx, *cnt_col_expr, hist_scale))) { + LOG_WARN("failed to get columnn hist sample scale", K(ret)); + } else if (OB_FAIL(ObOptSelectivity::get_equal_pred_sel(handler.stat_->get_histogram(), expr_value, + hist_scale, selectivity))) { + LOG_WARN("Failed to get equal density", K(ret)); + } else if (OB_FAIL(ObOptSelectivity::get_column_ndv_and_nns(table_metas, ctx, *cnt_col_expr, NULL, &nns))) { + LOG_WARN("failed to get column ndv and nns", K(ret)); + } else { + selectivity = (1.0 - selectivity) * nns; + } + } else if (OB_FAIL(ObOptEstUtils::extract_column_exprs_with_op_check(cnt_col_expr, + column_exprs, + only_monotonic_op))) { + LOG_WARN("failed to extract column exprs with op check", K(ret)); + } else if (!only_monotonic_op || column_exprs.count() > 1) { + selectivity = DEFAULT_SEL; //cnt_col_expr 
contain not monotonic op OR has more than 1 var + } else if (OB_UNLIKELY(1 != column_exprs.count()) || OB_ISNULL(column_exprs.at(0))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected contain column expr", K(ret), K(*cnt_col_expr)); + } else if (OB_FAIL(ObOptEstUtils::if_expr_value_null(ctx.get_params(), + *const_expr, + ctx.get_opt_ctx().get_exec_ctx(), + ctx.get_allocator(), + null_const))) { + LOG_WARN("Failed to check whether expr null value", K(ret)); + } else if (null_const) { + selectivity = 0.0; + } else if (OB_FAIL(ObOptSelectivity::get_column_ndv_and_nns(table_metas, ctx, *column_exprs.at(0), &ndv, &nns))) { + LOG_WARN("failed to get column ndv and nns", K(ret)); + } else if (ndv < 2.0) { + //The reason doing this is similar as get_is_sel function. + //If distinct_num is 1, As formula, selectivity of 'c1 != 1' would be 0.0. + //But we don't know the distinct value, so just get the half selectivity. + selectivity = nns / ndv / 2.0; + } else { + selectivity = nns * (1.0 - 1 / ndv); + } + } + } else { }//do nothing + return ret; +} + +int ObEqualSelEstimator::get_equal_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr &qual, + double &selectivity) +{ + int ret = OB_SUCCESS; + const ObRawExpr *left_expr = qual.get_param_expr(0); + const ObRawExpr *right_expr = qual.get_param_expr(1); + if (OB_ISNULL(left_expr) || OB_ISNULL(right_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null expr", K(ret), K(qual), K(left_expr), K(right_expr)); + } else if (OB_FAIL(get_equal_sel(table_metas, ctx, *left_expr, *right_expr, + T_OP_NSEQ == qual.get_expr_type(), selectivity))) { + LOG_WARN("failed to get equal sel", K(ret)); + } + return ret; +} + +int ObEqualSelEstimator::get_equal_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr &left_expr, + const ObRawExpr &right_expr, + const bool null_safe, + double &selectivity) +{ + int ret = OB_SUCCESS; + if (T_OP_ROW == left_expr.get_expr_type() 
&& T_OP_ROW == right_expr.get_expr_type()) { + // normally row equal row will unnest as `var = var and var = var ...` + selectivity = 1.0; + double tmp_selectivity = 1.0; + const ObRawExpr *l_expr = NULL; + const ObRawExpr *r_expr = NULL; + const ObRawExpr *l_row = &left_expr; + const ObRawExpr *r_row = &right_expr; + // (c1, c2) in ((const1, const2)) may transform to (c1, c2) = ((const1, const2)) + if (left_expr.get_param_count() == 1 && OB_NOT_NULL(left_expr.get_param_expr(0)) && + T_OP_ROW == left_expr.get_param_expr(0)->get_expr_type()) { + l_row = left_expr.get_param_expr(0); + } + if (right_expr.get_param_count() == 1 && OB_NOT_NULL(right_expr.get_param_expr(0)) && + T_OP_ROW == right_expr.get_param_expr(0)->get_expr_type()) { + r_row = right_expr.get_param_expr(0); + } + if (OB_UNLIKELY(l_row->get_param_count() != r_row->get_param_count())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected expr", KPC(l_row), KPC(l_row), K(ret)); + } else { + int64_t num = l_row->get_param_count(); + for (int64_t i = 0; OB_SUCC(ret) && i < num; ++i) { + if (OB_ISNULL(l_expr = l_row->get_param_expr(i)) || + OB_ISNULL(r_expr = r_row->get_param_expr(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null expr", K(ret), K(l_expr), K(r_expr), K(i)); + } else if (OB_FAIL(SMART_CALL(get_equal_sel(table_metas, ctx, *l_expr, + *r_expr, null_safe, tmp_selectivity)))) { + LOG_WARN("failed to get equal selectivity", K(ret)); + } else { + selectivity *= tmp_selectivity; + } + } + } + } else if ((left_expr.has_flag(CNT_COLUMN) && !right_expr.has_flag(CNT_COLUMN)) || + (!left_expr.has_flag(CNT_COLUMN) && right_expr.has_flag(CNT_COLUMN))) { + // column = const + const ObRawExpr *cnt_col_expr = left_expr.has_flag(CNT_COLUMN) ? &left_expr : &right_expr; + const ObRawExpr &calc_expr = left_expr.has_flag(CNT_COLUMN) ? 
right_expr : left_expr; + ObOptColumnStatHandle handler; + ObObj expr_value; + bool can_use_hist = false; + if (OB_FAIL(ObOptSelectivity::remove_ignorable_func_for_est_sel(cnt_col_expr))) { + LOG_WARN("failed to remove ignorable function", K(ret)); + } else if (cnt_col_expr->is_column_ref_expr()) { + const ObColumnRefRawExpr* col = static_cast(cnt_col_expr); + if (OB_FAIL(ObOptSelectivity::get_histogram_by_column(table_metas, ctx, col->get_table_id(), + col->get_column_id(), handler))) { + LOG_WARN("failed to get histogram by column", K(ret)); + } else if (handler.stat_ == NULL || !handler.stat_->get_histogram().is_valid()) { + // do nothing + } else if (OB_FAIL(ObOptSelectivity::get_compare_value(ctx, col, &calc_expr, expr_value, can_use_hist))) { + // cast may failed due to invalid type or value out of range. + // Then use ndv instead of histogram + can_use_hist = false; + ret = OB_SUCCESS; + } + } + if (OB_SUCC(ret)) { + if (can_use_hist) { + double nns = 0; + double hist_scale = 0; + if (OB_FAIL(ObOptSelectivity::get_column_hist_scale(table_metas, ctx, *cnt_col_expr, hist_scale))) { + LOG_WARN("failed to get columnn hist sample scale", K(ret)); + } else if (OB_FAIL(ObOptSelectivity::get_equal_pred_sel(handler.stat_->get_histogram(), expr_value, + hist_scale, selectivity))) { + LOG_WARN("Failed to get equal density", K(ret)); + } else if (OB_FAIL(ObOptSelectivity::get_column_ndv_and_nns(table_metas, ctx, *cnt_col_expr, NULL, &nns))) { + LOG_WARN("failed to get column ndv and nns", K(ret)); + } else { + selectivity *= nns; + } + } else if (OB_FAIL(get_simple_equal_sel(table_metas, ctx, *cnt_col_expr, + &calc_expr, null_safe, selectivity))) { + LOG_WARN("failed to get simple equal selectivity", K(ret)); + } + LOG_TRACE("succeed to get equal predicate sel", K(can_use_hist), K(selectivity)); + } + } else if (left_expr.has_flag(CNT_COLUMN) && right_expr.has_flag(CNT_COLUMN)) { + if (OB_FAIL(get_cntcol_op_cntcol_sel(table_metas, ctx, left_expr, right_expr, + null_safe 
? T_OP_NSEQ : T_OP_EQ, selectivity))) { + LOG_WARN("failed to get contain column equal contain column selectivity", K(ret)); + } else { + LOG_TRACE("succeed to get contain column equal contain column sel", K(selectivity), K(ret)); + } + } else { + // CONST_PARAM = CONST_PARAM + const ParamStore *params = ctx.get_params(); + if (OB_ISNULL(params)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("Params is NULL", K(ret)); + } else if (ObOptEstUtils::is_calculable_expr(left_expr, params->count()) && + ObOptEstUtils::is_calculable_expr(right_expr, params->count())) { + // 1 in (c1, 2, 3) will reach this branch + bool equal = false; + if (OB_FAIL(ObOptEstUtils::if_expr_value_equal(const_cast(ctx.get_opt_ctx()), + ctx.get_stmt(), + left_expr, right_expr, null_safe, equal))) { + LOG_WARN("Failed to check hvae equal expr", K(ret)); + } else { + selectivity = equal ? 1.0 : 0.0; + } + } else { + selectivity = DEFAULT_EQ_SEL; + } + } + return ret; +} + +int ObEqualSelEstimator::get_simple_equal_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr &cnt_col_expr, + const ObRawExpr *calculable_expr, + const bool null_safe, + double &selectivity) +{ + int ret = OB_SUCCESS; + ObSEArray column_exprs; + bool only_monotonic_op = true; + const ObColumnRefRawExpr *column_expr = NULL; + double distinct_sel = 1.0; + double null_sel = 1.0; + bool is_null_value = false; + if (OB_FAIL(ObOptEstUtils::extract_column_exprs_with_op_check(&cnt_col_expr, + column_exprs, + only_monotonic_op))) { + LOG_WARN("failed to extract column exprs with op check", K(ret)); + } else if (!only_monotonic_op || column_exprs.count() > 1) { + // cnt_col_expr contain not monotonic op OR has more than 1 column + ObSEArray exprs; + ObRawExpr *expr = const_cast(&cnt_col_expr); + double ndv = 1.0; + bool refine_ndv_by_current_rows = (ctx.get_current_rows() >= 0); + if (OB_FAIL(exprs.push_back(expr))) { + LOG_WARN("failed to push back", K(ret)); + } else if 
(OB_FAIL(ObOptSelectivity::calculate_distinct(table_metas, ctx, exprs, ctx.get_current_rows(), + ndv, refine_ndv_by_current_rows))) { + LOG_WARN("Failed to calculate distinct", K(ret)); + } else { + selectivity = (ndv > 1.0) ? 1 / ndv : DEFAULT_EQ_SEL; + } + } else if (OB_UNLIKELY(1 != column_exprs.count()) || + OB_ISNULL(column_expr = column_exprs.at(0))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpect column expr", K(column_exprs), K(cnt_col_expr), K(column_expr)); + } else if (OB_FAIL(ObOptSelectivity::get_column_basic_sel(table_metas, ctx, *column_expr, + &distinct_sel, &null_sel))) { + LOG_WARN("failed to get column basic selelectivity", K(ret)); + } else if (NULL == calculable_expr) { + selectivity = distinct_sel; + } else if (OB_FAIL(ObOptEstUtils::if_expr_value_null(ctx.get_params(), + *calculable_expr, + ctx.get_opt_ctx().get_exec_ctx(), + ctx.get_allocator(), + is_null_value))) { + LOG_WARN("failed to check if expr value null", K(ret)); + } else if (!is_null_value) { + selectivity = distinct_sel; + } else if (null_safe) { + selectivity = null_sel; + } else { + selectivity = 0.0; + } + return ret; +} + +int ObEqualSelEstimator::get_cntcol_op_cntcol_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr &input_left_expr, + const ObRawExpr &input_right_expr, + ObItemType op_type, + double &selectivity) +{ + int ret = OB_SUCCESS; + double left_ndv = 1.0; + double right_ndv = 1.0; + double left_nns = 0.0; + double right_nns = 0.0; + selectivity = DEFAULT_EQ_SEL; + const ObRawExpr* left_expr = &input_left_expr; + const ObRawExpr* right_expr = &input_right_expr; + if (OB_FAIL(ObOptSelectivity::remove_ignorable_func_for_est_sel(left_expr)) || + OB_FAIL(ObOptSelectivity::remove_ignorable_func_for_est_sel(right_expr))) { + LOG_WARN("failed to remove ignorable function", K(ret)); + } else if (OB_ISNULL(left_expr) || OB_ISNULL(right_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret), K(left_expr), 
K(right_expr)); + } else if (left_expr->is_column_ref_expr() && right_expr->is_column_ref_expr()) { + const ObColumnRefRawExpr* left_col = NULL; + const ObColumnRefRawExpr* right_col = NULL; + if (OB_FAIL(ObOptSelectivity::filter_one_column_by_equal_set(table_metas, ctx, left_expr, left_expr))) { + LOG_WARN("failed filter column by equal set", K(ret)); + } else if (OB_FAIL(ObOptSelectivity::filter_one_column_by_equal_set(table_metas, ctx, right_expr, right_expr))) { + LOG_WARN("failed filter column by equal set", K(ret)); + } else if (OB_FAIL(ObOptSelectivity::get_column_ndv_and_nns(table_metas, ctx, *left_expr, &left_ndv, &left_nns))) { + LOG_WARN("failed to get column basic sel", K(ret)); + } else if (OB_FAIL(ObOptSelectivity::get_column_ndv_and_nns(table_metas, ctx, *right_expr, + &right_ndv, &right_nns))) { + LOG_WARN("failed to get column basic sel", K(ret)); + } else if (FALSE_IT(left_col = static_cast(left_expr)) || + FALSE_IT(right_col = static_cast(right_expr))) { + // never reach + } else if (left_expr->get_relation_ids() == right_expr->get_relation_ids()) { + if (left_col->get_column_id() == right_col->get_column_id()) { + // same table same column + if (T_OP_NSEQ == op_type) { + selectivity = 1.0; + } else if (T_OP_EQ == op_type) { + selectivity = left_nns; + } else if (T_OP_NE == op_type) { + selectivity = 0.0; + } + } else { + //same table different column + if (T_OP_NSEQ == op_type) { + selectivity = left_nns * right_nns / std::max(left_ndv, right_ndv) + + (1 - left_nns) * (1 - right_nns); + } else if (T_OP_EQ == op_type) { + selectivity = left_nns * right_nns / std::max(left_ndv, right_ndv); + } else if (T_OP_NE == op_type) { + selectivity = left_nns * right_nns * (1 - 1/std::max(left_ndv, right_ndv)); + } + } + } else { + // different table + ObOptColumnStatHandle left_handler; + ObOptColumnStatHandle right_handler; + obj_cmp_func cmp_func = NULL; + bool calc_with_hist = false; + if (!ObObjCmpFuncs::can_cmp_without_cast(left_col->get_result_type(), 
+ right_col->get_result_type(), + CO_EQ, cmp_func)) { + // do nothing + } else if (OB_FAIL(ObOptSelectivity::get_histogram_by_column(table_metas, ctx, left_col->get_table_id(), + left_col->get_column_id(), left_handler))) { + LOG_WARN("failed to get histogram by column", K(ret)); + } else if (OB_FAIL(ObOptSelectivity::get_histogram_by_column(table_metas, ctx, right_col->get_table_id(), + right_col->get_column_id(), right_handler))) { + LOG_WARN("failed to get histogram by column", K(ret)); + } else if (left_handler.stat_ != NULL && right_handler.stat_ != NULL && + left_handler.stat_->get_histogram().is_frequency() && + right_handler.stat_->get_histogram().is_frequency()) { + calc_with_hist = true; + } + if (OB_FAIL(ret)) { + } else if (IS_SEMI_ANTI_JOIN(ctx.get_join_type())) { + if (OB_ISNULL(ctx.get_left_rel_ids()) || OB_ISNULL(ctx.get_right_rel_ids())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ctx.get_left_rel_ids()), K(ctx.get_right_rel_ids())); + } else if (left_expr->get_relation_ids().overlap(*ctx.get_right_rel_ids()) || + right_expr->get_relation_ids().overlap(*ctx.get_left_rel_ids())) { + std::swap(left_ndv, right_ndv); + std::swap(left_nns, right_nns); + } + if (OB_SUCC(ret)) { + if (calc_with_hist) { + double total_rows = 0; + double left_rows = 0; + double left_null = 0; + double right_rows = 0; + double right_null = 0; + if (OB_FAIL(ObOptSelectivity::get_join_pred_rows(left_handler.stat_->get_histogram(), + right_handler.stat_->get_histogram(), + true, total_rows))) { + LOG_WARN("failed to get join pred rows", K(ret)); + } else if (OB_FAIL(ObOptSelectivity::get_column_basic_info(ctx.get_plan()->get_basic_table_metas(), ctx, + *left_expr, NULL, &left_null, NULL, &left_rows))) { + LOG_WARN("failed to get column basic info", K(ret)); + } else if (OB_FAIL(ObOptSelectivity::get_column_basic_info(ctx.get_plan()->get_basic_table_metas(), ctx, + *right_expr, NULL, &right_null, NULL, &right_rows))) { + LOG_WARN("failed to get column basic 
info", K(ret)); + } else if (T_OP_NSEQ == op_type) { + total_rows += right_null > 0 ? left_null : 0; + selectivity = total_rows / left_rows; + } else if (T_OP_EQ == op_type) { + selectivity = total_rows / left_rows; + } else if (T_OP_NE == op_type) { + selectivity = ((left_rows - left_null) * (right_rows - right_null) - total_rows) + / left_rows / right_rows; + } + } else { + /** + * ## non NULL safe + * a) semi: `(min(ndv1, ndv2) / ndv1) * (1.0 - nullfrac1)` + * ## NULL safe + * a) semi: `(min(ndv1, ndv2) / ndv1) * (1.0 - nullfrac1) + nullfrac2 > 0 && nullsafe ? nullfrac1: 0` + */ + if (IS_LEFT_SEMI_ANTI_JOIN(ctx.get_join_type())) { + if (T_OP_NSEQ == op_type) { + selectivity = (std::min(left_ndv, right_ndv) / left_ndv) * left_nns; + if (1 - right_nns > 0) { + selectivity += (1 - left_nns); + } + } else if (T_OP_EQ == op_type) { + selectivity = (std::min(left_ndv, right_ndv) / left_ndv) * left_nns; + } else if (T_OP_NE == op_type) { + if (right_ndv > 1.0) { + // if right ndv > 1.0, then there must exist one value not equal to left value + selectivity = left_nns; + } else { + selectivity = (1 - 1 / left_ndv) * left_nns; + } + } + } else { + if (T_OP_NSEQ == op_type) { + selectivity = (std::min(left_ndv, right_ndv) / right_ndv) * right_nns; + if (1 - left_nns > 0) { + selectivity += (1 - right_nns); + } + } else if (T_OP_EQ == op_type) { + selectivity = (std::min(left_ndv, right_ndv) / right_ndv) * right_nns; + } else if (T_OP_NE == op_type) { + if (left_ndv > 1.0) { + // if left ndv > 1.0, then there must exist one value not equal to right value + selectivity = right_nns; + } else { + selectivity = (1 - 1 / right_ndv) * right_nns; + } + } + } + } + } + if (OB_SUCC(ret) && selectivity >= 1.0 && IS_ANTI_JOIN(ctx.get_join_type())) { + selectivity = 1 - DEFAULT_ANTI_JOIN_SEL; + } + } else { + // inner join, outer join + if (calc_with_hist) { + // use frequency histogram calculate selectivity + double total_rows = 0; + double left_rows = 0; + double left_null = 0; + 
double right_rows = 0; + double right_null = 0; + if (OB_FAIL(ObOptSelectivity::get_join_pred_rows(left_handler.stat_->get_histogram(), + right_handler.stat_->get_histogram(), + false, total_rows))) { + LOG_WARN("failed to get join pred rows", K(ret)); + } else if (OB_FAIL(ObOptSelectivity::get_column_basic_info(ctx.get_plan()->get_basic_table_metas(), ctx, + *left_expr, NULL, &left_null, NULL, &left_rows))) { + LOG_WARN("failed to get column basic info", K(ret)); + } else if (OB_FAIL(ObOptSelectivity::get_column_basic_info(ctx.get_plan()->get_basic_table_metas(), ctx, + *right_expr, NULL, &right_null, NULL, &right_rows))) { + LOG_WARN("failed to get column basic info", K(ret)); + } else if (T_OP_NSEQ == op_type) { + selectivity = (total_rows + left_null * right_null) / left_rows / right_rows; + } else if (T_OP_EQ == op_type) { + selectivity = total_rows / left_rows / right_rows; + } else if (T_OP_NE == op_type) { + selectivity = ((left_rows - left_null) * (right_rows - right_null) - total_rows) + / left_rows / right_rows; + } + } else { + /** + * ## non NULL safe + * (1.0 - nullfrac1) * (1.0 - nullfrac2) / MAX(nd1, nd2) + * ## NULL safe + * (1.0 - nullfrac1) * (1.0 - nullfrac2) / MAX(nd1, nd2) + nullfraf1 * nullfrac2 + * 目前不会特殊考虑 outer join 的选择率, 而是在外层对行数进行 revise. 
+ */ + if (T_OP_NSEQ == op_type) { + selectivity = left_nns * right_nns / std::max(left_ndv, right_ndv) + + (1 - left_nns) * (1 - right_nns); + } else if (T_OP_EQ == op_type) { + selectivity = left_nns * right_nns / std::max(left_ndv, right_ndv); + } else if (T_OP_NE == op_type) { + selectivity = left_nns * right_nns * (1 - 1/std::max(left_ndv, right_ndv)); + } + } + } + } + } else { + // func(col) = func(col) or col = func(col) + double left_sel = 0.0; + double right_sel = 0.0; + if (OB_FAIL(get_simple_equal_sel(table_metas, ctx, *left_expr, NULL, + T_OP_NSEQ == op_type, left_sel))) { + LOG_WARN("Failed to get simple predicate sel", K(ret)); + } else if (OB_FAIL(get_simple_equal_sel(table_metas, ctx, *right_expr, NULL, + T_OP_NSEQ == op_type, right_sel))) { + LOG_WARN("Failed to get simple predicate sel", K(ret)); + } else if (IS_SEMI_ANTI_JOIN(ctx.get_join_type())) { + if (OB_ISNULL(ctx.get_left_rel_ids()) || OB_ISNULL(ctx.get_right_rel_ids())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ctx.get_left_rel_ids()), K(ctx.get_right_rel_ids())); + } else if (left_expr->get_relation_ids().overlap(*ctx.get_right_rel_ids()) || + right_expr->get_relation_ids().overlap(*ctx.get_left_rel_ids())) { + std::swap(left_sel, right_sel); + } + if (OB_SUCC(ret)) { + if (IS_LEFT_SEMI_ANTI_JOIN(ctx.get_join_type())) { + if (T_OP_NE == op_type) { + selectivity = 1 - left_sel; + } else if (right_sel < OB_DOUBLE_EPSINON) { + selectivity = 1.0; + } else { + selectivity = std::min(left_sel / right_sel, 1.0); + } + if (selectivity >= 1.0 && IS_ANTI_JOIN(ctx.get_join_type())) { + selectivity = 1 - left_sel; + } + } else { + if (T_OP_NE == op_type) { + selectivity = 1 - right_sel; + } else if (left_sel < OB_DOUBLE_EPSINON) { + selectivity = 1.0; + } else { + selectivity = std::min(right_sel / left_sel, 1.0); + } + if (selectivity >= 1.0 && IS_ANTI_JOIN(ctx.get_join_type())) { + selectivity = 1 - right_sel; + } + } + } + } else { + selectivity = std::min(left_sel, 
right_sel); + if (T_OP_NE == op_type) { + selectivity = 1 - selectivity; + } + } + } + return ret; +} + +int ObAggSelEstimator::get_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + double &selectivity, + ObIArray &all_predicate_sel) +{ + int ret = OB_SUCCESS; + const ObRawExpr &qual = *expr_; + if (OB_ISNULL(expr_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null expr", KPC(this)); + } else if (OB_FAIL(get_agg_sel(table_metas, ctx, qual, selectivity))) { + LOG_WARN("failed to get agg expr selectivity", K(ret), K(qual)); + } + return ret; +} + +int ObAggSelEstimator::get_agg_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr &qual, + double &selectivity) +{ + int ret = OB_SUCCESS; + const double origin_rows = ctx.get_row_count_1(); // rows before group by + const double grouped_rows = ctx.get_row_count_2();// rows after group by + bool is_valid = false; + const ObRawExpr *aggr_expr = NULL; + const ObRawExpr *const_expr1 = NULL; + const ObRawExpr *const_expr2 = NULL; + selectivity = DEFAULT_AGG_RANGE; + ObItemType type = qual.get_expr_type(); + // for aggregate function in having clause, only support + // = <=> != > >= < <= [not] btw [not] in + if (-1.0 == origin_rows || -1.0 == grouped_rows) { + // 不是在group by层计算的having filter,使用默认选择率 + // e.g. 
select * from t7 group by c1 having count(*) > (select c1 from t8 limit 1); + // 该sql中having filter需要在subplan filter中计算 + } else if ((type >= T_OP_EQ && type <= T_OP_NE) || + T_OP_IN == type || T_OP_NOT_IN == type || + T_OP_BTW == type || T_OP_NOT_BTW == type) { + if (OB_FAIL(is_valid_agg_qual(qual, is_valid, aggr_expr, const_expr1, const_expr2))) { + LOG_WARN("failed to check is valid agg qual", K(ret)); + } else if (!is_valid) { + /* use default selectivity */ + } else if (OB_ISNULL(aggr_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret)); + } else if (T_FUN_MAX == aggr_expr->get_expr_type() || + T_FUN_MIN == aggr_expr->get_expr_type() || + T_FUN_COUNT == aggr_expr->get_expr_type()) { + if (T_OP_EQ == type || T_OP_NSEQ == type) { + selectivity = DEFAULT_AGG_EQ; + } else if (T_OP_NE == type || IS_RANGE_CMP_OP(type)) { + selectivity = DEFAULT_AGG_RANGE; + } else if (T_OP_BTW == type) { + // agg(col) btw const1 and const2 <=> agg(col) > const1 AND agg(col) < const2 + selectivity = DEFAULT_AGG_RANGE * DEFAULT_AGG_RANGE; + } else if (T_OP_NOT_BTW == type) { + // agg(col) not btw const1 and const2 <=> agg(col) < const1 OR agg(col) > const2 + // 计算方式参考OR + selectivity = DEFAULT_AGG_RANGE + DEFAULT_AGG_RANGE; + } else if (T_OP_IN == type) { + /** + * oracle 对 max/min/count(col) in (const1, const2, const3, ...)的选择率估计 + * 当const的数量小于等于5时,每增加一个const值,选择率增加 DEFAULT_AGG_EQ(0.01) + * 当const的数量大于5时,每增加一个const值,选择率增加 + * DEFAULT_AGG_EQ - 0.001 * (const_num - 5) + * # 这里的选择率增加量采用线性下降其实并不是很精确,oracle的选择率增加量可能采用了了指数下降, + * 在测试过程中测试了1-30列递增的情况,线性下降和指数下降区别不大。 + */ + int64_t N; + if(OB_ISNULL(const_expr1)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null"); + } else if (FALSE_IT(N = const_expr1->get_param_count())) { + } else if (N < 6) { + selectivity = DEFAULT_AGG_EQ * N; + } else { + N = std::min(N, 15L); + selectivity = DEFAULT_AGG_EQ * 5 + (DEFAULT_AGG_EQ - 0.0005 * (N - 4)) * (N - 5); + } + } else if (T_OP_NOT_IN == type) { + // agg(col) not 
in (const1, const2, ...) <=> agg(col) != const1 and agg(col) != const2 and ... + if(OB_ISNULL(const_expr1)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null"); + } else { + selectivity = std::pow(DEFAULT_AGG_RANGE, const_expr1->get_param_count()); + } + } else { /* use default selectivity */ } + } else if (T_FUN_SUM == aggr_expr->get_expr_type() || T_FUN_AVG == aggr_expr->get_expr_type()) { + LOG_TRACE("show group by origen rows and grouped rows", K(origin_rows), K(grouped_rows)); + double rows_per_group = grouped_rows == 0.0 ? origin_rows : origin_rows / grouped_rows; + if (OB_FAIL(get_agg_sel_with_minmax(table_metas, ctx, *aggr_expr, const_expr1, + const_expr2, type, selectivity, rows_per_group))) { + LOG_WARN("failed to get agg sel with minmax", K(ret)); + } + } else { /* not max/min/count/sum/avg, use default selectivity */ } + } else { /* use default selectivity */ } + return ret; +} + +int ObAggSelEstimator::get_agg_sel_with_minmax(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr &aggr_expr, + const ObRawExpr *const_expr1, + const ObRawExpr *const_expr2, + const ObItemType type, + double &selectivity, + const double rows_per_group) +{ + int ret = OB_SUCCESS; + selectivity = DEFAULT_AGG_RANGE; + const ParamStore *params = ctx.get_params(); + const ObDMLStmt *stmt = ctx.get_stmt(); + ObExecContext *exec_ctx = ctx.get_opt_ctx().get_exec_ctx(); + ObIAllocator &alloc = ctx.get_allocator(); + ObObj result1; + ObObj result2; + bool got_result; + double distinct_sel = 1.0; + ObObj maxobj; + ObObj minobj; + maxobj.set_max_value(); + minobj.set_min_value(); + if (OB_ISNULL(aggr_expr.get_param_expr(0)) || OB_ISNULL(params) || + OB_ISNULL(stmt) || OB_ISNULL(const_expr1)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret), K(aggr_expr.get_param_expr(0)), + K(params), K(stmt), K(const_expr1)); + } else if (!aggr_expr.get_param_expr(0)->is_column_ref_expr()) { + // 只处理sum(column)的形式,sum(column + 
1)/sum(column1 + column2)都是用默认选择率 + } else if (OB_FAIL(ObOptSelectivity::get_column_basic_sel(table_metas, ctx, *aggr_expr.get_param_expr(0), + &distinct_sel, NULL))) { + LOG_WARN("failed to get column basic sel", K(ret)); + } else if (OB_FAIL(ObOptSelectivity::get_column_min_max(table_metas, ctx, *aggr_expr.get_param_expr(0), + minobj, maxobj))) { + LOG_WARN("failed to get column min max", K(ret)); + } else if (minobj.is_min_value() || maxobj.is_max_value()) { + // do nothing + } else if (T_OP_IN == type || T_OP_NOT_IN == type) { + if (OB_UNLIKELY(T_OP_ROW != const_expr1->get_expr_type())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("expr should be row", K(ret), K(*const_expr1)); + } else { + // 如果row超过5列,则计算5列上的选择率,再按比例放大 + int64_t N = const_expr1->get_param_count() > 5 ? 5 :const_expr1->get_param_count(); + selectivity = T_OP_IN == type ? 0.0 : 1.0; + for (int64_t i = 0; OB_SUCC(ret) && i < N; ++i) { + double tmp_sel = T_OP_IN == type ? DEFAULT_AGG_EQ : DEFAULT_AGG_RANGE; + const ObRawExpr *sub_expr = NULL; + ObObj tmp_result; + if (OB_ISNULL(sub_expr = const_expr1->get_param_expr(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret)); + } else if (!ObOptEstUtils::is_calculable_expr(*sub_expr, params->count())) { + } else if (OB_FAIL(ObSQLUtils::calc_const_or_calculable_expr(exec_ctx, + sub_expr, + tmp_result, + got_result, + alloc))) { + LOG_WARN("failed to calc const or calculable expr", K(ret)); + } else if (!got_result) { + // do nothing + } else { + tmp_sel = get_agg_eq_sel(maxobj, minobj, tmp_result, distinct_sel, rows_per_group, + T_OP_IN == type, T_FUN_SUM == aggr_expr.get_expr_type()); + } + if (T_OP_IN == type) { + selectivity += tmp_sel; + } else { + selectivity *= tmp_sel; + } + } + if (OB_SUCC(ret)) { + selectivity *= static_cast(const_expr1->get_param_count()) + / static_cast(N); + } + } + } else if (!ObOptEstUtils::is_calculable_expr(*const_expr1, params->count())) { + } else if 
(OB_FAIL(ObSQLUtils::calc_const_or_calculable_expr(exec_ctx, + const_expr1, + result1, + got_result, + alloc))) { + LOG_WARN("failed to calc const or calculable expr", K(ret)); + } else if (!got_result) { + // do nothing + } else if (T_OP_EQ == type || T_OP_NSEQ == type) { + selectivity = get_agg_eq_sel(maxobj, minobj, result1, distinct_sel, rows_per_group, + true, T_FUN_SUM == aggr_expr.get_expr_type()); + } else if (T_OP_NE == type) { + selectivity = get_agg_eq_sel(maxobj, minobj, result1, distinct_sel, rows_per_group, + false, T_FUN_SUM == aggr_expr.get_expr_type()); + } else if (IS_RANGE_CMP_OP(type)) { + selectivity = get_agg_range_sel(maxobj, minobj, result1, rows_per_group, + type, T_FUN_SUM == aggr_expr.get_expr_type()); + } else if (T_OP_BTW == type || T_OP_NOT_BTW == type) { + if (OB_ISNULL(const_expr2)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret)); + } else if (!ObOptEstUtils::is_calculable_expr(*const_expr2, params->count())) { + } else if (OB_FAIL(ObSQLUtils::calc_const_or_calculable_expr(exec_ctx, + const_expr2, + result2, + got_result, + alloc))) { + LOG_WARN("Failed to calc const or calculable expr", K(ret)); + } else if (!got_result) { + // do nothing + } else { + selectivity = get_agg_btw_sel(maxobj, minobj, result1, result2, rows_per_group, + type, T_FUN_SUM == aggr_expr.get_expr_type()); + } + } else { /* do nothing */ } + return ret; +} + +// 计算sum/avg(col) =/<=>/!= const的选择率 +double ObAggSelEstimator::get_agg_eq_sel(const ObObj &maxobj, + const ObObj &minobj, + const ObObj &constobj, + const double distinct_sel, + const double rows_per_group, + const bool is_eq, + const bool is_sum) +{ + int ret = OB_SUCCESS; + double sel_ret = DEFAULT_AGG_EQ; + if (constobj.is_null()) { + // sum/avg(col)的结果中不会存在null,即使是null safe equal选择率依然为0 + sel_ret = 0.0; + } else if (minobj.is_integer_type() || + (minobj.is_number() && minobj.get_meta().get_obj_meta().get_scale() == 0) || + (minobj.is_unumber() && 
minobj.get_meta().get_obj_meta().get_scale() == 0)) { + double const_val; + double min_val; + double max_val; + // 如果转化的时候出错,就使用默认的选择率 + if (OB_FAIL(ObOptEstObjToScalar::convert_obj_to_double(&constobj, const_val)) || + OB_FAIL(ObOptEstObjToScalar::convert_obj_to_double(&minobj, min_val)) || + OB_FAIL(ObOptEstObjToScalar::convert_obj_to_double(&maxobj, max_val))) { + LOG_WARN("failed to convert obj to double", K(ret)); + } else { + LOG_TRACE("get values for agg eq sel", K(max_val), K(min_val), K(const_val)); + if (is_sum) { + min_val *= rows_per_group; + max_val *= rows_per_group; + } + int64_t length = max_val - min_val + 1; + if (is_eq) { + sel_ret = 1.0 / length; + if (const_val < min_val) { + sel_ret -= sel_ret * (min_val - const_val) / length; + } else if (const_val > max_val) { + sel_ret -= sel_ret * (const_val - max_val) / length; + } else {} + } else { + sel_ret = 1.0 - 1.0 / length; + } + } + } else { + // 对于非整数的类型,认为sum/avg(col)后 ndv 不会发生显著变化,直接使用该列原有的ndv计算 + sel_ret = is_eq ? 
distinct_sel : 1.0 - distinct_sel; + } + sel_ret = ObOptSelectivity::revise_between_0_1(sel_ret); + return sel_ret; +} + +// 计算sum/avg(col) >/>=/ 1 <=> c1 >= 2, 对非int类型的列并不精确 + const_val += 1.0; + } + if (const_val <= min_val) { + sel_ret = 1.0; + } else if (const_val <= max_val) { + sel_ret = (max_val - const_val + 1.0) / length; + } else { + sel_ret = 1.0 / length; + sel_ret -= sel_ret * (const_val - max_val) / length; + } + } else if (T_OP_LE == type || T_OP_LT == type) { + if (T_OP_LT == type) { + // c1 < 1 <=> c1 <= 0, 对非int类型的列并不精确 + const_val -= 1.0; + } + if (const_val >= max_val) { + sel_ret = 1.0; + } else if (const_val >= min_val) { + sel_ret = (const_val - min_val + 1.0) / length; + } else { + sel_ret = 1.0 / length; + sel_ret -= sel_ret * (min_val - const_val) / length; + } + } else { /* do nothing */ } + } + } + sel_ret = ObOptSelectivity::revise_between_0_1(sel_ret); + return sel_ret; +} + +// 计算sum/avg(col) [not] between const1 and const2的选择率 +double ObAggSelEstimator::get_agg_btw_sel(const ObObj &maxobj, + const ObObj &minobj, + const ObObj &constobj1, + const ObObj &constobj2, + const double rows_per_group, + const ObItemType type, + const bool is_sum) +{ + int ret = OB_SUCCESS; + double sel_ret = DEFAULT_AGG_RANGE; + if (constobj1.is_null() || constobj2.is_null()) { + sel_ret= 0.0; + } else { + double min_val; + double max_val; + double const_val1; + double const_val2; + // 如果转化的时候出错,就使用默认的选择率 + if (OB_FAIL(ObOptEstObjToScalar::convert_obj_to_double(&minobj, min_val))) { + LOG_WARN("failed to convert obj to double", K(ret)); + } else if (OB_FAIL(ObOptEstObjToScalar::convert_obj_to_double(&maxobj, max_val))) { + LOG_WARN("failed to convert obj to double", K(ret)); + } else if (OB_FAIL(ObOptEstObjToScalar::convert_obj_to_double(&constobj1, const_val1))) { + LOG_WARN("failed to convert obj to double", K(ret)); + } else if (OB_FAIL(ObOptEstObjToScalar::convert_obj_to_double(&constobj2, const_val2))) { + LOG_WARN("failed to convert obj to double", 
K(ret)); + } else { + LOG_TRACE("get values for agg between sel", K(max_val), K(min_val), K(const_val1), K(const_val2)); + if (is_sum) { + min_val *= rows_per_group; + max_val *= rows_per_group; + } + double length = max_val - min_val + 1.0; + if (T_OP_BTW == type) { + if (const_val1 > const_val2) { + sel_ret = 0.0; + } else { + double tmp_min = std::max(const_val1, min_val); + double tmp_max = std::min(const_val2, max_val); + sel_ret = (tmp_max - tmp_min + 1.0) / length; + } + } else if (T_OP_NOT_BTW == type){ + if (const_val1 > const_val2) { + sel_ret = 1.0; + } else { + double tmp_min = std::max(const_val1, min_val); + double tmp_max = std::min(const_val2, max_val); + sel_ret = 1 - (tmp_max - tmp_min + 1.0) / length; + } + } else { /* do nothing */ } + } + } + sel_ret = ObOptSelectivity::revise_between_0_1(sel_ret); + return sel_ret; +} + +int ObAggSelEstimator::is_valid_agg_qual(const ObRawExpr &qual, + bool &is_valid, + const ObRawExpr *&aggr_expr, + const ObRawExpr *&const_expr1, + const ObRawExpr *&const_expr2) +{ + int ret = OB_SUCCESS; + is_valid = false; + const ObRawExpr *expr0 = NULL; + const ObRawExpr *expr1 = NULL; + const ObRawExpr *expr2 = NULL; + if (T_OP_BTW == qual.get_expr_type() || T_OP_NOT_BTW == qual.get_expr_type()) { + if (OB_ISNULL(expr0 = qual.get_param_expr(0)) || + OB_ISNULL(expr1 = qual.get_param_expr(1)) || + OB_ISNULL(expr2 = qual.get_param_expr(2))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret)); + } else if (expr0->has_flag(IS_AGG) && + expr1->is_const_expr() && + expr2->is_const_expr()) { + is_valid = true; + aggr_expr = expr0; + const_expr1 = expr1; + const_expr2 = expr2; + } else { /* do nothing */ } + } else { + if (OB_ISNULL(expr0 = qual.get_param_expr(0)) || OB_ISNULL(expr1 = qual.get_param_expr(1))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret)); + } else if (T_OP_IN == qual.get_expr_type() || T_OP_NOT_IN == qual.get_expr_type()) { + if (!qual.has_flag(CNT_SUB_QUERY) && + 
expr0->has_flag(IS_AGG) && + T_OP_ROW == expr1->get_expr_type()) { + is_valid = true; + aggr_expr = expr0; + const_expr1 = expr1; + } else { /* do nothing */ } + } else if (expr0->has_flag(IS_AGG) && + expr1->is_const_expr()) { + is_valid = true; + aggr_expr = expr0; + const_expr1 = expr1; + } else if (expr0->is_const_expr() && + expr1->has_flag(IS_AGG)) { + is_valid = true; + aggr_expr = expr1; + const_expr1 = expr0; + } else { /* do nothing */ } + } + return ret; +} + +int ObLikeSelEstimator::create_estimator(ObSelEstimatorFactory &factory, + const OptSelectivityCtx &ctx, + const ObRawExpr &expr, + ObSelEstimator *&estimator) +{ + int ret = OB_SUCCESS; + estimator = NULL; + ObLikeSelEstimator *like_estimator = NULL; + if (T_OP_LIKE != expr.get_expr_type()) { + // do nothing + } else if (OB_FAIL(factory.create_estimator_inner(like_estimator))) { + LOG_WARN("failed to create estimator ", K(ret)); + } else { + like_estimator->expr_ = &expr; + estimator = like_estimator; + const ParamStore *params = ctx.get_params(); + if (3 != expr.get_param_count()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("like expr should have 3 param", K(ret), K(expr)); + } else if (OB_ISNULL(params) || + OB_ISNULL(like_estimator->variable_ = expr.get_param_expr(0)) || + OB_ISNULL(like_estimator->pattern_ = expr.get_param_expr(1)) || + OB_ISNULL(like_estimator->escape_ = expr.get_param_expr(2))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null params", K(ret), K(params), K(expr)); + } else if (OB_FAIL(ObOptimizerUtil::get_expr_without_lossless_cast(like_estimator->variable_, + like_estimator->variable_))) { + LOG_WARN("failed to get expr without lossless cast", K(ret)); + } else if (like_estimator->variable_->is_column_ref_expr() && + like_estimator->pattern_->is_static_const_expr() && + like_estimator->escape_->is_static_const_expr()) { + bool is_start_with = false; + if (OB_FAIL(ObOptEstUtils::if_expr_start_with_patten_sign(params, like_estimator->pattern_, + like_estimator->escape_, + 
ctx.get_opt_ctx().get_exec_ctx(), + ctx.get_allocator(), + is_start_with, + like_estimator->match_all_str_))) { + LOG_WARN("failed to check if expr start with percent sign", K(ret)); + } else if (like_estimator->match_all_str_) { + like_estimator->can_calc_sel_ = true; + } else if (is_lob_storage(like_estimator->variable_->get_data_type())) { + // do nothing + } else if (!is_start_with) { + like_estimator->can_calc_sel_ = true; + } + } + } + return ret; +} + +int ObLikeSelEstimator::can_calc_like_sel(const OptSelectivityCtx &ctx, const ObRawExpr &expr, bool &can_calc_sel) +{ + int ret = OB_SUCCESS; + can_calc_sel = false; + if (T_OP_LIKE == expr.get_expr_type()) { + const ParamStore *params = ctx.get_params(); + const ObRawExpr *variable = NULL; + const ObRawExpr *pattern = NULL; + const ObRawExpr *escape = NULL; + if (3 != expr.get_param_count()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("like expr should have 3 param", K(ret), K(expr)); + } else if (OB_ISNULL(params) || + OB_ISNULL(variable = expr.get_param_expr(0)) || + OB_ISNULL(pattern = expr.get_param_expr(1)) || + OB_ISNULL(escape = expr.get_param_expr(2))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null params", K(ret), K(params), K(expr)); + } else if (OB_FAIL(ObOptimizerUtil::get_expr_without_lossless_cast(variable, + variable))) { + LOG_WARN("failed to get expr without lossless cast", K(ret)); + } else if (variable->is_column_ref_expr() && + pattern->is_static_const_expr() && + escape->is_static_const_expr()) { + bool is_start_with = false; + bool match_all_str = false; + if (OB_FAIL(ObOptEstUtils::if_expr_start_with_patten_sign(params, pattern, escape, + ctx.get_opt_ctx().get_exec_ctx(), + ctx.get_allocator(), + is_start_with, + match_all_str))) { + LOG_WARN("failed to check if expr start with percent sign", K(ret)); + } else if (match_all_str) { + can_calc_sel = true; + } else if (is_lob_storage(variable->get_data_type())) { + // do nothing + } else if (!is_start_with) { + can_calc_sel = true; + } + } 
+  }
+  return ret;
+}
+
+/**
+ * Compute the selectivity of the LIKE predicate held in expr_.
+ *
+ * @param table_metas        table statistics, forwarded to the ObOptSelectivity helpers
+ * @param ctx                selectivity context, forwarded to the ObOptSelectivity helpers
+ * @param selectivity        [out] estimated selectivity
+ * @param all_predicate_sel  previously computed (expr, selectivity) pairs; searched
+ *                           for this predicate when no statistic-based estimate applies
+ * @return OB_SUCCESS, OB_ERR_UNEXPECTED on null/invalid members, or the error
+ *         returned by an ObOptSelectivity helper
+ */
+int ObLikeSelEstimator::get_sel(const OptTableMetas &table_metas,
+                                const OptSelectivityCtx &ctx,
+                                double &selectivity,
+                                ObIArray &all_predicate_sel)
+{
+  int ret = OB_SUCCESS;
+  if (OB_ISNULL(expr_) || OB_ISNULL(variable_)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected null expr", KPC(this));
+  } else {
+    // Bind the reference only after expr_ has been checked: forming `*expr_`
+    // from a null pointer is undefined behaviour even if it is never read.
+    const ObRawExpr &qual = *expr_;
+    if (match_all_str_ && can_calc_sel_) {
+      // the selectivity is the `nns` value reported for the column
+      // (presumably its not-null fraction -- confirm against get_column_ndv_and_nns)
+      double nns = 0.0;
+      if (OB_FAIL(ObOptSelectivity::get_column_ndv_and_nns(table_metas, ctx, *variable_, NULL, &nns))) {
+        LOG_WARN("failed to get nns", K(ret));
+      } else {
+        selectivity = nns;
+      }
+    } else if (can_calc_sel_) {
+      // estimate the predicate as a range on the column
+      if (OB_UNLIKELY(!variable_->is_column_ref_expr())) {
+        ret = OB_ERR_UNEXPECTED;
+        LOG_WARN("unexpected expr", KPC(variable_));
+      } else if (OB_FAIL(ObOptSelectivity::get_column_range_sel(table_metas, ctx,
+                                                                static_cast(*variable_),
+                                                                qual, selectivity))) {
+        LOG_WARN("Failed to get column range selectivity", K(ret));
+      }
+    } else if (is_lob_storage(variable_->get_data_type())) {
+      // no statistics for lob type, use default selectivity
+      selectivity = DEFAULT_CLOB_LIKE_SEL;
+    } else {
+      // try to find the selectivity calculated by dynamic sampling
+      int64_t idx = -1;
+      if (ObOptimizerUtil::find_item(all_predicate_sel, ObExprSelPair(&qual, 0), &idx)) {
+        selectivity = all_predicate_sel.at(idx).sel_;
+      } else {
+        selectivity = DEFAULT_INEQ_SEL;
+      }
+    }
+  }
+  return ret;
+}
+
+/**
+ * Create an estimator for NOT / AND / OR / LNNVL / BOOL expressions.
+ *
+ * For any other expression type `estimator` is left NULL and OB_SUCCESS is
+ * returned. Otherwise one child estimator is created per parameter of `expr`:
+ * for T_OP_AND they are added through append_estimators(), for every other
+ * type they are pushed back one by one.
+ * Note that `estimator` is assigned before the children are built, so it is
+ * non-NULL even when building a child fails and an error is returned.
+ */
+int ObBoolOpSelEstimator::create_estimator(ObSelEstimatorFactory &factory,
+                                           const OptSelectivityCtx &ctx,
+                                           const ObRawExpr &expr,
+                                           ObSelEstimator *&estimator)
+{
+  int ret = OB_SUCCESS;
+  estimator = NULL;
+  ObBoolOpSelEstimator *bool_estimator = NULL;
+  if (T_OP_NOT != expr.get_expr_type() &&
+      T_OP_AND != expr.get_expr_type() &&
+      T_OP_OR != expr.get_expr_type() &&
+      T_FUN_SYS_LNNVL != expr.get_expr_type() &&
+      T_OP_BOOL != expr.get_expr_type()) {
+    // do nothing
+  } else if (OB_FAIL(factory.create_estimator_inner(bool_estimator))) {
+    LOG_WARN("failed to create estimator ", K(ret));
+  } else {
+    bool_estimator->expr_ = &expr;
+    estimator = bool_estimator;
+    for (int64_t i = 0; OB_SUCC(ret) && i < expr.get_param_count(); ++i) {
+      const ObRawExpr *child_expr = expr.get_param_expr(i);
+      ObSelEstimator *child_estimator = NULL;
+      if (OB_ISNULL(child_expr)) {
+        ret = OB_ERR_UNEXPECTED;
+        LOG_WARN("get null expr", K(ret));
+      } else if (OB_FAIL(SMART_CALL(factory.create_estimator(ctx, child_expr, child_estimator)))) {
+        LOG_WARN("failed to create estimator", K(ret), KPC(child_expr));
+      } else if (T_OP_AND == expr.get_expr_type()) {
+        if (OB_FAIL(append_estimators(bool_estimator->child_estimators_, child_estimator))) {
+          LOG_WARN("failed to append estimators", K(ret));
+        }
+      } else {
+        if (OB_FAIL(bool_estimator->child_estimators_.push_back(child_estimator))) {
+          LOG_WARN("failed to push back estimators", K(ret));
+        }
+      }
+    }
+  }
+  return ret;
+}
+
+/**
+ * @return true as soon as one non-null child estimator reports
+ *         tend_to_use_ds(); false when none does (or there are no children)
+ */
+bool ObBoolOpSelEstimator::tend_to_use_ds()
+{
+  bool bret = false;
+  for (int64_t i = 0; !bret && i < child_estimators_.count(); ++i) {
+    ObSelEstimator *estimator = child_estimators_.at(i);
+    bret |= OB_NOT_NULL(estimator) ? estimator->tend_to_use_ds() : false;
+  }
+  return bret;
+}
+
+/**
+ * Compute the selectivity of the boolean expression held in expr_.
+ *
+ *  - LNNVL(p):  1 - sel(p)
+ *  - BOOL(p):   sel(p)
+ *  - NOT p:     1 - null_sel - sel(p) when p references exactly one column and
+ *               is not IS / IS NOT / <=>; otherwise 1 - sel(p)
+ *  - AND:       child selectivities combined by get_filters_selectivity()
+ *  - OR:        combined as MUTEX_OR when check_mutex_or() says so, otherwise
+ *               1 - sel(!p1 and !p2 and ...)
+ *
+ * @param selectivity        [out] estimated selectivity; not clamped here
+ * @param all_predicate_sel  forwarded unchanged to the child estimators
+ * @return OB_SUCCESS, OB_ERR_UNEXPECTED for a null expr_ / unexpected child
+ *         layout / unsupported expression type, or a child/helper error
+ */
+int ObBoolOpSelEstimator::get_sel(const OptTableMetas &table_metas,
+                                  const OptSelectivityCtx &ctx,
+                                  double &selectivity,
+                                  ObIArray &all_predicate_sel)
+{
+  int ret = OB_SUCCESS;
+  if (OB_ISNULL(expr_)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected null expr", KPC(this));
+  } else {
+    // Bind the reference only after the null check above.
+    const ObRawExpr &qual = *expr_;
+    if (T_OP_NOT == qual.get_expr_type() ||
+        T_FUN_SYS_LNNVL == qual.get_expr_type() ||
+        T_OP_BOOL == qual.get_expr_type()) {
+      ObSEArray cur_vars;
+      const ObRawExpr *child_expr = NULL;
+      ObSelEstimator *estimator = NULL;
+      double tmp_selectivity = 0.0;
+      if (OB_UNLIKELY(child_estimators_.count() != 1) ||
+          OB_ISNULL(child_expr = qual.get_param_expr(0)) ||
+          OB_ISNULL(estimator = child_estimators_.at(0))) {
+        ret = OB_ERR_UNEXPECTED;
+        LOG_WARN("unexpected param", KPC(this));
+      } else if (OB_FAIL(estimator->get_sel(table_metas, ctx, tmp_selectivity, all_predicate_sel))) {
+        LOG_WARN("failed to get sel", KPC(estimator), K(ret));
+      } else if (T_FUN_SYS_LNNVL == qual.get_expr_type()) {
+        selectivity = 1.0 - tmp_selectivity;
+      } else if (T_OP_BOOL == qual.get_expr_type()) {
+        selectivity = tmp_selectivity;
+      } else if (OB_FAIL(ObRawExprUtils::extract_column_exprs(child_expr, cur_vars))) {
+        LOG_WARN("failed to extract column exprs", K(ret));
+      } else if (1 == cur_vars.count() &&
+                 T_OP_IS != child_expr->get_expr_type() &&
+                 T_OP_IS_NOT != child_expr->get_expr_type() &&
+                 T_OP_NSEQ != child_expr->get_expr_type()) { // for only one column, consider null_sel
+        double null_sel = 1.0;
+        if (OB_ISNULL(cur_vars.at(0))) {
+          ret = OB_ERR_UNEXPECTED;
+          LOG_WARN("get null expr", K(ret));
+        } else if (OB_FAIL(ObOptSelectivity::get_column_basic_sel(table_metas, ctx, *cur_vars.at(0), NULL, &null_sel))) {
+          LOG_WARN("failed to get column basic sel", K(ret));
+        } else {
+          // not op.
+          // if null_sel can be calculated, sel = 1.0 - null_sel - op_sel
+          selectivity = 1.0 - null_sel - tmp_selectivity;
+        }
+      } else {
+        // for other conditions it is too hard to consider null_sel, so ignore it.
+        // t_op_is, t_op_nseq: they are null safe exprs, don't consider null_sel.
+        selectivity = 1.0 - tmp_selectivity;
+      }
+    } else if (T_OP_AND == qual.get_expr_type() || T_OP_OR == qual.get_expr_type()) {
+      double tmp_selectivity = 1.0;
+      ObSEArray selectivities;
+      for (int64_t i = 0; OB_SUCC(ret) && i < child_estimators_.count(); ++i) {
+        ObSelEstimator *estimator = NULL;
+        if (OB_ISNULL(estimator = child_estimators_.at(i))) {
+          ret = OB_ERR_UNEXPECTED;
+          LOG_WARN("unexpected param", KPC(this));
+        } else if (OB_FAIL(estimator->get_sel(table_metas, ctx, tmp_selectivity, all_predicate_sel))) {
+          LOG_WARN("failed to get sel", KPC(estimator), K(ret));
+        } else if (OB_FAIL(selectivities.push_back(tmp_selectivity))) {
+          LOG_WARN("failed to push back", K(ret));
+        }
+      }
+      if (OB_FAIL(ret)) {
+      } else if (T_OP_OR == qual.get_expr_type()) {
+        bool is_mutex = false;
+        if (OB_FAIL(ObOptSelectivity::check_mutex_or(qual, is_mutex))) {
+          LOG_WARN("failed to check mutex or", K(ret));
+        } else if (is_mutex) {
+          selectivity = ObOptSelectivity::get_filters_selectivity(selectivities, FilterDependencyType::MUTEX_OR);
+        } else {
+          // sel(p1 or p2 or p3) = sel(!(!p1 and !p2 and !p3))
+          for (int64_t i = 0; i < selectivities.count(); ++i) {
+            selectivities.at(i) = 1 - selectivities.at(i);
+          }
+          selectivity = ObOptSelectivity::get_filters_selectivity(selectivities, ctx.get_dependency_type());
+          selectivity = 1 - selectivity;
+        }
+      } else {
+        selectivity = ObOptSelectivity::get_filters_selectivity(selectivities, ctx.get_dependency_type());
+      }
+    } else {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("unexpected expr", KPC(this));
+    }
+  }
+  return ret;
+}
+
+int ObRangeSelEstimator::create_estimator(ObSelEstimatorFactory &factory,
+                                          const OptSelectivityCtx &ctx,
+                                          const ObRawExpr &expr,
+                                          
ObSelEstimator *&estimator)
+{
+  // Produces a range estimator only when `expr` is a range expression that
+  // references exactly one column; in every other case `estimator` stays NULL.
+  int ret = OB_SUCCESS;
+  UNUSED(ctx);
+  estimator = NULL;
+  bool is_range = true;
+  ObArray column_exprs;
+  ObRangeSelEstimator *range_est = NULL;
+  if (OB_FAIL(ObOptEstUtils::is_range_expr(&expr, is_range))) {
+    LOG_WARN("judge range expr failed", K(ret));
+  } else if (is_range) {
+    if (OB_FAIL(ObRawExprUtils::extract_column_exprs(&expr, column_exprs))) {
+      LOG_WARN("extract_column_exprs error in clause_selectivity", K(ret));
+    } else if (1 == column_exprs.count()) {
+      if (OB_FAIL(factory.create_estimator_inner(range_est))) {
+        LOG_WARN("failed to create estimator ", K(ret));
+      } else {
+        range_est->column_expr_ = static_cast(column_exprs.at(0));
+        if (OB_FAIL(range_est->range_exprs_.push_back(const_cast(&expr)))) {
+          LOG_WARN("failed to push back", K(ret));
+        } else {
+          // hand the estimator out only once it is fully initialised
+          estimator = range_est;
+        }
+      }
+    }
+  }
+  return ret;
+}
+
+/**
+ * Absorb the range expressions of `other` when it is a range estimator on the
+ * same column expression.
+ *
+ * @param is_success  [out] true when both estimators have the same type and the
+ *                    same column_expr_ (set even if the append itself fails)
+ */
+int ObRangeSelEstimator::merge(const ObSelEstimator &other, bool &is_success)
+{
+  int ret = OB_SUCCESS;
+  is_success = false;
+  if (get_type() != other.get_type()) {
+    // estimators of a different kind are never merged
+  } else {
+    const ObRangeSelEstimator &rhs = static_cast(other);
+    is_success = (column_expr_ == rhs.column_expr_);
+    if (is_success && OB_FAIL(append(range_exprs_, rhs.range_exprs_))) {
+      LOG_WARN("failed to append", K(ret));
+    }
+  }
+  return ret;
+}
+
+/**
+ * Selectivity of all collected range expressions on column_expr_, clamped
+ * with revise_between_0_1(). `all_predicate_sel` is not used here.
+ */
+int ObRangeSelEstimator::get_sel(const OptTableMetas &table_metas,
+                                 const OptSelectivityCtx &ctx,
+                                 double &selectivity,
+                                 ObIArray &all_predicate_sel)
+{
+  int ret = OB_SUCCESS;
+  if (OB_NOT_NULL(column_expr_) && !range_exprs_.empty()) {
+    if (OB_FAIL(ObOptSelectivity::get_column_range_sel(table_metas, ctx, *column_expr_, range_exprs_, selectivity))) {
+      LOG_WARN("failed to calc qual selectivity", KPC(column_expr_), K(range_exprs_), K(ret));
+    } else {
+      selectivity = ObOptSelectivity::revise_between_0_1(selectivity);
+    }
+  } else {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected expr", KPC(this));
+  }
+  return ret;
+}
+
+/**
+ * Create a simple-join estimator when `expr` passes
+ * is_simple_join_condition() for the left/right relation ids taken from `ctx`;
+ * otherwise `estimator` stays NULL.
+ */
+int ObSimpleJoinSelEstimator::create_estimator(ObSelEstimatorFactory &factory,
+                                               const OptSelectivityCtx &ctx,
+                                               const ObRawExpr &expr,
+                                               ObSelEstimator *&estimator)
+{
+  int ret = OB_SUCCESS;
+  estimator = NULL;
+  const ObRelIds *left_ids = ctx.get_left_rel_ids();
+  const ObRelIds *right_ids = ctx.get_right_rel_ids();
+  bool is_simple_join = false;
+  ObSimpleJoinSelEstimator *join_est = NULL;
+  if (OB_FAIL(is_simple_join_condition(expr, left_ids, right_ids, is_simple_join))) {
+    LOG_WARN("failed to check is simple join", K(ret));
+  } else if (is_simple_join) {
+    if (OB_FAIL(factory.create_estimator_inner(join_est))) {
+      LOG_WARN("failed to create estimator ", K(ret));
+    } else if (OB_FAIL(join_est->join_conditions_.push_back(const_cast(&expr)))) {
+      LOG_WARN("failed to push back", K(ret));
+    } else {
+      join_est->left_rel_ids_ = left_ids;
+      join_est->right_rel_ids_ = right_ids;
+      estimator = join_est;
+    }
+  }
+  return ret;
+}
+
+/**
+ * check if qual is a simple join condition.
+ * This requires each side of `=` to belong to a different subtree.
+ */ +int ObSimpleJoinSelEstimator::is_simple_join_condition(const ObRawExpr &qual, + const ObRelIds *left_rel_ids, + const ObRelIds *right_rel_ids, + bool &is_valid) +{ + int ret = OB_SUCCESS; + is_valid = false; + if (NULL == left_rel_ids || NULL == right_rel_ids) { + // do nothing + } else if (T_OP_EQ == qual.get_expr_type() || T_OP_NSEQ == qual.get_expr_type()) { + const ObRawExpr *expr0 = qual.get_param_expr(0); + const ObRawExpr *expr1 = qual.get_param_expr(1); + if (OB_ISNULL(expr0) || OB_ISNULL(expr1)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null exprs", K(ret), K(expr0), K(expr1)); + } else if (OB_FAIL(ObOptSelectivity::remove_ignorable_func_for_est_sel(expr0)) || + OB_FAIL(ObOptSelectivity::remove_ignorable_func_for_est_sel(expr1))) { + LOG_WARN("failed to remove ignorable function", K(ret)); + } else if (!expr0->is_column_ref_expr() || !expr1->is_column_ref_expr()) { + // do nothing + } else if ((left_rel_ids->is_superset(expr0->get_relation_ids()) && + right_rel_ids->is_superset(expr1->get_relation_ids())) || + (left_rel_ids->is_superset(expr1->get_relation_ids()) && + right_rel_ids->is_superset(expr0->get_relation_ids()))) { + is_valid = true; + } + } + return ret; +} + +int ObSimpleJoinSelEstimator::merge(const ObSelEstimator &other, bool &is_success) +{ + int ret = OB_SUCCESS; + is_success = false; + if (get_type() == other.get_type()) { + const ObSimpleJoinSelEstimator &est_other = static_cast(other); + if (OB_ISNULL(left_rel_ids_) || OB_ISNULL(right_rel_ids_) || + OB_ISNULL(est_other.left_rel_ids_) || OB_ISNULL(est_other.right_rel_ids_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected NULL", KPC(this), K(est_other)); + } else if (*left_rel_ids_ == *est_other.left_rel_ids_ && + *right_rel_ids_ == *est_other.right_rel_ids_) { + is_success = true; + if (OB_FAIL(append(join_conditions_, est_other.join_conditions_))) { + LOG_WARN("failed to append", K(ret)); + } + } + } + return ret; +} + +int ObSimpleJoinSelEstimator::get_sel(const 
OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + double &selectivity, + ObIArray &all_predicate_sel) +{ + int ret = OB_SUCCESS; + selectivity = 1.0; + if (OB_UNLIKELY(join_conditions_.empty())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected empty join condition", KPC(this)); + } else if (1 == join_conditions_.count()) { + // only one join condition, calculate selectivity directly + if (OB_ISNULL(join_conditions_.at(0))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret)); + } else if (OB_FAIL(ObEqualSelEstimator::get_equal_sel(table_metas, ctx, *join_conditions_.at(0), selectivity))) { + LOG_WARN("Failed to get equal selectivity", K(ret)); + } else { + LOG_PRINT_EXPR(TRACE, "get single equal expr selectivity", *join_conditions_.at(0), K(selectivity)); + } + } else if (join_conditions_.count() > 1) { + // 存在多个连接条件,检查是否涉及联合主键 + if (OB_FAIL(get_multi_equal_sel(table_metas, ctx, join_conditions_, selectivity))) { + LOG_WARN("failed to get equal sel"); + } else { + selectivity = ObOptSelectivity::revise_between_0_1(selectivity); + LOG_TRACE("get multi equal expr selectivity", KPC(this), K(selectivity)); + } + } + return ret; +} + +int ObSimpleJoinSelEstimator::get_multi_equal_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + ObIArray &quals, + double &selectivity) +{ + int ret = OB_SUCCESS; + ObSEArray left_exprs; + ObSEArray right_exprs; + ObSEArray null_safes; + bool is_valid; + if (OB_ISNULL(ctx.get_left_rel_ids()) || OB_ISNULL(ctx.get_right_rel_ids())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed get unexpected null", K(ret), K(ctx)); + } else if (OB_FAIL(is_valid_multi_join(quals, is_valid))) { + LOG_WARN("failed to check is valid multi join", K(ret)); + } else if (!is_valid) { + // multi join condition related to more than two table. Calculate selectivity for each join + // condition independently. 
+ for (int64_t i = 0; OB_SUCC(ret) && i < quals.count(); ++i) { + ObRawExpr *cur_expr = quals.at(i); + double tmp_sel = 1.0; + if (OB_ISNULL(cur_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret)); + } else if (OB_FAIL(ObEqualSelEstimator::get_equal_sel(table_metas, ctx, *cur_expr, tmp_sel))) { + LOG_WARN("failed to get equal selectivity", K(ret)); + } else { + selectivity *= tmp_sel; + } + } + } else if (OB_FAIL(extract_join_exprs(quals, *ctx.get_left_rel_ids(), *ctx.get_right_rel_ids(), + left_exprs, right_exprs, null_safes))) { + LOG_WARN("failed to extract join exprs", K(ret)); + } else if (OB_FAIL(get_cntcols_eq_cntcols_sel(table_metas, ctx, left_exprs, right_exprs, + null_safes, selectivity))) { + LOG_WARN("Failed to get equal sel", K(ret)); + } else { /* do nothing */ } + return ret; +} + +/** + * check if multi join condition only related to two table + */ +int ObSimpleJoinSelEstimator::is_valid_multi_join(ObIArray &quals, + bool &is_valid) +{ + int ret = OB_SUCCESS; + is_valid = false; + if (OB_UNLIKELY(quals.count() < 2)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("quals should have more than 1 exprs", K(ret)); + } else if (OB_ISNULL(quals.at(0))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret)); + } else { + const ObRelIds &rel_ids = quals.at(0)->get_relation_ids(); + is_valid = rel_ids.num_members() == 2; + for (int64_t i = 1; OB_SUCC(ret) && is_valid && i < quals.count(); ++i) { + ObRawExpr *cur_expr = quals.at(i); + if (OB_ISNULL(cur_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret)); + } else if (!rel_ids.equal(cur_expr->get_relation_ids())) { + is_valid = false; + } + } + } + return ret; +} + +int ObSimpleJoinSelEstimator::extract_join_exprs(ObIArray &quals, + const ObRelIds &left_rel_ids, + const ObRelIds &right_rel_ids, + ObIArray &left_exprs, + ObIArray &right_exprs, + ObIArray &null_safes) +{ + int ret = OB_SUCCESS; + ObRawExpr *left_expr = NULL; + ObRawExpr *right_expr = 
NULL; + for (int64_t i = 0; OB_SUCC(ret) && i < quals.count(); ++i) { + ObRawExpr *cur_expr = quals.at(i); + if (OB_ISNULL(cur_expr) || + OB_ISNULL(left_expr = cur_expr->get_param_expr(0)) || + OB_ISNULL(right_expr = cur_expr->get_param_expr(1))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret), K(cur_expr), K(left_expr), K(right_expr)); + } else if (OB_FAIL(ObOptSelectivity::remove_ignorable_func_for_est_sel(left_expr)) || + OB_FAIL(ObOptSelectivity::remove_ignorable_func_for_est_sel(right_expr))) { + LOG_WARN("failed to remove ignorable function", K(ret)); + } else if (OB_UNLIKELY(!left_expr->is_column_ref_expr() || !right_expr->is_column_ref_expr())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("all expr should be column ref", K(ret), K(*cur_expr)); + } else if (left_rel_ids.is_superset(left_expr->get_relation_ids()) && + right_rel_ids.is_superset(right_expr->get_relation_ids())) { + // do nothing + } else if (left_rel_ids.is_superset(right_expr->get_relation_ids()) && + right_rel_ids.is_superset(left_expr->get_relation_ids())) { + std::swap(left_expr, right_expr); + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected expr", K(ret), K(left_expr), K(right_expr)); + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(left_exprs.push_back(left_expr))) { + LOG_WARN("failed to push back expr", K(ret)); + } else if (OB_FAIL(right_exprs.push_back(right_expr))) { + LOG_WARN("failed to push back expr", K(ret)); + } else if (OB_FAIL(null_safes.push_back(T_OP_NSEQ == cur_expr->get_expr_type()))) { + LOG_WARN("failed to push back null safe", K(ret)); + } + } + } + return ret; +} + +int ObSimpleJoinSelEstimator::get_cntcols_eq_cntcols_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObIArray &left_exprs, + const ObIArray &right_exprs, + const ObIArray &null_safes, + double &selectivity) +{ + int ret = OB_SUCCESS; + selectivity = DEFAULT_EQ_SEL; + ObSEArray left_ndvs; + ObSEArray right_ndvs; + ObSEArray left_not_null_sels; + 
ObSEArray right_not_null_sels; + double left_ndv = 1.0; + double right_ndv = 1.0; + double left_nns = 1.0; + double right_nns = 1.0; + double left_rows = 1.0; + double right_rows = 1.0; + double left_origin_rows = 1.0; + double right_origin_rows = 1.0; + bool left_contain_pk = false; + bool right_contain_pk = false; + bool is_union_pk = false; + bool refine_right_ndv = false; + bool refine_left_ndv = false; + + if (OB_ISNULL(ctx.get_plan())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret)); + } else if (OB_FAIL(ObOptSelectivity::is_columns_contain_pkey(table_metas, left_exprs, + left_contain_pk, is_union_pk))) { + LOG_WARN("failed to check is columns contain pkey", K(ret)); + } else if (OB_FALSE_IT(refine_right_ndv = left_contain_pk && is_union_pk)) { + } else if (OB_FAIL(ObOptSelectivity::is_columns_contain_pkey(table_metas, right_exprs, + right_contain_pk, is_union_pk))) { + LOG_WARN("failed to check is columns contain pkey", K(ret)); + } else if (OB_FALSE_IT(refine_left_ndv = right_contain_pk && is_union_pk)) { + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < left_exprs.count(); ++i) { + if (OB_ISNULL(left_exprs.at(i)) || OB_ISNULL(right_exprs.at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret)); + } else if (OB_FAIL(ObOptSelectivity::get_column_ndv_and_nns(table_metas, ctx, *left_exprs.at(i), + &left_ndv, &left_nns))) { + LOG_WARN("failed to get left ndv and nns", K(ret)); + } else if (OB_FAIL(ObOptSelectivity::get_column_ndv_and_nns(table_metas, ctx, *right_exprs.at(i), + &right_ndv, &right_nns))) { + LOG_WARN("failed to get left ndv and nns", K(ret)); + } else if (OB_FAIL(left_not_null_sels.push_back(left_nns))) { + LOG_WARN("failed to push back not null sel", K(ret)); + } else if (OB_FAIL(right_not_null_sels.push_back(right_nns))) { + LOG_WARN("failed to push back not null sel", K(ret)); + } else if (OB_FAIL(left_ndvs.push_back(left_ndv))) { + LOG_WARN("failed to push back ndv", K(ret)); + } else if 
(OB_FAIL(right_ndvs.push_back(right_ndv))) { + LOG_WARN("failed to push back ndv", K(ret)); + } else if (0 == i) { + if (OB_FAIL(ObOptSelectivity::get_column_basic_info(table_metas, ctx, *left_exprs.at(i), + NULL, NULL, NULL, &left_rows))) { + LOG_WARN("failed to get column basic info", K(ret)); + } else if (OB_FAIL(ObOptSelectivity::get_column_basic_info(table_metas, ctx, *right_exprs.at(i), + NULL, NULL, NULL, &right_rows))) { + LOG_WARN("failed to get column basic info", K(ret)); + } else if (refine_right_ndv && + OB_FAIL(ObOptSelectivity::get_column_basic_info(ctx.get_plan()->get_basic_table_metas(), + ctx, *left_exprs.at(i), + NULL, NULL, NULL, &left_origin_rows))) { + LOG_WARN("failed to get column basic info", K(ret)); + } else if (refine_left_ndv && + OB_FAIL(ObOptSelectivity::get_column_basic_info(ctx.get_plan()->get_basic_table_metas(), + ctx, *right_exprs.at(i), + NULL, NULL, NULL, &right_origin_rows))) { + LOG_WARN("failed to get column basic info", K(ret)); + } + } + } + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(ObOptSelectivity::calculate_distinct(table_metas, ctx, left_exprs, left_rows, left_ndv))) { + LOG_WARN("Failed to calculate distinct", K(ret)); + } else if (OB_FAIL(ObOptSelectivity::calculate_distinct(table_metas, ctx, right_exprs, right_rows, right_ndv))) { + LOG_WARN("Failed to calculate distinct", K(ret)); + } else if (IS_SEMI_ANTI_JOIN(ctx.get_join_type())) { + /** + * 对于 semi anti join, 选择率描述的是外表行数为基础的选择率 + * # FORMULA + * ## non NULL safe + * a) semi: `(min(left_ndv, right_ndv) / left_ndv) * left_not_null_sel(i)` + * ## NULL safe + * a) semi: non NULL safe selectivity + `nullsafe(i) && left_not_null_sel(i) < 1.0 ? 
null_sel(i) * selectivity(j) [where j != i]: 0` + */ + if (IS_LEFT_SEMI_ANTI_JOIN(ctx.get_join_type())) { + selectivity = std::min(left_ndv, right_ndv) / left_ndv; + for (int64_t i = 0; i < left_not_null_sels.count(); ++i) { + selectivity *= left_not_null_sels.at(i); + } + // 处理 null safe,这里假设多列上同时为null即小概率事件,只考虑特定列上为null的情况 + for (int64_t i = 0; i < null_safes.count(); ++i) { + if (OB_UNLIKELY(null_safes.at(i) && right_not_null_sels.at(i) < 1.0)) { + double factor = 1.0; + for (int64_t j = 0; j < null_safes.count(); ++j) { + if (i == j) { + factor *= (1 - left_not_null_sels.at(j)); + } else { + factor *= left_not_null_sels.at(j) * std::min(left_ndvs.at(j), right_ndvs.at(j)) / left_ndvs.at(j); + } + } + selectivity += factor; + } + } + } else { + selectivity = std::min(left_ndv, right_ndv) / right_ndv; + for (int64_t i = 0; i < right_not_null_sels.count(); ++i) { + selectivity *= right_not_null_sels.at(i); + } + // 处理 null safe,这里假设多列上同时为null即小概率事件,只考虑特定列上为null的情况 + for (int64_t i = 0; i < null_safes.count(); ++i) { + if (OB_UNLIKELY(null_safes.at(i) && right_not_null_sels.at(i) < 1.0)) { + double factor = 1.0; + for (int64_t j = 0; j < null_safes.count(); ++j) { + if (i == j) { + factor *= (1 - right_not_null_sels.at(j)); + } else { + factor *= right_not_null_sels.at(j) * std::min(left_ndvs.at(j), right_ndvs.at(j)) / right_ndvs.at(j); + } + } + selectivity += factor; + } + } + } + } else { + /** + * # FORMULA + * ## non NULL safe + * 1 / MAX(ndv1, ndv2) * not_null_frac1_col1 * not_null_frac2_col1 * not_null_frac1_col2 * not_null_frac2_col2 * ... + * ## NULL safe + * non NULL safe selectivity + `nullsafe(i) ? (1 - not_null_frac1_col(i)) * (1 - not_null_frac2_col(i)) * selectivity(col(j)) [where j != i]: 0` + * 目前不会特殊考虑 outer join 的选择率, 而是在外层对行数进行 revise. 
+ */ + if (left_contain_pk == right_contain_pk) { + // 两侧都不是主键或都是主键, 不做修正 + } else if (refine_right_ndv) { + // 一侧有主键时, 认为是主外键连接, 外键上最大的ndv为即为主键的原始ndv + right_ndv = std::min(right_ndv, left_origin_rows); + } else if (refine_left_ndv) { + left_ndv = std::min(left_ndv, right_origin_rows); + } else { + // do nothing + } + selectivity = 1.0 / std::max(left_ndv, right_ndv); + for (int64_t i = 0; i < left_not_null_sels.count(); ++i) { + selectivity *= left_not_null_sels.at(i) * right_not_null_sels.at(i); + } + // 处理null safe, 这里假设多列上同时为null即小概率事件,只考虑特定列上为null的情况 + for (int64_t i = 0; i < null_safes.count(); ++i) { + if (null_safes.at(i)) { + double factor = 1.0; + for (int64_t j = 0; j < null_safes.count(); ++j) { + if (i == j) { + factor *= (1 - left_not_null_sels.at(j)) * (1 - right_not_null_sels.at(j)); + } else { + factor *= left_not_null_sels.at(j) * right_not_null_sels.at(j) / std::max(left_ndvs.at(j), right_ndvs.at(j)); + } + } + selectivity += factor; + } else {/* do nothing */} + } + } + LOG_TRACE("selectivity of `col_ref1 =|<=> col_ref1 and col_ref2 =|<=> col_ref2`", K(selectivity)); + return ret; +} + +// extract expr like '(-) col1 +(-) col2 + offset' +int ObInequalJoinSelEstimator::extract_column_offset(const OptSelectivityCtx &ctx, + const ObRawExpr *expr, + bool is_minus, + bool &is_valid, + ObInequalJoinSelEstimator::Term &term, + double &offset) +{ + int ret = OB_SUCCESS; + is_valid = true; + if (OB_ISNULL(expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected param", KPC(expr)); + } else if (!ob_is_numeric_type(expr->get_data_type())) { + is_valid = false; + } else if (OB_FAIL(ObOptSelectivity::remove_ignorable_func_for_est_sel(expr))) { + LOG_WARN("failed to remove ignorable expr", KPC(expr)); + } else if (!ob_is_numeric_type(expr->get_data_type())) { + is_valid = false; + } else if (T_OP_ADD == expr->get_expr_type() || T_OP_MINUS == expr->get_expr_type()) { + bool child_is_minus = (T_OP_MINUS == expr->get_expr_type()) ? 
!is_minus : is_minus; + if (OB_UNLIKELY(expr->get_param_count() != 2)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected param", KPC(expr)); + } else if (OB_FAIL(SMART_CALL(extract_column_offset(ctx, expr->get_param_expr(0), is_minus, is_valid, term, offset)))) { + LOG_WARN("failed to extract col offset", K(ret)); + } else if (!is_valid) { + // do nothing + } else if (OB_FAIL(SMART_CALL(extract_column_offset(ctx, expr->get_param_expr(1), child_is_minus, is_valid, term, offset)))) { + LOG_WARN("failed to extract col offset", K(ret)); + } + } else if (T_OP_NEG == expr->get_expr_type() || T_OP_POS == expr->get_expr_type()) { + bool child_is_minus = (T_OP_NEG == expr->get_expr_type()) ? !is_minus : is_minus; + if (OB_UNLIKELY(expr->get_param_count() != 1)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected param", KPC(expr)); + } else if (OB_FAIL(SMART_CALL(extract_column_offset(ctx, expr->get_param_expr(0), child_is_minus, is_valid, term, offset)))) { + LOG_WARN("failed to extract col offset", K(ret)); + } + } else if (expr->is_column_ref_expr()) { + if (term.col1_ == NULL) { + term.col1_ = static_cast(expr); + term.coefficient1_ = is_minus ? -1.0 : 1.0; + } else if (term.col2_ == NULL) { + term.col2_ = static_cast(expr); + term.coefficient2_ = is_minus ? 
-1.0 : 1.0; + } else { + is_valid = false; + } + } else if (expr->is_static_const_expr()) { + ObObj const_value; + ObObj scalar_value; + bool got_result = false; + if (OB_FAIL(ObSQLUtils::calc_const_or_calculable_expr(ctx.get_opt_ctx().get_exec_ctx(), + expr, + const_value, + got_result, + ctx.get_allocator()))) { + LOG_WARN("failed to calc const or calculable expr", K(ret)); + } else if (!got_result || !const_value.is_numeric_type() || const_value.is_null()) { + is_valid = false; + } else if (OB_FAIL(ObOptEstObjToScalar::convert_obj_to_scalar_obj(&const_value, &scalar_value))) { + LOG_WARN("failed to convert obj to scalar", K(const_value)); + } else { + if (is_minus) { + offset -= scalar_value.get_double(); + } else { + offset += scalar_value.get_double(); + } + } + } else { + is_valid = false; + } + return ret; +} + +int ObInequalJoinSelEstimator::create_estimator(ObSelEstimatorFactory &factory, + const OptSelectivityCtx &ctx, + const ObRawExpr &expr, + ObSelEstimator *&estimator) +{ + int ret = OB_SUCCESS; + ObInequalJoinSelEstimator *ineq_join_estimator = NULL; + bool is_valid = true; + if (IS_RANGE_CMP_OP(expr.get_expr_type())) { + Term term; + double offset = 0.0; + if (2 != expr.get_param_count()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("expr should have 2 param", K(ret), K(expr)); + } else if (OB_FAIL(extract_column_offset(ctx, expr.get_param_expr(0), false, is_valid, term, offset))) { + LOG_WARN("failed to extract column diff", KPC(expr.get_param_expr(0))); + } else if (!is_valid) { + // do nothing + } else if (OB_FAIL(extract_column_offset(ctx, expr.get_param_expr(1), true, is_valid, term, offset))) { + LOG_WARN("failed to extract column diff", KPC(expr.get_param_expr(1))); + } else if (!is_valid || !term.is_valid()) { + is_valid = false; + } else if (OB_FAIL(factory.create_estimator_inner(ineq_join_estimator))) { + LOG_WARN("failed to create estimator ", K(ret)); + } else { + ineq_join_estimator->term_ = term; + 
ineq_join_estimator->set_bound(expr.get_expr_type(), -offset); + } + } else if (T_OP_BTW == expr.get_expr_type()) { + Term term1; + Term term2; + double offset1 = 0.0; + double offset2 = 0.0; + bool is_same = false; + bool need_reverse = false; + if (3 != expr.get_param_count()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("between expr should have 3 param", K(ret), K(expr)); + } else if (OB_FAIL(extract_column_offset(ctx, expr.get_param_expr(0), false, is_valid, term1, offset1))) { + LOG_WARN("failed to extract column diff", KPC(expr.get_param_expr(0))); + } else if (!is_valid) { + // do nothing + } else if (FALSE_IT(offset2 = offset1) || FALSE_IT(term2 = term1)) { + } else if (OB_FAIL(extract_column_offset(ctx, expr.get_param_expr(1), true, is_valid, term1, offset1))) { + LOG_WARN("failed to extract column diff", KPC(expr.get_param_expr(1))); + } else if (OB_FAIL(extract_column_offset(ctx, expr.get_param_expr(2), true, is_valid, term2, offset2))) { + LOG_WARN("failed to extract column diff", KPC(expr.get_param_expr(2))); + } else if (!is_valid || !term1.is_valid() || !term2.is_valid()) { + is_valid = false; + } else if (FALSE_IT(cmp_term(term1, term2, is_same, need_reverse))) { + } else if (!is_same || need_reverse) { + is_valid = false; + } else if (OB_FAIL(factory.create_estimator_inner(ineq_join_estimator))) { + LOG_WARN("failed to create estimator ", K(ret)); + } else { + ineq_join_estimator->term_ = term1; + ineq_join_estimator->set_bound(T_OP_GE, -offset1); + ineq_join_estimator->set_bound(T_OP_LE, -offset2); + } + } + estimator = ineq_join_estimator; + return ret; +} + +void ObInequalJoinSelEstimator::cmp_term(const ObInequalJoinSelEstimator::Term &t1, + const ObInequalJoinSelEstimator::Term &t2, + bool &is_equal, bool &need_reverse) +{ + is_equal = false; + need_reverse = false; + if (t1.col1_ == t2.col1_ && t1.col2_ == t2.col2_) { + if (t1.coefficient1_ == t2.coefficient1_ && t1.coefficient2_ == t2.coefficient2_) { + is_equal = true; + } else if 
(t1.coefficient1_ == -t2.coefficient1_ && t1.coefficient2_ == -t2.coefficient2_) { + is_equal = true; + need_reverse = true; + } + } else if (t1.col1_ == t2.col2_ && t1.col2_ == t2.col1_) { + if (t1.coefficient1_ == t2.coefficient2_ && t1.coefficient2_ == t2.coefficient1_) { + is_equal = true; + } else if (t1.coefficient1_ == -t2.coefficient2_ && t1.coefficient2_ == -t2.coefficient1_) { + is_equal = true; + need_reverse = true; + } + } +} + +void ObInequalJoinSelEstimator::set_bound(ObItemType item_type, double bound) +{ + if (T_OP_LE == item_type) { + has_upper_bound_ = true; + upper_bound_ = bound; + include_upper_bound_ = true; + } else if (T_OP_LT == item_type) { + has_upper_bound_ = true; + upper_bound_ = bound; + include_upper_bound_ = false; + } else if (T_OP_GE == item_type) { + has_lower_bound_ = true; + lower_bound_ = bound; + include_lower_bound_ = true; + } else if (T_OP_GT == item_type) { + has_lower_bound_ = true; + lower_bound_ = bound; + include_lower_bound_ = false; + } +} + +void ObInequalJoinSelEstimator::reverse() +{ + term_.coefficient1_ = -term_.coefficient1_; + term_.coefficient2_ = -term_.coefficient2_; + std::swap(has_lower_bound_, has_upper_bound_); + std::swap(include_lower_bound_, include_upper_bound_); + std::swap(lower_bound_, upper_bound_); + lower_bound_ = -lower_bound_; + upper_bound_ = -upper_bound_; +} + +void ObInequalJoinSelEstimator::update_lower_bound(double bound, bool include) +{ + if (!has_lower_bound_ || + is_higher_lower_bound(bound, include, lower_bound_, include_lower_bound_)) { + include_lower_bound_ = include; + lower_bound_ = bound; + } + has_lower_bound_ = true; +} + +void ObInequalJoinSelEstimator::update_upper_bound(double bound, bool include) { + if (!has_upper_bound_ || + is_higher_upper_bound(upper_bound_, include_upper_bound_, bound, include)) { + include_upper_bound_ = include; + upper_bound_ = bound; + } + has_upper_bound_= true; +} + +int ObInequalJoinSelEstimator::merge(const ObSelEstimator 
&other_estmator, bool &is_success) +{ + int ret = OB_SUCCESS; + is_success = false; + if (get_type() == other_estmator.get_type()) { + const ObInequalJoinSelEstimator &other = static_cast(other_estmator); + bool need_reverse = false; + cmp_term(term_, other.term_, is_success, need_reverse); + if (is_success){ + if (need_reverse) { + reverse(); + } + if (other.has_lower_bound_) { + update_lower_bound(other.lower_bound_, other.include_lower_bound_); + } + if (other.has_upper_bound_) { + update_upper_bound(other.upper_bound_, other.include_upper_bound_); + } + } + } + return ret; +} + +double ObInequalJoinSelEstimator::get_gt_sel(double min1, + double max1, + double min2, + double max2, + double offset) +{ + double selectivity = 0.0; + double total = (max2 - min2) * (max1 - min1); + + if (offset < min1 + min2) { + selectivity = 1.0; + } else if (offset < max1 + min2 && offset < min1 + max2 && total > OB_DOUBLE_EPSINON) { + selectivity = 1 - (offset - min1 - min2) * (offset - min1 - min2) / (2 * total); + } else if (offset >= max1 + min2 && offset < min1 + max2 && max1 - min1 > OB_DOUBLE_EPSINON) { + selectivity = (2 * max2 + min1 + max1 - 2 * offset) / (2 * (max2 - min2)); + } else if (offset >= min1 + max2 && offset < max1 + min2 && max2 - min2 > OB_DOUBLE_EPSINON) { + selectivity = (min2 + max2 + 2 * max1 - 2 * offset) / (2 * (max1 - min1)); + } else if (offset < max1 + max2 && total > OB_DOUBLE_EPSINON) { + selectivity = (max1 + max2 - offset) * (max1 + max2 - offset) / (2 * total); + } else { + selectivity = 0.0; + } + return selectivity; +} + +double ObInequalJoinSelEstimator::get_any_gt_sel(double min1, + double max1, + double min2, + double max2, + double offset) +{ + double selectivity = 0.0; + if (offset < min1 + max2) { + selectivity = 1.0; + } else if (offset < max1 + max2 && max1 - min1 > OB_DOUBLE_EPSINON) { + selectivity = (max1 + max2 - offset) / (max1 - min1); + } else { + selectivity = 0.0; + } + return selectivity; +} + +double 
ObInequalJoinSelEstimator::get_all_gt_sel(double min1,
                                                 double max1,
                                                 double min2,
                                                 double max2,
                                                 double offset)
{
  // Share of x1 in [min1, max1] for which every x2 in [min2, max2] satisfies
  // x1 + x2 > offset, i.e. the share of x1 with x1 > offset - min2.
  // `max2` does not influence the result.
  double selectivity = 0.0;
  if (offset < min1 + min2) {
    selectivity = 1.0;
  } else if (offset < max1 + min2 && max1 - min1 > OB_DOUBLE_EPSINON) {
    selectivity = (max1 + min2 - offset) / (max1 - min1);
  } else {
    selectivity = 0.0;
  }
  return selectivity;
}

/**
 * Selectivity of `x1 + x2 = offset`.
 *
 * `overlap` is the length of the part of the line x1 + x2 = offset that lies
 * inside [min1, max1] x [min2, max2], measured along one axis. It is scaled to
 * a distinct-value count per column (overlap_ndv1 / overlap_ndv2, passed
 * through revise_ndv()); a column whose range is narrower than
 * OB_DOUBLE_EPSINON counts as one value.
 *
 * @param is_semi  true: return overlap_ndv1 / ndv1;
 *                 false: return (overlap_ndv1 / ndv1) * (overlap_ndv2 / ndv2)
 *                 divided by the larger of the two overlapping ndvs
 * NOTE(review): ndv1 / ndv2 are used as divisors without a zero check;
 * callers are presumably expected to pass values >= 1 -- confirm.
 */
double ObInequalJoinSelEstimator::get_equal_sel(double min1,
                                                double max1,
                                                double ndv1,
                                                double min2,
                                                double max2,
                                                double ndv2,
                                                double offset,
                                                bool is_semi)
{
  double selectivity = 0.0;
  double overlap = 0.0;
  double overlap_ndv1 = 1.0, overlap_ndv2 = 1.0;
  if (offset < min1 + min2) {
    overlap = 0.0;
  } else if (offset < max1 + min2 && offset < min1 + max2) {
    overlap = offset - min1 - min2;
  } else if (offset >= max1 + min2 && offset < min1 + max2) {
    overlap = max1 - min1;
  } else if (offset >= min1 + max2 && offset < max1 + min2) {
    overlap = max2 - min2;
  } else if (offset < max1 + max2) {
    overlap = max1 + max2 - offset;
  } else {
    overlap = 0.0;
  }
  if (max1 - min1 > OB_DOUBLE_EPSINON) {
    overlap_ndv1 = revise_ndv(ndv1 * overlap / (max1 - min1)) ;
  } else {
    overlap_ndv1 = 1;
  }
  if (max2 - min2 > OB_DOUBLE_EPSINON) {
    overlap_ndv2 = revise_ndv(ndv2 * overlap / (max2 - min2)) ;
  } else {
    overlap_ndv2 = 1;
  }
  if (is_semi) {
    selectivity = overlap_ndv1 / ndv1;
  } else {
    selectivity = 1 / max(overlap_ndv1, overlap_ndv2) * (overlap_ndv1 / ndv1) * (overlap_ndv2 / ndv2);
  }
  return selectivity;
}

/**
 * Compute the selectivity of `lower OP term OP upper`, where
 * term = coefficient1 * col1 + coefficient2 * col2.
 *
 * Requires both columns to be set, at least one bound, and both coefficients
 * to be +1 or -1; otherwise OB_ERR_UNEXPECTED is returned.
 * DEFAULT_INEQ_JOIN_SEL is used when a column has no usable min/max, or when
 * both columns are the same and the coefficients do not cancel.
 *
 * NOTE(review): the element type of `ObIArray` is missing in this copy of the
 * signature (the template argument appears to have been lost); restore it
 * from the class declaration.
 */
int ObInequalJoinSelEstimator::get_sel(const OptTableMetas &table_metas,
                                       const OptSelectivityCtx &ctx,
                                       double &selectivity,
                                       ObIArray &all_predicate_sel)
{
  int ret = OB_SUCCESS;
  ObObj obj_min, obj_max, tmp_obj;
  selectivity = 1.0;
  double nns1, nns2, ndv1, ndv2;
  double min1, min2, max1, max2;
  double lower_bound = lower_bound_;
  double upper_bound = upper_bound_;
  // Both bounds inclusive and equal within OB_DOUBLE_EPSINON: the range is one point.
  bool is_eq = include_lower_bound_ && include_upper_bound_ &&
               upper_bound - lower_bound <= OB_DOUBLE_EPSINON &&
               lower_bound - upper_bound <= OB_DOUBLE_EPSINON;
  if (OB_ISNULL(term_.col1_) ||
      OB_ISNULL(term_.col2_) ||
      OB_UNLIKELY(!has_lower_bound_ && !has_upper_bound_) ||
      OB_UNLIKELY(fabs(term_.coefficient1_) != 1.0) ||
      OB_UNLIKELY(fabs(term_.coefficient2_) != 1.0)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected param", KPC(this));
  } else if (OB_FAIL(ObOptSelectivity::get_column_ndv_and_nns(table_metas, ctx, *term_.col1_, &ndv1, &nns1))) {
    LOG_WARN("failed to get nns");
  } else if (OB_FAIL(ObOptSelectivity::get_column_ndv_and_nns(table_metas, ctx, *term_.col2_, &ndv2, &nns2))) {
    LOG_WARN("failed to get nns");
  } else if (has_lower_bound_ && has_upper_bound_ &&
             lower_bound >= upper_bound && !is_eq) {
    // always false
    // e.g. 1 < c1 + c2 < 0
    selectivity = 0.0;
  } else if (term_.col1_->get_table_id() == term_.col2_->get_table_id() &&
             term_.col1_->get_column_id() == term_.col2_->get_column_id()) {
    // same column
    if (fabs(term_.coefficient1_ + term_.coefficient2_) <= OB_DOUBLE_EPSINON) {
      // The coefficients cancel, so the term is the constant 0: compare 0 with the bounds.
      if (has_lower_bound_ &&
          is_higher_lower_bound(lower_bound, include_lower_bound_, 0, true)) {
        // e.g. : c1 - c1 > 1
        selectivity = 0.0;
      } else if (has_upper_bound_ &&
                 is_higher_upper_bound(0, true, upper_bound, include_upper_bound_)) {
        // e.g. : c1 - c1 < - 1
        selectivity = 0.0;
      } else {
        // e.g. : c1 - c1 < 1
        selectivity = nns1;
      }
    } else {
      // TODO : c1 + c1 < 1
      selectivity = DEFAULT_INEQ_JOIN_SEL;
    }
  } else if (OB_FAIL(ObOptSelectivity::get_column_min_max(table_metas, ctx, *term_.col1_, obj_min, obj_max))) {
    LOG_WARN("failed to get column min max", K(ret), KPC(term_.col1_));
  } else if (obj_min.is_min_value() || obj_min.is_max_value() ||
             obj_max.is_max_value() || obj_max.is_min_value()) {
    // No usable min/max for col1.
    selectivity = DEFAULT_INEQ_JOIN_SEL;
  } else if (OB_FAIL(ObOptEstObjToScalar::convert_obj_to_scalar_obj(&obj_min, &tmp_obj))) {
    LOG_WARN("failed to convert obj", K(obj_min));
  } else if (FALSE_IT(min1 = tmp_obj.get_double() * term_.coefficient1_)) {
  } else if (OB_FAIL(ObOptEstObjToScalar::convert_obj_to_scalar_obj(&obj_max, &tmp_obj))) {
    LOG_WARN("failed to convert obj", K(obj_max));
  } else if (FALSE_IT(max1 = tmp_obj.get_double() * term_.coefficient1_)) {
  } else if (OB_FAIL(ObOptSelectivity::get_column_min_max(table_metas, ctx, *term_.col2_, obj_min, obj_max))) {
    LOG_WARN("failed to get column min max", K(ret), KPC(term_.col2_));
  } else if (obj_min.is_min_value() || obj_min.is_max_value() ||
             obj_max.is_max_value() || obj_max.is_min_value()) {
    // No usable min/max for col2.
    selectivity = DEFAULT_INEQ_JOIN_SEL;
  } else if (OB_FAIL(ObOptEstObjToScalar::convert_obj_to_scalar_obj(&obj_min, &tmp_obj))) {
    LOG_WARN("failed to convert obj", K(obj_min));
  } else if (FALSE_IT(min2 = tmp_obj.get_double() * term_.coefficient2_)) {
  } else if (OB_FAIL(ObOptEstObjToScalar::convert_obj_to_scalar_obj(&obj_max, &tmp_obj))) {
    LOG_WARN("failed to convert obj", K(obj_max));
  } else if (FALSE_IT(max2 = tmp_obj.get_double() * term_.coefficient2_)) {
  } else {
    // min/max were multiplied by the coefficient; a negative coefficient flips their order.
    if (term_.coefficient1_ < 0) {
      std::swap(min1, max1);
    }
    if (term_.coefficient2_ < 0) {
      std::swap(min2, max2);
    }
    bool is_semi = IS_SEMI_ANTI_JOIN(ctx.get_join_type());
    if (is_semi) {
      if (OB_ISNULL(ctx.get_left_rel_ids()) || OB_ISNULL(ctx.get_right_rel_ids())) {
        ret = OB_ERR_UNEXPECTED;
        LOG_WARN("get unexpected null", K(ctx.get_left_rel_ids()), K(ctx.get_right_rel_ids()));
      } else if (term_.col1_->get_relation_ids().overlap(*ctx.get_right_rel_ids()) ||
                 term_.col2_->get_relation_ids().overlap(*ctx.get_left_rel_ids())) {
        // Exchange the two columns' statistics so that index 1 holds the
        // column that does not belong to the right side.
        std::swap(min1, min2);
        std::swap(max1, max2);
        std::swap(ndv1, ndv2);
        std::swap(nns1, nns2);
      }
    }
    if (OB_FAIL(ret)) {
    } else if (OB_UNLIKELY(min1 > max1) ||
               OB_UNLIKELY(min2 > max2)) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("unexpected min max", K(min1), K(max1), K(min2), K(max2), KPC(this));
    } else if (fabs(max1 - min1) <= OB_DOUBLE_EPSINON && fabs(max2 - min2) <= OB_DOUBLE_EPSINON) {
      // Both c1 and c2 have only one value
      // e.g. c1 in [1,1] and c2 in [2,2]
      selectivity = get_sel_for_point(min1, min2);
    } else if (is_eq) {
      // lower bound is the same as the upper bound
      // e.g : 1 <= c1 + c2 <= 1;
      selectivity = ObInequalJoinSelEstimator::get_equal_sel(min1, max1, ndv1, min2, max2, ndv2, lower_bound, is_semi);
    } else if (is_semi) {
      // calculate selectivity for semi join
      // e.g. : 0 <= c1 + c2 < 1
      double sel1 = has_lower_bound_ ? ObInequalJoinSelEstimator::get_any_gt_sel(min1, max1, min2, max2, lower_bound) : 1.0;
      double sel2 = has_upper_bound_ ? ObInequalJoinSelEstimator::get_all_gt_sel(min1, max1, min2, max2, upper_bound) : 0.0;
      // the sel of `any c2 satisfy 'a < c1 + c2 < b'` =
      // the sel of `any c2 satisfy 'c1 + c2 > a'` minus the sel of `all c2 satisfy 'c1 + c2 > b'`
      selectivity = sel1 - sel2;
      // An inclusive bound adds the share of one distinct value of c1.
      if (include_lower_bound_ && ndv1 > 1) {
        selectivity += 1 / ndv1;
      }
      if (include_upper_bound_ && ndv1 > 1) {
        selectivity += 1 / ndv1;
      }
    } else {
      // calculate selectivity for inner join
      // e.g. : 0 <= c1 + c2 < 1
      double sel1 = has_lower_bound_ ? ObInequalJoinSelEstimator::get_gt_sel(min1, max1, min2, max2, lower_bound) : 1.0;
      double sel2 = has_upper_bound_ ? ObInequalJoinSelEstimator::get_gt_sel(min1, max1, min2, max2, upper_bound) : 0.0;
      // the sel of 'a < c1 + c2 < b' =
      // the sel of 'c1 + c2 > a' minus the sel of 'c1 + c2 > b'
      selectivity = sel1 - sel2;
      // An inclusive bound adds the selectivity of equality at that bound.
      if (include_lower_bound_) {
        selectivity += ObInequalJoinSelEstimator::get_equal_sel(min1, max1, ndv1, min2, max2, ndv2, lower_bound, is_semi);
      }
      if (include_upper_bound_) {
        selectivity += ObInequalJoinSelEstimator::get_equal_sel(min1, max1, ndv1, min2, max2, ndv2, upper_bound, is_semi);
      }
    }
    selectivity = ObOptSelectivity::revise_between_0_1(selectivity);

    // process not null sel
    if (is_semi) {
      selectivity *= nns1;
    } else {
      selectivity *= nns1 * nns2;
    }
  }
  return ret;
}

// Selectivity when both columns hold a single value each: 1.0 if
// point1 + point2 satisfies every bound that is set, otherwise 0.0.
double ObInequalJoinSelEstimator::get_sel_for_point(double point1, double point2)
{
  bool within_interval = true;
  double sum = point1 + point2;
  if (has_lower_bound_) {
    within_interval &= include_lower_bound_ ? sum >= lower_bound_ : sum > lower_bound_;
  }
  if (has_upper_bound_) {
    within_interval &= include_upper_bound_ ? sum <= upper_bound_ : sum < upper_bound_;
  }
  return within_interval ?
1.0 : 0.0; +} + +int ObSelEstimatorFactory::create_estimator(const OptSelectivityCtx &ctx, + const ObRawExpr *expr, + ObSelEstimator *&new_estimator) +{ + int ret = OB_SUCCESS; + new_estimator = NULL; + /* + * The ordering to create the estimator is important + */ + static const CreateEstimatorFunc create_estimator_funcs[] = + { + ObSimpleJoinSelEstimator::create_estimator, + ObRangeSelEstimator::create_estimator, + ObInequalJoinSelEstimator::create_estimator, + ObAggSelEstimator::create_estimator, + ObConstSelEstimator::create_estimator, + ObColumnSelEstimator::create_estimator, + ObEqualSelEstimator::create_estimator, + ObLikeSelEstimator::create_estimator, + ObBoolOpSelEstimator::create_estimator, + ObInSelEstimator::create_estimator, + ObIsSelEstimator::create_estimator, + ObCmpSelEstimator::create_estimator, + ObBtwSelEstimator::create_estimator, + ObDefaultSelEstimator::create_estimator, + }; + static const int64_t func_cnt = sizeof(create_estimator_funcs)/sizeof(CreateEstimatorFunc); + if (OB_ISNULL(expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null expr", KPC(expr)); + } else if (OB_FAIL(ObOptimizerUtil::get_expr_without_lossless_cast(expr, expr))) { + LOG_WARN("failed to get lossless cast expr", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && NULL == new_estimator && i < func_cnt; i ++) { + if (OB_FAIL(create_estimator_funcs[i](*this, ctx, *expr, new_estimator))) { + LOG_WARN("failed to create estimator", K(ret)); + } + } + if (OB_SUCC(ret) && OB_ISNULL(new_estimator)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to create estimator", KPC(new_estimator), KPC(expr)); + } + LOG_DEBUG("succeed to create estimator", KPC(new_estimator)); + return ret; +} + +}//end of namespace sql +}//end of namespace oceanbase diff --git a/src/sql/optimizer/ob_sel_estimator.h b/src/sql/optimizer/ob_sel_estimator.h new file mode 100644 index 0000000000..b90ba5839a --- /dev/null +++ b/src/sql/optimizer/ob_sel_estimator.h @@ -0,0 +1,927 @@ +/** + * Copyright 
(c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_SQL_OPTIMIZER_OB_SEL_ESTIMATOR_ +#define OCEANBASE_SQL_OPTIMIZER_OB_SEL_ESTIMATOR_ + +#include "sql/optimizer/ob_opt_selectivity.h" + +namespace oceanbase +{ +namespace sql +{ + +enum class ObSelEstType +{ + INVALID = 0, + DEFAULT, + CONST, + IN, + COLUMN, + BTW, + IS, + CMP, + AGG, + EQUAL, + LIKE, + BOOL_OP, + RANGE, + SIMPLE_JOIN, + INEQUAL_JOIN, +}; + +class ObSelEstimatorFactory; +class ObSelEstimator +{ +public: + ObSelEstimator(ObSelEstType type) : type_(type) {} + virtual ~ObSelEstimator() = default; + + static int append_estimators(ObIArray &sel_estimators, ObSelEstimator *new_estimator); + + // Check whether it is related to other ObSelEstimator, and if so, merge them + virtual int merge(const ObSelEstimator &other, bool &is_success) = 0; + // check whether it is independent of any other ObSelEstimator + virtual bool is_independent() const = 0; + // Calculate the selectivity + virtual int get_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + double &selectivity, + ObIArray &all_predicate_sel) = 0; + // Check whether we tend to use dynamic sampling for this estimator + virtual bool tend_to_use_ds() = 0; + inline ObSelEstType get_type() const { return type_; } + + VIRTUAL_TO_STRING_KV(K_(type)); + +protected: + ObSelEstType type_; + +private: + DISABLE_COPY_ASSIGN(ObSelEstimator); +}; + +class ObSelEstimatorFactory +{ +public: + explicit ObSelEstimatorFactory(common::ObIAllocator &alloc) + : allocator_(alloc), 
+ estimator_store_(alloc) + {} + + ~ObSelEstimatorFactory() { + destory(); + } + + inline common::ObIAllocator &get_allocator() { return allocator_; } + inline void destory() + { + DLIST_FOREACH_NORET(node, estimator_store_.get_obj_list()) { + if (node != NULL && node->get_obj() != NULL) { + node->get_obj()->~ObSelEstimator(); + } + } + estimator_store_.destroy(); + } + + int create_estimator(const OptSelectivityCtx &ctx, + const ObRawExpr *expr, + ObSelEstimator *&new_estimator); + + template + inline int create_estimator_inner(EstimatorType *&new_estimator) + { + int ret = common::OB_SUCCESS; + void *ptr = allocator_.alloc(sizeof(EstimatorType)); + new_estimator = NULL; + if (OB_ISNULL(ptr)) { + ret = common::OB_ALLOCATE_MEMORY_FAILED; + SQL_OPT_LOG(ERROR, "no more memory to create estimator"); + } else { + new_estimator = new (ptr) EstimatorType(); + if (OB_FAIL(estimator_store_.store_obj(new_estimator))) { + SQL_OPT_LOG(WARN, "store estimator failed", K(ret)); + new_estimator->~EstimatorType(); + new_estimator = NULL; + } + } + return ret; + } + + typedef int (*CreateEstimatorFunc) (ObSelEstimatorFactory &, const OptSelectivityCtx &, + const ObRawExpr &, ObSelEstimator *&); + +private: + common::ObIAllocator &allocator_; + common::ObObjStore estimator_store_; +private: + DISALLOW_COPY_AND_ASSIGN(ObSelEstimatorFactory); +}; + +template +int create_simple_estimator(ObSelEstimatorFactory &factory, + const OptSelectivityCtx &ctx, + const ObRawExpr &expr, + ObSelEstimator *&estimator) +{ + int ret = OB_SUCCESS; + estimator = NULL; + ObTemplateEstimator *temp_estimator = NULL; + if (!ObTemplateEstimator::check_expr_valid(expr)) { + // do nothing + } else if (OB_FAIL(factory.create_estimator_inner(temp_estimator))) { + LOG_WARN("failed to create estimator ", K(ret)); + } else { + temp_estimator->set_expr(&expr); + estimator = temp_estimator; + } + return ret; +} + +/** + * Virtual class which estimate selectivity for filters that are independent of others +*/ +class 
ObIndependentSelEstimator : public ObSelEstimator +{ +public: + ObIndependentSelEstimator(ObSelEstType type) : ObSelEstimator(type), expr_(NULL) {} + virtual ~ObIndependentSelEstimator() = default; + + virtual int merge(const ObSelEstimator &other, bool &is_success) override { + int ret = OB_SUCCESS; + is_success = false; + return ret; + } + + virtual bool is_independent() const override { return true; } + inline void set_expr(const ObRawExpr *expr) { expr_ = expr; } + + VIRTUAL_TO_STRING_KV(K_(type), KPC_(expr)); + +protected: + const ObRawExpr *expr_; + +private: + DISABLE_COPY_ASSIGN(ObIndependentSelEstimator); +}; + +/** + * Estimate default selectivity +*/ +class ObDefaultSelEstimator : public ObIndependentSelEstimator +{ +public: + ObDefaultSelEstimator() : ObIndependentSelEstimator(ObSelEstType::DEFAULT) {} + virtual ~ObDefaultSelEstimator() = default; + + static int create_estimator(ObSelEstimatorFactory &factory, + const OptSelectivityCtx &ctx, + const ObRawExpr &expr, + ObSelEstimator *&estimator) + { + return create_simple_estimator(factory, ctx, expr, estimator); + } + virtual bool tend_to_use_ds() override { return true; } + virtual int get_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + double &selectivity, + ObIArray &all_predicate_sel) override; + inline static bool check_expr_valid(const ObRawExpr &expr) { return true; } +private: + DISALLOW_COPY_AND_ASSIGN(ObDefaultSelEstimator); +}; + +/** + * Estimate selectivity for preds which contain agg function + * such as : `max(c1) < 10` +*/ +class ObAggSelEstimator : public ObIndependentSelEstimator +{ +public: + ObAggSelEstimator() : ObIndependentSelEstimator(ObSelEstType::AGG) {} + virtual ~ObAggSelEstimator() = default; + + static int create_estimator(ObSelEstimatorFactory &factory, + const OptSelectivityCtx &ctx, + const ObRawExpr &expr, + ObSelEstimator *&estimator) + { + return create_simple_estimator(factory, ctx, expr, estimator); + } + virtual bool tend_to_use_ds() 
override { return false; } + virtual int get_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + double &selectivity, + ObIArray &all_predicate_sel) override; + inline static bool check_expr_valid(const ObRawExpr &expr) { return expr.has_flag(CNT_AGG); } +private: + static int get_agg_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr &qual, + double &selectivity); + + static int get_agg_sel_with_minmax(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr &aggr_expr, + const ObRawExpr *const_expr1, + const ObRawExpr *const_expr2, + const ObItemType type, + double &selectivity, + const double rows_per_group); + + static double get_agg_eq_sel(const ObObj &maxobj, + const ObObj &minobj, + const ObObj &constobj, + const double distinct_sel, + const double rows_per_group, + const bool is_eq, + const bool is_sum); + + static double get_agg_range_sel(const ObObj &maxobj, + const ObObj &minobj, + const ObObj &constobj, + const double rows_per_group, + const ObItemType type, + const bool is_sum); + + static double get_agg_btw_sel(const ObObj &maxobj, + const ObObj &minobj, + const ObObj &constobj1, + const ObObj &constobj2, + const double rows_per_group, + const ObItemType type, + const bool is_sum); + + static int is_valid_agg_qual(const ObRawExpr &qual, + bool &is_valid, + const ObRawExpr *&aggr_expr, + const ObRawExpr *&const_expr1, + const ObRawExpr *&const_expr2); +private: + DISABLE_COPY_ASSIGN(ObAggSelEstimator); +}; + +/** + * calculate const or calculable expr selectivity. + * e.g. 
`1`, `1 = 1`, `1 + 1`, `1 = 0` + * if expr is always true, selectivity = 1.0 + * if expr is always false, selectivity = 0.0 + * if expr can't get actual value, like exec_param, selectivity = 0.5 + */ +class ObConstSelEstimator : public ObIndependentSelEstimator +{ +public: + ObConstSelEstimator() : ObIndependentSelEstimator(ObSelEstType::CONST) {} + virtual ~ObConstSelEstimator() = default; + + static int create_estimator(ObSelEstimatorFactory &factory, + const OptSelectivityCtx &ctx, + const ObRawExpr &expr, + ObSelEstimator *&estimator) + { + return create_simple_estimator(factory, ctx, expr, estimator); + } + virtual bool tend_to_use_ds() override { return false; } + virtual int get_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + double &selectivity, + ObIArray &all_predicate_sel) override + { + int ret = OB_SUCCESS; + if (OB_ISNULL(expr_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", KPC(this)); + } else { + ret = get_const_sel(ctx, *expr_, selectivity); + } + return ret; + } + inline static bool check_expr_valid(const ObRawExpr &expr) { return expr.is_const_expr(); } +private: + static int get_const_sel(const OptSelectivityCtx &ctx, + const ObRawExpr &qual, + double &selectivity); +private: + DISABLE_COPY_ASSIGN(ObConstSelEstimator); +}; + +/** + * calculate column expr selectivity. + * e.g. 
`c1`, `t1.c1` + * selectity = 1.0 - sel(t1.c1 = 0) - sel(t1.c1 is NULL) + */ +class ObColumnSelEstimator : public ObIndependentSelEstimator +{ +public: + ObColumnSelEstimator() : ObIndependentSelEstimator(ObSelEstType::COLUMN) {} + virtual ~ObColumnSelEstimator() = default; + + static int create_estimator(ObSelEstimatorFactory &factory, + const OptSelectivityCtx &ctx, + const ObRawExpr &expr, + ObSelEstimator *&estimator) + { + return create_simple_estimator(factory, ctx, expr, estimator); + } + virtual bool tend_to_use_ds() override { return false; } + virtual int get_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + double &selectivity, + ObIArray &all_predicate_sel) override + { + int ret = OB_SUCCESS; + if (OB_ISNULL(expr_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", KPC(this)); + } else { + ret = get_column_sel(table_metas, ctx, *expr_, selectivity); + } + return ret; + } + inline static bool check_expr_valid(const ObRawExpr &expr) { return expr.is_column_ref_expr(); } +private: + static int get_column_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr &qual, + double &selectivity); +private: + DISABLE_COPY_ASSIGN(ObColumnSelEstimator); +}; + +/** + * calculate [not] in predicate selectivity + * e.g. 
`c1 in (1, 2, 3)`, `1 in (c1, c2, c3)` + * The most commonly format `column in (const1, const2, const3)` + * selectivity = sum(selectivity(column = const_i)) + * otherwise, `var in (var1, var2, var3) + * selectivity = sum(selectivity(var = var_i)) + * not_in_selectivity = 1.0 - in_selectivity + */ +class ObInSelEstimator : public ObIndependentSelEstimator +{ +public: + ObInSelEstimator() : ObIndependentSelEstimator(ObSelEstType::IN) {} + virtual ~ObInSelEstimator() = default; + + static int create_estimator(ObSelEstimatorFactory &factory, + const OptSelectivityCtx &ctx, + const ObRawExpr &expr, + ObSelEstimator *&estimator) + { + return create_simple_estimator(factory, ctx, expr, estimator); + } + virtual bool tend_to_use_ds() override { return false; } + virtual int get_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + double &selectivity, + ObIArray &all_predicate_sel) override + { + int ret = OB_SUCCESS; + if (OB_ISNULL(expr_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", KPC(this)); + } else { + ret = get_in_sel(table_metas, ctx, *expr_, selectivity); + } + return ret; + } + inline static bool check_expr_valid(const ObRawExpr &expr) { + return T_OP_IN == expr.get_expr_type() || T_OP_NOT_IN == expr.get_expr_type(); + } +private: + static int get_in_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr &qual, + double &selectivity); +private: + DISABLE_COPY_ASSIGN(ObInSelEstimator); +}; + +// get var is[not] NULL\true\false selectivity +// for var is column: +// var is NULL: selectivity = null_sel(get_var_basic_sel) +// var is true: selectivity = 1 - distinct_sel(var = 0) - null_sel +// var is false: selectivity = distinct_sel(var = 0) +// others: +// DEFAULT_SEL +// for var is not NULL\true\false: selectivity = 1.0 - is_sel +/** + * calculate is [not] predicate selectivity + * e.g. 
`c1 is null`, `c1 is ture`(mysql only) + */ +class ObIsSelEstimator : public ObIndependentSelEstimator +{ +public: + ObIsSelEstimator() : ObIndependentSelEstimator(ObSelEstType::IS) {} + virtual ~ObIsSelEstimator() = default; + + static int create_estimator(ObSelEstimatorFactory &factory, + const OptSelectivityCtx &ctx, + const ObRawExpr &expr, + ObSelEstimator *&estimator) + { + return create_simple_estimator(factory, ctx, expr, estimator); + } + virtual bool tend_to_use_ds() override { return false; } + virtual int get_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + double &selectivity, + ObIArray &all_predicate_sel) override + { + int ret = OB_SUCCESS; + if (OB_ISNULL(expr_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", KPC(this)); + } else { + ret = get_is_sel(table_metas, ctx, *expr_, selectivity); + } + return ret; + } + inline static bool check_expr_valid(const ObRawExpr &expr) { + return T_OP_IS == expr.get_expr_type() || T_OP_IS_NOT == expr.get_expr_type(); + } +private: + static int get_is_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr &qual, + double &selectivity); +private: + DISABLE_COPY_ASSIGN(ObIsSelEstimator); +}; + +//c1 between $val1 and $val2 -> equal with [$val2 - $val1] range sel +//c1 not between $val1 and $val2 -> equal with (min, $val1) or ($val2, max) range sel +class ObBtwSelEstimator : public ObIndependentSelEstimator +{ +public: + ObBtwSelEstimator() : ObIndependentSelEstimator(ObSelEstType::BTW) {} + virtual ~ObBtwSelEstimator() = default; + + static int create_estimator(ObSelEstimatorFactory &factory, + const OptSelectivityCtx &ctx, + const ObRawExpr &expr, + ObSelEstimator *&estimator) + { + return create_simple_estimator(factory, ctx, expr, estimator); + } + virtual bool tend_to_use_ds() override { return false; } + virtual int get_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + double &selectivity, + ObIArray &all_predicate_sel) 
override + { + int ret = OB_SUCCESS; + if (OB_ISNULL(expr_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", KPC(this)); + } else { + ret = get_btw_sel(table_metas, ctx, *expr_, selectivity); + } + return ret; + } + inline static bool check_expr_valid(const ObRawExpr &expr) { + return T_OP_BTW == expr.get_expr_type() || T_OP_NOT_BTW == expr.get_expr_type(); + } +private: + static int get_btw_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr &qual, + double &selectivity); +private: + DISABLE_COPY_ASSIGN(ObBtwSelEstimator); +}; + +// col RANGE_CMP const, column_range_sel +// (c1, c2) RANGE_CMP (c3, c4) +// func(col) RANGE_CMP const, DEFAULT_INEQ_SEL +class ObCmpSelEstimator : public ObIndependentSelEstimator +{ +public: + ObCmpSelEstimator() : ObIndependentSelEstimator(ObSelEstType::CMP) {} + virtual ~ObCmpSelEstimator() = default; + + static int create_estimator(ObSelEstimatorFactory &factory, + const OptSelectivityCtx &ctx, + const ObRawExpr &expr, + ObSelEstimator *&estimator) + { + return create_simple_estimator(factory, ctx, expr, estimator); + } + virtual bool tend_to_use_ds() override { return false; } + virtual int get_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + double &selectivity, + ObIArray &all_predicate_sel) override + { + int ret = OB_SUCCESS; + if (OB_ISNULL(expr_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", KPC(this)); + } else { + ret = get_range_cmp_sel(table_metas, ctx, *expr_, selectivity); + } + return ret; + } + inline static bool check_expr_valid(const ObRawExpr &expr) { + return IS_RANGE_CMP_OP(expr.get_expr_type()); + } +private: + static int get_range_cmp_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr &qual, + double &selectivity); +private: + DISABLE_COPY_ASSIGN(ObCmpSelEstimator); +}; + +// Estimate selectivity for equal preds +// such as: `c1 = 1`, `c1 <=> c2`, `c1 != 3` +class ObEqualSelEstimator : public 
ObIndependentSelEstimator +{ +public: + ObEqualSelEstimator() : + ObIndependentSelEstimator(ObSelEstType::EQUAL) {} + virtual ~ObEqualSelEstimator() = default; + + static int create_estimator(ObSelEstimatorFactory &factory, + const OptSelectivityCtx &ctx, + const ObRawExpr &expr, + ObSelEstimator *&estimator) + { + return create_simple_estimator(factory, ctx, expr, estimator); + } + virtual bool tend_to_use_ds() override { return false; } + virtual int get_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + double &selectivity, + ObIArray &all_predicate_sel) override; + + inline static bool check_expr_valid(const ObRawExpr &expr) { + return T_OP_EQ == expr.get_expr_type() || + T_OP_NSEQ == expr.get_expr_type() || + T_OP_NE == expr.get_expr_type(); + } + + //1. var = | <=> const, get_simple_predicate_sel + //2. func(var) = | <=> const, + // only simple op(+,-,*,/), get_simple_predicate_sel, + // mod(cnt_var, mod_num), distinct_sel * mod_num + // else sqrt(distinct_sel) + //3. 
cnt(var) = |<=> cnt(var) get_cntcol_eq_cntcol_sel + static int get_equal_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr &qual, + double &selectivity); + static int get_equal_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr &left_expr, + const ObRawExpr &right_expr, + const bool null_safe, + double &selectivity); +private: + // col or (col +-* 2) != 1, 1.0 - distinct_sel - null_sel + // col or (col +-* 2) != NULL -> 0.0 + // otherwise DEFAULT_SEL; + static int get_ne_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr &l_expr, + const ObRawExpr &r_expr, + double &selectivity); + + // Get simple predicate selectivity + // (col) | (col +-* num) = const, sel = distinct_sel + // (col) | (col +-* num) = null, sel = 0 + // (col) | (col +-* num) <=> const, sel = distinct_sel + // (col) | (col +-* num) <=> null, sel = null_sel + // multi_col | func(col) =|<=> null, sel DEFAULT_EQ_SEL 0.005 + // @param partition_id only used in base table + /** + * calculate equal predicate with format `contain_column_expr = not_contain_column_expr` by ndv + * e.g. 
`c1 = 1`, `c1 + 1 = 2`, `c1 + c2 = 10` + * if contain_column_expr contain not monotonic operator or has more than one column, + * selectivity = DEFAULT_EQ_SEL + * if contain_column_expr contain only one column and contain only monotonic operator, + * selectivity = 1 / ndv + */ + static int get_simple_equal_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr &cnt_col_expr, + const ObRawExpr *calculable_expr, + const bool null_safe, + double &selectivity); + + static int get_cntcol_op_cntcol_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr &input_left_expr, + const ObRawExpr &input_right_expr, + ObItemType op_type, + double &selectivity); +private: + DISABLE_COPY_ASSIGN(ObEqualSelEstimator); +}; + +/** + * Estimate selectivity for like preds + * such as: `c1 like 'xx%'`, `c1 like '%xx'` + * c1 like 'xx%', use query range selectivity + * c1 like '%xx', use DEFAULT_INEQ_SEL 1.0 / 3.0 +*/ +class ObLikeSelEstimator : public ObIndependentSelEstimator +{ +public: + ObLikeSelEstimator() : + ObIndependentSelEstimator(ObSelEstType::LIKE), + variable_(NULL), + pattern_(NULL), + escape_(NULL), + can_calc_sel_(false), + match_all_str_(false) {} + virtual ~ObLikeSelEstimator() = default; + + static int create_estimator(ObSelEstimatorFactory &factory, + const OptSelectivityCtx &ctx, + const ObRawExpr &expr, + ObSelEstimator *&estimator); + virtual bool tend_to_use_ds() override { return !can_calc_sel_; } + virtual int get_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + double &selectivity, + ObIArray &all_predicate_sel) override; + static int can_calc_like_sel(const OptSelectivityCtx &ctx, const ObRawExpr &expr, bool &can_calc_sel); + +private: + const ObRawExpr *variable_; + const ObRawExpr *pattern_; + const ObRawExpr *escape_; + bool can_calc_sel_; + bool match_all_str_; +private: + DISABLE_COPY_ASSIGN(ObLikeSelEstimator); +}; + +/** + * Estimate selectivity for bool op preds + * 
such as: `c1 > 1 or c2 > 1`, `lnnvl(c1 > 1)` +*/ +class ObBoolOpSelEstimator : public ObIndependentSelEstimator +{ +public: + ObBoolOpSelEstimator() : ObIndependentSelEstimator(ObSelEstType::BOOL_OP) {} + virtual ~ObBoolOpSelEstimator() = default; + + static int create_estimator(ObSelEstimatorFactory &factory, + const OptSelectivityCtx &ctx, + const ObRawExpr &expr, + ObSelEstimator *&estimator); + virtual bool tend_to_use_ds() override; + virtual int get_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + double &selectivity, + ObIArray &all_predicate_sel) override; + +private: + common::ObSEArray child_estimators_; +private: + DISABLE_COPY_ASSIGN(ObBoolOpSelEstimator); +}; + +/** + * Estimate selectivity for range preds which contain the same column + * such as: `c1 > 1 and (c1 < 5 or c1 > 7)` +*/ +class ObRangeSelEstimator : public ObSelEstimator +{ +public: + ObRangeSelEstimator() : ObSelEstimator(ObSelEstType::RANGE), column_expr_(NULL) {} + virtual ~ObRangeSelEstimator() = default; + + static int create_estimator(ObSelEstimatorFactory &factory, + const OptSelectivityCtx &ctx, + const ObRawExpr &expr, + ObSelEstimator *&estimator); + virtual int merge(const ObSelEstimator &other, bool &is_success) override; + virtual bool is_independent() const override { return false; } + + // 计算选择率 + virtual int get_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + double &selectivity, + ObIArray &all_predicate_sel) override; + + virtual bool tend_to_use_ds() override { return false; } + + VIRTUAL_TO_STRING_KV(K_(type), KPC_(column_expr), K_(range_exprs)); + + inline int get_min_max(const OptSelectivityCtx &ctx, + ObObj &obj_min, + ObObj &obj_max) { + return ObOptSelectivity::get_column_range_min_max(ctx, column_expr_, range_exprs_, obj_min, obj_max); + } + + const ObColumnRefRawExpr *get_column_expr() const { return column_expr_; } + ObIArray &get_range_exprs() { return range_exprs_; } + +private: + const ObColumnRefRawExpr 
*column_expr_; + common::ObSEArray range_exprs_; +private: + DISALLOW_COPY_AND_ASSIGN(ObRangeSelEstimator); +}; + +/** + * Estimate selectivity for equal join filter which join ctx.get_left_rel_ids() and ctx.get_right_rel_ids() + * such as: `t1.c1 = t2.c1 and t1.c2 = t2.c2` +*/ +class ObSimpleJoinSelEstimator : public ObSelEstimator +{ +public: + ObSimpleJoinSelEstimator() : ObSelEstimator(ObSelEstType::SIMPLE_JOIN) {} + virtual ~ObSimpleJoinSelEstimator() = default; + + static int create_estimator(ObSelEstimatorFactory &factory, + const OptSelectivityCtx &ctx, + const ObRawExpr &expr, + ObSelEstimator *&estimator); + virtual int merge(const ObSelEstimator &other, bool &is_success) override; + virtual bool is_independent() const override { return false; } + + // 计算选择率 + virtual int get_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + double &selectivity, + ObIArray &all_predicate_sel) override; + + virtual bool tend_to_use_ds() override { return false; } + + VIRTUAL_TO_STRING_KV(K_(type), KPC_(left_rel_ids), KPC_(right_rel_ids), K_(join_conditions)); +private: + static int is_simple_join_condition(const ObRawExpr &qual, + const ObRelIds *left_rel_ids, + const ObRelIds *right_rel_ids, + bool &is_valid); + static int get_multi_equal_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + ObIArray &quals, + double &selectivity); + static int extract_join_exprs(ObIArray &quals, + const ObRelIds &left_rel_ids, + const ObRelIds &right_rel_ids, + ObIArray &left_exprs, + ObIArray &right_exprs, + ObIArray &null_safes); + static int get_cntcols_eq_cntcols_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObIArray &left_exprs, + const ObIArray &right_exprs, + const ObIArray &null_safes, + double &selectivity); + /** + * 判断多列连接是否只涉及到两个表 + */ + static int is_valid_multi_join(ObIArray &quals, + bool &is_valid); + + const ObRelIds *left_rel_ids_; + const ObRelIds *right_rel_ids_; + common::ObSEArray 
join_conditions_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObSimpleJoinSelEstimator); +}; + +/** + * Estimate selectivity for inequal join filter which contains the same term + * such as: `t1.c1 - t2.c1 < 2 and t1.c1 > t2.c1 - 3` +*/ +class ObInequalJoinSelEstimator : public ObSelEstimator +{ +public: + struct Term { + Term() : col1_(NULL), col2_(NULL), coefficient1_(1.0), coefficient2_(1.0) {} + bool is_valid() { return col1_ != NULL && col2_ != NULL; } + + VIRTUAL_TO_STRING_KV(K_(col1), KPC_(col2), + K_(coefficient1), K_(coefficient2)); + + const ObColumnRefRawExpr *col1_; + const ObColumnRefRawExpr *col2_; + double coefficient1_; + double coefficient2_; + }; + +public: + ObInequalJoinSelEstimator() : + ObSelEstimator(ObSelEstType::INEQUAL_JOIN), + has_lower_bound_(false), + has_upper_bound_(false), + include_lower_bound_(false), + include_upper_bound_(false), + lower_bound_(0), + upper_bound_(0) {} + virtual ~ObInequalJoinSelEstimator() = default; + + static int create_estimator(ObSelEstimatorFactory &factory, + const OptSelectivityCtx &ctx, + const ObRawExpr &expr, + ObSelEstimator *&estimator); + virtual int merge(const ObSelEstimator &other, bool &is_success) override; + virtual bool is_independent() const override { return false; } + + virtual int get_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + double &selectivity, + ObIArray &all_predicate_sel) override; + + virtual bool tend_to_use_ds() override { return false; } + + VIRTUAL_TO_STRING_KV(K_(type), K_(term), + K_(has_lower_bound), K_(has_upper_bound), + K_(include_lower_bound), K_(include_upper_bound), + K_(lower_bound), K_(upper_bound)); + +private: + + static void cmp_term(const Term &t1, const Term &t2, bool &equal, bool &need_reverse); + + static int extract_ineq_qual(const OptSelectivityCtx &ctx, + const ObRawExpr &qual, + bool &is_valid); + + static int extract_column_offset(const OptSelectivityCtx &ctx, + const ObRawExpr *expr, + bool is_minus, + bool &is_valid, + Term 
&term, + double &offset); + + static bool is_higher_lower_bound(double bound1, bool include1, double bound2, bool include2) + { + return bound1 > bound2 || (bound1 == bound2 && !include1 && include2); + } + static bool is_higher_upper_bound(double bound1, bool include1, double bound2, bool include2) + { + return bound1 > bound2 || (bound1 == bound2 && include1 && !include2); + } + // c1 in [min1, max1], c2 in [min2, max2] + // calc the sel of `c1 + c2 > offset`; + static double get_gt_sel(double min1, + double max1, + double min2, + double max2, + double offset); + + static double get_any_gt_sel(double min1, + double max1, + double min2, + double max2, + double offset); + + static double get_all_gt_sel(double min1, + double max1, + double min2, + double max2, + double offset); + + // c1 in [min1, max1], c2 in [min2, max2] + // calc the sel of `c1 + c2 = offset`; + static double get_equal_sel(double min1, + double max1, + double ndv1, + double min2, + double max2, + double ndv2, + double offset, + bool is_semi); + + double get_sel_for_point(double point1, double point2); + + void reverse(); + void update_lower_bound(double bound, bool include); + void update_upper_bound(double bound, bool include); + void set_bound(ObItemType item_type, double bound); + + Term term_; + bool has_lower_bound_; + bool has_upper_bound_; + bool include_lower_bound_; + bool include_upper_bound_; + double lower_bound_; + double upper_bound_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObInequalJoinSelEstimator); +}; + +} +} + +#endif \ No newline at end of file diff --git a/src/sql/optimizer/ob_select_log_plan.cpp b/src/sql/optimizer/ob_select_log_plan.cpp index 4bbe49e911..b2015674b3 100644 --- a/src/sql/optimizer/ob_select_log_plan.cpp +++ b/src/sql/optimizer/ob_select_log_plan.cpp @@ -1774,6 +1774,7 @@ int ObSelectLogPlan::generate_raw_plan_for_set() ObSEArray child_remain_filters; const ObSelectStmt *child_stmt = NULL; ObSelectLogPlan *child_plan = NULL; + ObSelectLogPlan *nonrecursive_plan 
= NULL; for (int64 i = 0; OB_SUCC(ret) && i < child_size; ++i) { child_input_filters.reuse(); child_rename_filters.reuse(); @@ -1800,10 +1801,13 @@ int ObSelectLogPlan::generate_raw_plan_for_set() LOG_WARN("get remain filters failed", K(ret)); } else if (OB_FAIL(generate_child_plan_for_set(child_stmt, child_plan, child_rename_filters, i, - select_stmt->is_set_distinct()))) { + select_stmt->is_set_distinct(), + nonrecursive_plan))) { LOG_WARN("failed to generate left subquery plan", K(ret)); } else if (OB_FAIL(child_plans.push_back(child_plan))) { LOG_WARN("failed to push back", K(ret)); + } else if (0 == i && select_stmt->is_recursive_union()) { + nonrecursive_plan = child_plan; } } } @@ -3973,7 +3977,18 @@ int ObSelectLogPlan::allocate_distinct_set_as_top(ObLogicalOperator *left_child, set_op->assign_set_op(select_stmt->get_set_op()); set_op->set_algo_type(set_method); set_op->set_distributed_algo(dist_set_method); - if (OB_FAIL(set_op->compute_property())) { + for (int64_t i = 0; OB_SUCC(ret) && i < set_op->get_num_of_child(); i ++) { + const OptTableMeta *table_meta = get_update_table_metas().get_table_meta_by_table_id(i); + double child_ndv = 0; + if (OB_NOT_NULL(table_meta)) { + child_ndv = table_meta->get_distinct_rows(); + } + if (OB_FAIL(set_op->add_child_ndv(child_ndv))) { + LOG_WARN("failed to add child ndv", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(set_op->compute_property())) { LOG_WARN("failed to compute property", K(ret)); } else { top = set_op; @@ -4646,7 +4661,8 @@ int ObSelectLogPlan::generate_child_plan_for_set(const ObDMLStmt *sub_stmt, ObSelectLogPlan *&sub_plan, ObIArray &pushdown_filters, const uint64_t child_offset, - const bool is_set_distinct) + const bool is_set_distinct, + ObSelectLogPlan *nonrecursive_plan) { int ret = OB_SUCCESS; sub_plan = NULL; @@ -4660,7 +4676,8 @@ int ObSelectLogPlan::generate_child_plan_for_set(const ObDMLStmt *sub_stmt, *sub_stmt)))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_ERROR("Failed to create 
logical plan", K(sub_plan), K(ret)); - } else if (FALSE_IT(sub_plan->set_is_parent_set_distinct(is_set_distinct))) { + } else if (FALSE_IT(sub_plan->set_is_parent_set_distinct(is_set_distinct)) || + FALSE_IT(sub_plan->set_nonrecursive_plan_for_fake_cte(nonrecursive_plan))) { // do nothing } else if (OB_FAIL(sub_plan->add_pushdown_filters(pushdown_filters))) { LOG_WARN("failed to add pushdown filters", K(ret)); diff --git a/src/sql/optimizer/ob_select_log_plan.h b/src/sql/optimizer/ob_select_log_plan.h index 90c4e5b8d4..66bbd03f9e 100644 --- a/src/sql/optimizer/ob_select_log_plan.h +++ b/src/sql/optimizer/ob_select_log_plan.h @@ -438,7 +438,8 @@ private: ObSelectLogPlan *&sub_plan, ObIArray &pushdown_filters, const uint64_t child_offset, - const bool is_set_distinct); + const bool is_set_distinct, + ObSelectLogPlan *nonrecursive_plan); /** * @brief GENERATE the PLAN tree FOR PLAIN SELECT stmt diff --git a/src/sql/resolver/dml/ob_hint.cpp b/src/sql/resolver/dml/ob_hint.cpp index 26a56d40ca..843da780ff 100644 --- a/src/sql/resolver/dml/ob_hint.cpp +++ b/src/sql/resolver/dml/ob_hint.cpp @@ -785,6 +785,11 @@ bool ObOptParamHint::is_param_val_valid(const OptParamType param_type, const ObO || 0 == val.get_varchar().case_compare("false")); break; } + case _ENABLE_STORAGE_CARDINALITY_ESTIMATION: { + is_valid = val.is_varchar() && (0 == val.get_varchar().case_compare("true") + || 0 == val.get_varchar().case_compare("false")); + break; + } default: LOG_TRACE("invalid opt param val", K(param_type), K(val)); break; diff --git a/src/sql/resolver/dml/ob_hint.h b/src/sql/resolver/dml/ob_hint.h index e1f82e7945..6620222721 100644 --- a/src/sql/resolver/dml/ob_hint.h +++ b/src/sql/resolver/dml/ob_hint.h @@ -108,6 +108,7 @@ struct ObOptParamHint DEF(COMPACT_SORT_LEVEL,) \ DEF(WORKAREA_SIZE_POLICY,) \ DEF(ENABLE_RICH_VECTOR_FORMAT,) \ + DEF(_ENABLE_STORAGE_CARDINALITY_ESTIMATION,) \ DECLARE_ENUM(OptParamType, opt_param, OPT_PARAM_TYPE_DEF, static); diff --git 
a/src/sql/session/ob_basic_session_info.cpp b/src/sql/session/ob_basic_session_info.cpp index 632853cde2..29fdd67abb 100644 --- a/src/sql/session/ob_basic_session_info.cpp +++ b/src/sql/session/ob_basic_session_info.cpp @@ -3531,6 +3531,13 @@ int ObBasicSessionInfo::is_serial_set_order_forced(bool &force_set_order, bool i return ret; } +int ObBasicSessionInfo::is_storage_estimation_enabled(bool &storage_estimation_enabled) const +{ + int ret = OB_SUCCESS; + ret = get_bool_sys_var(SYS_VAR__ENABLE_STORAGE_CARDINALITY_ESTIMATION, storage_estimation_enabled); + return ret; +} + int ObBasicSessionInfo::is_select_index_enabled(bool &select_index_enabled) const { return get_bool_sys_var(SYS_VAR_OB_ENABLE_INDEX_DIRECT_SELECT, select_index_enabled); diff --git a/src/sql/session/ob_basic_session_info.h b/src/sql/session/ob_basic_session_info.h index 63637a9fb2..376c3405b2 100644 --- a/src/sql/session/ob_basic_session_info.h +++ b/src/sql/session/ob_basic_session_info.h @@ -1001,6 +1001,7 @@ public: int if_aggr_pushdown_allowed(bool &aggr_pushdown_allowed) const; int is_transformation_enabled(bool &transformation_enabled) const; int is_serial_set_order_forced(bool &force_set_order, bool is_oracle_mode) const; + int is_storage_estimation_enabled(bool &storage_estimation_enabled) const; bool is_use_trace_log() const { return sys_vars_cache_.get_ob_enable_trace_log(); diff --git a/src/storage/access/ob_table_estimator.cpp b/src/storage/access/ob_table_estimator.cpp index 5b0f67462b..30d0232731 100644 --- a/src/storage/access/ob_table_estimator.cpp +++ b/src/storage/access/ob_table_estimator.cpp @@ -51,10 +51,13 @@ int ObTableEstimator::estimate_row_count_for_scan( int ret = OB_SUCCESS; part_estimate.reset(); est_records.reuse(); - if (OB_UNLIKELY(base_input.is_table_invalid() || ranges.count() <= 0)) { + if (OB_UNLIKELY(base_input.is_table_invalid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(ret), K(base_input.table_id_), K(ranges), 
K(base_input.tables_.count())); + } else if (ranges.empty()) { + part_estimate.logical_row_count_ = 0; + part_estimate.physical_row_count_ = 0; } else { ObPartitionEst table_est; ObEstRowCountRecord record; diff --git a/tools/deploy/mysql_test/r/mysql/view_2.result b/tools/deploy/mysql_test/r/mysql/view_2.result index 8f7af3b804..41940ca2f7 100644 --- a/tools/deploy/mysql_test/r/mysql/view_2.result +++ b/tools/deploy/mysql_test/r/mysql/view_2.result @@ -483,6 +483,7 @@ Optimization Info: avaiable_index_name:[t11] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING FULL] t21: table_rows:1 physical_range_rows:1 @@ -494,6 +495,7 @@ Optimization Info: avaiable_index_name:[t21] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING FULL] Plan Type: LOCAL Note: diff --git a/tools/deploy/mysql_test/test_suite/column_store_encoding/r/mysql/basic_cs_encoding.result b/tools/deploy/mysql_test/test_suite/column_store_encoding/r/mysql/basic_cs_encoding.result index 11e7aa5206..0b120b7ed8 100644 --- a/tools/deploy/mysql_test/test_suite/column_store_encoding/r/mysql/basic_cs_encoding.result +++ b/tools/deploy/mysql_test/test_suite/column_store_encoding/r/mysql/basic_cs_encoding.result @@ -42,8 +42,8 @@ Query Plan ================================================================ |ID|OPERATOR |NAME|EST.ROWS|EST.TIME(us)| ---------------------------------------------------------------- -|0 |SORT | |13 |36 | -|1 |└─NESTED-LOOP JOIN | |13 |35 | +|0 |SORT | |6 |35 | +|1 |└─NESTED-LOOP JOIN | |6 |35 | |2 | ├─COLUMN TABLE FULL SCAN |t4 |2 |3 | |3 | └─DISTRIBUTED TABLE RANGE SCAN|t3 |3 |16 | ================================================================ diff --git a/tools/deploy/mysql_test/test_suite/executor/r/mysql/basic.result b/tools/deploy/mysql_test/test_suite/executor/r/mysql/basic.result index 7595beed11..9f2e768fb1 100644 --- a/tools/deploy/mysql_test/test_suite/executor/r/mysql/basic.result +++ 
b/tools/deploy/mysql_test/test_suite/executor/r/mysql/basic.result @@ -1551,7 +1551,7 @@ Query Plan =============================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| --------------------------------------------------------------- -|0 |HASH JOIN | |7 |43 | +|0 |HASH JOIN | |4 |43 | |1 |├─PX COORDINATOR | |5 |19 | |2 |│ └─EXCHANGE OUT DISTR |:EX10000|5 |17 | |3 |│ └─PX PARTITION ITERATOR| |5 |11 | @@ -1593,7 +1593,7 @@ Query Plan =============================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| --------------------------------------------------------------- -|0 |HASH JOIN | |10 |43 | +|0 |HASH JOIN | |4 |43 | |1 |├─PX COORDINATOR | |5 |19 | |2 |│ └─EXCHANGE OUT DISTR |:EX10000|5 |17 | |3 |│ └─PX PARTITION ITERATOR| |5 |11 | @@ -1635,7 +1635,7 @@ Query Plan =============================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| --------------------------------------------------------------- -|0 |HASH JOIN | |5 |43 | +|0 |HASH JOIN | |4 |43 | |1 |├─PX COORDINATOR | |5 |19 | |2 |│ └─EXCHANGE OUT DISTR |:EX10000|5 |17 | |3 |│ └─PX PARTITION ITERATOR| |5 |11 | @@ -1816,7 +1816,7 @@ Query Plan =============================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| --------------------------------------------------------------- -|0 |HASH JOIN | |7 |41 | +|0 |HASH JOIN | |4 |40 | |1 |├─PX COORDINATOR | |5 |17 | |2 |│ └─EXCHANGE OUT DISTR |:EX10000|5 |14 | |3 |│ └─PX PARTITION ITERATOR| |5 |9 | @@ -1858,7 +1858,7 @@ Query Plan =============================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| --------------------------------------------------------------- -|0 |HASH JOIN | |10 |41 | +|0 |HASH JOIN | |4 |40 | |1 |├─PX COORDINATOR | |5 |17 | |2 |│ └─EXCHANGE OUT DISTR |:EX10000|5 |14 | |3 |│ └─PX PARTITION ITERATOR| |5 |9 | @@ -1900,7 +1900,7 @@ Query Plan 
=============================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| --------------------------------------------------------------- -|0 |HASH JOIN | |5 |40 | +|0 |HASH JOIN | |4 |40 | |1 |├─PX COORDINATOR | |5 |17 | |2 |│ └─EXCHANGE OUT DISTR |:EX10000|5 |14 | |3 |│ └─PX PARTITION ITERATOR| |5 |9 | @@ -2733,9 +2733,9 @@ Query Plan ===================================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| --------------------------------------------------------------------- -|0 |PX COORDINATOR | |5 |42 | -|1 |└─EXCHANGE OUT DISTR |:EX10001|5 |37 | -|2 | └─HASH JOIN | |5 |27 | +|0 |PX COORDINATOR | |4 |39 | +|1 |└─EXCHANGE OUT DISTR |:EX10001|4 |35 | +|2 | └─HASH JOIN | |4 |27 | |3 | ├─PX PARTITION ITERATOR | |4 |9 | |4 | │ └─TABLE FULL SCAN |t2 |4 |9 | |5 | └─EXCHANGE IN DISTR | |5 |17 | @@ -2775,9 +2775,9 @@ Query Plan ===================================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| --------------------------------------------------------------------- -|0 |PX COORDINATOR | |5 |43 | -|1 |└─EXCHANGE OUT DISTR |:EX10001|5 |38 | -|2 | └─HASH JOIN | |5 |27 | +|0 |PX COORDINATOR | |4 |40 | +|1 |└─EXCHANGE OUT DISTR |:EX10001|4 |36 | +|2 | └─HASH JOIN | |4 |27 | |3 | ├─PX PARTITION ITERATOR | |4 |9 | |4 | │ └─TABLE FULL SCAN |t2 |4 |9 | |5 | └─EXCHANGE IN DISTR | |5 |17 | @@ -2817,7 +2817,7 @@ Query Plan =============================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| --------------------------------------------------------------- -|0 |HASH JOIN | |5 |34 | +|0 |HASH JOIN | |4 |33 | |1 |├─PX COORDINATOR | |4 |16 | |2 |│ └─EXCHANGE OUT DISTR |:EX10000|4 |14 | |3 |│ └─PX PARTITION ITERATOR| |4 |9 | @@ -2860,9 +2860,9 @@ Query Plan ===================================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| --------------------------------------------------------------------- 
-|0 |PX COORDINATOR | |5 |42 | -|1 |└─EXCHANGE OUT DISTR |:EX10001|5 |37 | -|2 | └─HASH JOIN | |5 |27 | +|0 |PX COORDINATOR | |4 |39 | +|1 |└─EXCHANGE OUT DISTR |:EX10001|4 |35 | +|2 | └─HASH JOIN | |4 |27 | |3 | ├─PX PARTITION ITERATOR | |4 |9 | |4 | │ └─TABLE FULL SCAN |t1 |4 |9 | |5 | └─EXCHANGE IN DISTR | |5 |17 | @@ -3033,9 +3033,9 @@ Query Plan ===================================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| --------------------------------------------------------------------- -|0 |PX COORDINATOR | |5 |43 | -|1 |└─EXCHANGE OUT DISTR |:EX10001|5 |38 | -|2 | └─HASH JOIN | |5 |27 | +|0 |PX COORDINATOR | |4 |40 | +|1 |└─EXCHANGE OUT DISTR |:EX10001|4 |36 | +|2 | └─HASH JOIN | |4 |27 | |3 | ├─PX PARTITION ITERATOR | |4 |9 | |4 | │ └─TABLE FULL SCAN |t1 |4 |9 | |5 | └─EXCHANGE IN DISTR | |5 |17 | @@ -3344,7 +3344,7 @@ Query Plan =============================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| --------------------------------------------------------------- -|0 |HASH JOIN | |7 |43 | +|0 |HASH JOIN | |4 |43 | |1 |├─PX COORDINATOR | |5 |19 | |2 |│ └─EXCHANGE OUT DISTR |:EX10000|5 |17 | |3 |│ └─PX PARTITION ITERATOR| |5 |11 | @@ -3386,7 +3386,7 @@ Query Plan =============================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| --------------------------------------------------------------- -|0 |HASH JOIN | |10 |43 | +|0 |HASH JOIN | |4 |43 | |1 |├─PX COORDINATOR | |5 |19 | |2 |│ └─EXCHANGE OUT DISTR |:EX10000|5 |17 | |3 |│ └─PX PARTITION ITERATOR| |5 |11 | @@ -3428,7 +3428,7 @@ Query Plan =============================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| --------------------------------------------------------------- -|0 |HASH JOIN | |5 |43 | +|0 |HASH JOIN | |4 |43 | |1 |├─PX COORDINATOR | |5 |19 | |2 |│ └─EXCHANGE OUT DISTR |:EX10000|5 |17 | |3 |│ └─PX PARTITION ITERATOR| |5 |11 | @@ -3609,7 +3609,7 @@ 
Query Plan =============================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| --------------------------------------------------------------- -|0 |HASH JOIN | |7 |41 | +|0 |HASH JOIN | |4 |40 | |1 |├─PX COORDINATOR | |5 |17 | |2 |│ └─EXCHANGE OUT DISTR |:EX10000|5 |14 | |3 |│ └─PX PARTITION ITERATOR| |5 |9 | @@ -3651,7 +3651,7 @@ Query Plan =============================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| --------------------------------------------------------------- -|0 |HASH JOIN | |10 |41 | +|0 |HASH JOIN | |4 |40 | |1 |├─PX COORDINATOR | |5 |17 | |2 |│ └─EXCHANGE OUT DISTR |:EX10000|5 |14 | |3 |│ └─PX PARTITION ITERATOR| |5 |9 | @@ -3693,7 +3693,7 @@ Query Plan =============================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| --------------------------------------------------------------- -|0 |HASH JOIN | |5 |40 | +|0 |HASH JOIN | |4 |40 | |1 |├─PX COORDINATOR | |5 |17 | |2 |│ └─EXCHANGE OUT DISTR |:EX10000|5 |14 | |3 |│ └─PX PARTITION ITERATOR| |5 |9 | @@ -4320,9 +4320,9 @@ Query Plan ===================================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| --------------------------------------------------------------------- -|0 |PX COORDINATOR | |2 |31 | -|1 |└─EXCHANGE OUT DISTR |:EX10001|2 |29 | -|2 | └─HASH JOIN | |2 |24 | +|0 |PX COORDINATOR | |1 |29 | +|1 |└─EXCHANGE OUT DISTR |:EX10001|1 |28 | +|2 | └─HASH JOIN | |1 |24 | |3 | ├─EXCHANGE IN DISTR | |4 |16 | |4 | │ └─EXCHANGE OUT DISTR (PKEY)|:EX10000|4 |14 | |5 | │ └─PX PARTITION ITERATOR | |4 |9 | @@ -4362,9 +4362,9 @@ Query Plan ===================================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| --------------------------------------------------------------------- -|0 |PX COORDINATOR | |2 |35 | -|1 |└─EXCHANGE OUT DISTR |:EX10001|2 |31 | -|2 | └─HASH JOIN | |2 |24 | +|0 |PX COORDINATOR | |1 |29 | 
+|1 |└─EXCHANGE OUT DISTR |:EX10001|1 |28 | +|2 | └─HASH JOIN | |1 |24 | |3 | ├─EXCHANGE IN DISTR | |4 |16 | |4 | │ └─EXCHANGE OUT DISTR (PKEY)|:EX10000|4 |14 | |5 | │ └─PX PARTITION ITERATOR | |4 |9 | @@ -4404,9 +4404,9 @@ Query Plan ===================================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| --------------------------------------------------------------------- -|0 |PX COORDINATOR | |1 |29 | -|1 |└─EXCHANGE OUT DISTR |:EX10001|1 |28 | -|2 | └─HASH JOIN | |1 |24 | +|0 |PX COORDINATOR | |4 |46 | +|1 |└─EXCHANGE OUT DISTR |:EX10001|4 |39 | +|2 | └─HASH JOIN | |4 |24 | |3 | ├─EXCHANGE IN DISTR | |4 |16 | |4 | │ └─EXCHANGE OUT DISTR (PKEY)|:EX10000|4 |14 | |5 | │ └─PX PARTITION ITERATOR | |4 |9 | @@ -4791,9 +4791,9 @@ Query Plan ===================================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| --------------------------------------------------------------------- -|0 |PX COORDINATOR | |5 |42 | -|1 |└─EXCHANGE OUT DISTR |:EX10001|5 |37 | -|2 | └─HASH JOIN | |5 |27 | +|0 |PX COORDINATOR | |4 |39 | +|1 |└─EXCHANGE OUT DISTR |:EX10001|4 |35 | +|2 | └─HASH JOIN | |4 |27 | |3 | ├─PX PARTITION ITERATOR | |4 |9 | |4 | │ └─TABLE FULL SCAN |t2 |4 |9 | |5 | └─EXCHANGE IN DISTR | |5 |17 | @@ -4833,9 +4833,9 @@ Query Plan ===================================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| --------------------------------------------------------------------- -|0 |PX COORDINATOR | |5 |43 | -|1 |└─EXCHANGE OUT DISTR |:EX10001|5 |38 | -|2 | └─HASH JOIN | |5 |27 | +|0 |PX COORDINATOR | |4 |40 | +|1 |└─EXCHANGE OUT DISTR |:EX10001|4 |36 | +|2 | └─HASH JOIN | |4 |27 | |3 | ├─PX PARTITION ITERATOR | |4 |9 | |4 | │ └─TABLE FULL SCAN |t2 |4 |9 | |5 | └─EXCHANGE IN DISTR | |5 |17 | @@ -4875,7 +4875,7 @@ Query Plan =============================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| 
--------------------------------------------------------------- -|0 |HASH JOIN | |5 |34 | +|0 |HASH JOIN | |4 |33 | |1 |├─PX COORDINATOR | |4 |16 | |2 |│ └─EXCHANGE OUT DISTR |:EX10000|4 |14 | |3 |│ └─PX PARTITION ITERATOR| |4 |9 | @@ -4918,9 +4918,9 @@ Query Plan ===================================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| --------------------------------------------------------------------- -|0 |PX COORDINATOR | |5 |42 | -|1 |└─EXCHANGE OUT DISTR |:EX10001|5 |37 | -|2 | └─HASH JOIN | |5 |27 | +|0 |PX COORDINATOR | |4 |39 | +|1 |└─EXCHANGE OUT DISTR |:EX10001|4 |35 | +|2 | └─HASH JOIN | |4 |27 | |3 | ├─PX PARTITION ITERATOR | |4 |9 | |4 | │ └─TABLE FULL SCAN |t1 |4 |9 | |5 | └─EXCHANGE IN DISTR | |5 |17 | @@ -5091,9 +5091,9 @@ Query Plan ===================================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| --------------------------------------------------------------------- -|0 |PX COORDINATOR | |5 |43 | -|1 |└─EXCHANGE OUT DISTR |:EX10001|5 |38 | -|2 | └─HASH JOIN | |5 |27 | +|0 |PX COORDINATOR | |4 |40 | +|1 |└─EXCHANGE OUT DISTR |:EX10001|4 |36 | +|2 | └─HASH JOIN | |4 |27 | |3 | ├─PX PARTITION ITERATOR | |4 |9 | |4 | │ └─TABLE FULL SCAN |t1 |4 |9 | |5 | └─EXCHANGE IN DISTR | |5 |17 | diff --git a/tools/deploy/mysql_test/test_suite/geometry/r/mysql/geometry_bugfix_mysql.result b/tools/deploy/mysql_test/test_suite/geometry/r/mysql/geometry_bugfix_mysql.result index 154c2e8e37..4c183a959a 100644 --- a/tools/deploy/mysql_test/test_suite/geometry/r/mysql/geometry_bugfix_mysql.result +++ b/tools/deploy/mysql_test/test_suite/geometry/r/mysql/geometry_bugfix_mysql.result @@ -432,9 +432,9 @@ Query Plan =============================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| --------------------------------------------------------------- -|0 |NESTED-LOOP JOIN | |21 |345 | +|0 |NESTED-LOOP JOIN | |21 |306 | |1 |├─TABLE FULL SCAN |a |9 |3 | -|2 
|└─DISTRIBUTED TABLE FULL SCAN|b(idx)|3 |38 | +|2 |└─DISTRIBUTED TABLE FULL SCAN|b(idx)|3 |33 | =============================================================== Outputs & filters: ------------------------------------- @@ -453,9 +453,9 @@ Query Plan =============================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| --------------------------------------------------------------- -|0 |NESTED-LOOP JOIN | |21 |345 | +|0 |NESTED-LOOP JOIN | |21 |306 | |1 |├─TABLE FULL SCAN |a |9 |3 | -|2 |└─DISTRIBUTED TABLE FULL SCAN|b(idx)|3 |38 | +|2 |└─DISTRIBUTED TABLE FULL SCAN|b(idx)|3 |33 | =============================================================== Outputs & filters: ------------------------------------- @@ -474,9 +474,9 @@ Query Plan =============================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| --------------------------------------------------------------- -|0 |NESTED-LOOP JOIN | |21 |345 | +|0 |NESTED-LOOP JOIN | |21 |306 | |1 |├─TABLE FULL SCAN |a |9 |3 | -|2 |└─DISTRIBUTED TABLE FULL SCAN|b(idx)|3 |38 | +|2 |└─DISTRIBUTED TABLE FULL SCAN|b(idx)|3 |33 | =============================================================== Outputs & filters: ------------------------------------- @@ -495,9 +495,9 @@ Query Plan =============================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| --------------------------------------------------------------- -|0 |NESTED-LOOP JOIN | |21 |345 | +|0 |NESTED-LOOP JOIN | |21 |306 | |1 |├─TABLE FULL SCAN |a |9 |3 | -|2 |└─DISTRIBUTED TABLE FULL SCAN|b(idx)|3 |38 | +|2 |└─DISTRIBUTED TABLE FULL SCAN|b(idx)|3 |33 | =============================================================== Outputs & filters: ------------------------------------- diff --git a/tools/deploy/mysql_test/test_suite/geometry/r/mysql/geometry_filter_mysql.result b/tools/deploy/mysql_test/test_suite/geometry/r/mysql/geometry_filter_mysql.result index 
f754acba51..62017d112f 100644 --- a/tools/deploy/mysql_test/test_suite/geometry/r/mysql/geometry_filter_mysql.result +++ b/tools/deploy/mysql_test/test_suite/geometry/r/mysql/geometry_filter_mysql.result @@ -135,6 +135,7 @@ Optimization Info: pruned_index_name:[t] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -196,6 +197,7 @@ Optimization Info: pruned_index_name:[t] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -257,6 +259,7 @@ Optimization Info: pruned_index_name:[t] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -319,6 +322,7 @@ Optimization Info: pruned_index_name:[t] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -380,6 +384,7 @@ Optimization Info: pruned_index_name:[t] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -442,6 +447,7 @@ Optimization Info: pruned_index_name:[t] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -504,6 +510,7 @@ Optimization Info: pruned_index_name:[t] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -565,6 +572,7 @@ Optimization Info: pruned_index_name:[t] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -685,6 +693,7 @@ Optimization Info: pruned_index_name:[t] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -746,6 +755,7 @@ Optimization Info: pruned_index_name:[t] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -1016,6 +1026,7 @@ Optimization Info: pruned_index_name:[geo_table2] stats version:0 dynamic 
sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -1085,6 +1096,7 @@ Optimization Info: pruned_index_name:[geo_table2] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -1170,6 +1182,7 @@ Optimization Info: pruned_index_name:[geo_table] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -1387,6 +1400,7 @@ Optimization Info: pruned_index_name:[t] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -1448,6 +1462,7 @@ Optimization Info: pruned_index_name:[t] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -1509,6 +1524,7 @@ Optimization Info: pruned_index_name:[t] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -1572,6 +1588,7 @@ Optimization Info: pruned_index_name:[t] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -1633,6 +1650,7 @@ Optimization Info: pruned_index_name:[t] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -1695,6 +1713,7 @@ Optimization Info: pruned_index_name:[t] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -1757,6 +1776,7 @@ Optimization Info: pruned_index_name:[t] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -1818,6 +1838,7 @@ Optimization Info: pruned_index_name:[t] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -1991,6 +2012,7 @@ Optimization Info: pruned_index_name:[t] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ 
-2052,6 +2074,7 @@ Optimization Info: pruned_index_name:[t] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -2113,6 +2136,7 @@ Optimization Info: pruned_index_name:[t] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -2175,6 +2199,7 @@ Optimization Info: pruned_index_name:[t] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -2236,6 +2261,7 @@ Optimization Info: pruned_index_name:[t] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -2298,6 +2324,7 @@ Optimization Info: pruned_index_name:[t] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -2360,6 +2387,7 @@ Optimization Info: pruned_index_name:[t] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -2421,6 +2449,7 @@ Optimization Info: pruned_index_name:[t] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -2482,6 +2511,7 @@ Optimization Info: pruned_index_name:[t] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -2543,6 +2573,7 @@ Optimization Info: pruned_index_name:[t] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -2604,6 +2635,7 @@ Optimization Info: pruned_index_name:[t] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -2667,6 +2699,7 @@ Optimization Info: pruned_index_name:[t] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -2728,6 +2761,7 @@ Optimization Info: pruned_index_name:[t] stats version:0 dynamic sampling level:1 + 
estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -2790,6 +2824,7 @@ Optimization Info: pruned_index_name:[t] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -2852,6 +2887,7 @@ Optimization Info: pruned_index_name:[t] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -2913,6 +2949,7 @@ Optimization Info: pruned_index_name:[t] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -2996,6 +3033,7 @@ Optimization Info: pruned_index_name:[t] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: LOCAL Note: diff --git a/tools/deploy/mysql_test/test_suite/geometry/r/mysql/geometry_index2_mysql.result b/tools/deploy/mysql_test/test_suite/geometry/r/mysql/geometry_index2_mysql.result index 98cf32507a..732037e7eb 100644 --- a/tools/deploy/mysql_test/test_suite/geometry/r/mysql/geometry_index2_mysql.result +++ b/tools/deploy/mysql_test/test_suite/geometry/r/mysql/geometry_index2_mysql.result @@ -249,6 +249,7 @@ Optimization Info: pruned_index_name:[idx] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: LOCAL Note: @@ -297,6 +298,7 @@ Optimization Info: pruned_index_name:[idx] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: LOCAL Note: @@ -352,6 +354,7 @@ Optimization Info: unstable_index_name:[tt2] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: LOCAL Note: @@ -404,6 +407,7 @@ Optimization Info: pruned_index_name:[tt2] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: LOCAL Note: @@ -456,6 +460,7 @@ Optimization Info: unstable_index_name:[tt2] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: LOCAL Note: @@ -508,6 +513,7 @@ 
Optimization Info: pruned_index_name:[tt2] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: LOCAL Note: diff --git a/tools/deploy/mysql_test/test_suite/geometry/r/mysql/spatial_relation_join_mysql.result b/tools/deploy/mysql_test/test_suite/geometry/r/mysql/spatial_relation_join_mysql.result index fbdcaf3847..06ce1c9ae7 100644 --- a/tools/deploy/mysql_test/test_suite/geometry/r/mysql/spatial_relation_join_mysql.result +++ b/tools/deploy/mysql_test/test_suite/geometry/r/mysql/spatial_relation_join_mysql.result @@ -91,6 +91,7 @@ Optimization Info: avaiable_index_name:[tgnoindex2] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING FULL] tgnoindex1: table_rows:3 physical_range_rows:3 @@ -102,6 +103,7 @@ Optimization Info: avaiable_index_name:[tgnoindex1] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING FULL] Plan Type: LOCAL Note: @@ -196,6 +198,7 @@ Optimization Info: pruned_index_name:[gidx1] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING FULL] tgeom2: table_rows:10 physical_range_rows:2 @@ -207,6 +210,7 @@ Optimization Info: avaiable_index_name:[gidx2, tgeom2] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -310,6 +314,7 @@ Optimization Info: pruned_index_name:[pgidx1] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING FULL] ptgeom2: table_rows:10 physical_range_rows:2 @@ -321,6 +326,7 @@ Optimization Info: avaiable_index_name:[pgidx2, ptgeom2] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: DISTRIBUTED Note: diff --git a/tools/deploy/mysql_test/test_suite/global_index/r/mysql/global_index_lookup_1.result b/tools/deploy/mysql_test/test_suite/global_index/r/mysql/global_index_lookup_1.result index 86e4e51475..0b130be72c 100644 --- 
a/tools/deploy/mysql_test/test_suite/global_index/r/mysql/global_index_lookup_1.result +++ b/tools/deploy/mysql_test/test_suite/global_index/r/mysql/global_index_lookup_1.result @@ -629,9 +629,9 @@ Query Plan ================================================================ |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ---------------------------------------------------------------- -|0 |SUBPLAN FILTER | |1 |68 | +|0 |SUBPLAN FILTER | |1 |93 | |1 |├─TABLE FULL SCAN |t1 |2 |3 | -|2 |└─DISTRIBUTED TABLE RANGE SCAN|t2(i1)|1 |33 | +|2 |└─DISTRIBUTED TABLE RANGE SCAN|t2(i1)|1 |46 | ================================================================ Outputs & filters: ------------------------------------- diff --git a/tools/deploy/mysql_test/test_suite/join/r/mysql/anti_semi_join.result b/tools/deploy/mysql_test/test_suite/join/r/mysql/anti_semi_join.result index 477e08d6b9..f7e04ef3e4 100644 --- a/tools/deploy/mysql_test/test_suite/join/r/mysql/anti_semi_join.result +++ b/tools/deploy/mysql_test/test_suite/join/r/mysql/anti_semi_join.result @@ -115,7 +115,7 @@ Query Plan ======================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------- -|0 |NESTED-LOOP ANTI JOIN | |7 |4 | +|0 |NESTED-LOOP ANTI JOIN | |8 |4 | |1 |├─TABLE FULL SCAN |xy_t1|8 |3 | |2 |└─MATERIAL | |5 |3 | |3 | └─TABLE FULL SCAN |xy_t2|5 |3 | @@ -151,7 +151,7 @@ Query Plan ==================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ---------------------------------------------------- -|0 |MERGE ANTI JOIN | |7 |7 | +|0 |MERGE ANTI JOIN | |8 |7 | |1 |├─SORT | |8 |4 | |2 |│ └─TABLE FULL SCAN|xy_t1|8 |3 | |3 |└─SORT | |5 |3 | @@ -192,7 +192,7 @@ Query Plan ====================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------ -|0 |HASH RIGHT ANTI JOIN | |7 |7 | +|0 |HASH RIGHT ANTI JOIN | |8 |7 | |1 |├─TABLE FULL 
SCAN |xy_t2|5 |3 | |2 |└─TABLE FULL SCAN |xy_t1|8 |3 | ====================================================== @@ -335,7 +335,7 @@ Query Plan ======================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------- -|0 |NESTED-LOOP SEMI JOIN | |2 |4 | +|0 |NESTED-LOOP SEMI JOIN | |1 |4 | |1 |├─TABLE FULL SCAN |xy_t1|8 |3 | |2 |└─MATERIAL | |5 |3 | |3 | └─TABLE FULL SCAN |xy_t2|5 |3 | @@ -365,7 +365,7 @@ Query Plan ==================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ---------------------------------------------------- -|0 |MERGE SEMI JOIN | |2 |7 | +|0 |MERGE SEMI JOIN | |1 |7 | |1 |├─SORT | |8 |4 | |2 |│ └─TABLE FULL SCAN|xy_t1|8 |3 | |3 |└─SORT | |5 |3 | @@ -400,7 +400,7 @@ Query Plan ====================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------ -|0 |HASH RIGHT SEMI JOIN | |2 |7 | +|0 |HASH RIGHT SEMI JOIN | |1 |7 | |1 |├─TABLE FULL SCAN |xy_t2|5 |3 | |2 |└─TABLE FULL SCAN |xy_t1|8 |3 | ====================================================== @@ -430,7 +430,7 @@ Query Plan ======================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------- -|0 |NESTED-LOOP ANTI JOIN | |7 |4 | +|0 |NESTED-LOOP ANTI JOIN | |8 |4 | |1 |├─TABLE FULL SCAN |xy_t1|8 |3 | |2 |└─MATERIAL | |5 |3 | |3 | └─TABLE FULL SCAN |xy_t2|5 |3 | @@ -466,7 +466,7 @@ Query Plan ==================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ---------------------------------------------------- -|0 |MERGE ANTI JOIN | |7 |7 | +|0 |MERGE ANTI JOIN | |8 |7 | |1 |├─SORT | |8 |4 | |2 |│ └─TABLE FULL SCAN|xy_t1|8 |3 | |3 |└─SORT | |5 |3 | @@ -507,7 +507,7 @@ Query Plan ====================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| 
------------------------------------------------------ -|0 |HASH RIGHT ANTI JOIN | |7 |7 | +|0 |HASH RIGHT ANTI JOIN | |8 |7 | |1 |├─TABLE FULL SCAN |xy_t2|5 |3 | |2 |└─TABLE FULL SCAN |xy_t1|8 |3 | ====================================================== @@ -541,7 +541,7 @@ Query Plan ======================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------- -|0 |NESTED-LOOP SEMI JOIN | |2 |4 | +|0 |NESTED-LOOP SEMI JOIN | |1 |4 | |1 |├─TABLE FULL SCAN |xy_t1|8 |3 | |2 |└─MATERIAL | |5 |3 | |3 | └─TABLE FULL SCAN |xy_t2|5 |3 | @@ -571,7 +571,7 @@ Query Plan ==================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ---------------------------------------------------- -|0 |MERGE SEMI JOIN | |2 |7 | +|0 |MERGE SEMI JOIN | |1 |7 | |1 |├─SORT | |8 |4 | |2 |│ └─TABLE FULL SCAN|xy_t1|8 |3 | |3 |└─SORT | |5 |3 | @@ -606,7 +606,7 @@ Query Plan ====================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------ -|0 |HASH RIGHT SEMI JOIN | |2 |7 | +|0 |HASH RIGHT SEMI JOIN | |1 |7 | |1 |├─TABLE FULL SCAN |xy_t2|5 |3 | |2 |└─TABLE FULL SCAN |xy_t1|8 |3 | ====================================================== @@ -2831,8 +2831,8 @@ Query Plan |0 |SCALAR GROUP BY | |1 |4168 | |1 |└─SUBPLAN FILTER | |29 |4167 | |2 | ├─TABLE FULL SCAN |xy_t1 |116 |8 | -|3 | ├─DISTRIBUTED TABLE RANGE SCAN|xy_t3(idx_c2)|2 |18 | -|4 | └─DISTRIBUTED TABLE RANGE SCAN|xy_t2(idx_c2)|2 |18 | +|3 | ├─DISTRIBUTED TABLE RANGE SCAN|xy_t3(idx_c2)|1 |18 | +|4 | └─DISTRIBUTED TABLE RANGE SCAN|xy_t2(idx_c2)|1 |18 | ========================================================================= Outputs & filters: ------------------------------------- @@ -3483,8 +3483,8 @@ Query Plan ======================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| 
-------------------------------------------------------- -|0 |SORT | |2 |7 | -|1 |└─HASH RIGHT SEMI JOIN | |2 |7 | +|0 |SORT | |1 |7 | +|1 |└─HASH RIGHT SEMI JOIN | |1 |7 | |2 | ├─TABLE FULL SCAN |xy_t2|5 |3 | |3 | └─TABLE FULL SCAN |xy_t1|8 |3 | ======================================================== @@ -3513,8 +3513,8 @@ Query Plan ====================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------ -|0 |SORT | |2 |7 | -|1 |└─MERGE SEMI JOIN | |2 |7 | +|0 |SORT | |1 |7 | +|1 |└─MERGE SEMI JOIN | |1 |7 | |2 | ├─SORT | |8 |4 | |3 | │ └─TABLE FULL SCAN|xy_t1|8 |3 | |4 | └─SORT | |5 |3 | @@ -3550,8 +3550,8 @@ Query Plan ========================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| --------------------------------------------------------- -|0 |SORT | |2 |4 | -|1 |└─NESTED-LOOP SEMI JOIN | |2 |4 | +|0 |SORT | |1 |4 | +|1 |└─NESTED-LOOP SEMI JOIN | |1 |4 | |2 | ├─TABLE FULL SCAN |xy_t1|8 |3 | |3 | └─MATERIAL | |5 |3 | |4 | └─TABLE FULL SCAN |xy_t2|5 |3 | @@ -3615,8 +3615,8 @@ Query Plan ==================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ---------------------------------------------------- -|0 |SORT | |2 |7 | -|1 |└─HASH SEMI JOIN | |2 |7 | +|0 |SORT | |1 |7 | +|1 |└─HASH SEMI JOIN | |1 |7 | |2 | ├─TABLE FULL SCAN|xy_t2|5 |3 | |3 | └─TABLE FULL SCAN|xy_t1|8 |3 | ==================================================== @@ -3645,8 +3645,8 @@ Query Plan ====================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------ -|0 |SORT | |2 |7 | -|1 |└─MERGE SEMI JOIN | |2 |7 | +|0 |SORT | |1 |7 | +|1 |└─MERGE SEMI JOIN | |1 |7 | |2 | ├─SORT | |5 |3 | |3 | │ └─TABLE FULL SCAN|xy_t2|5 |3 | |4 | └─SORT | |8 |4 | @@ -3682,8 +3682,8 @@ Query Plan ========================================================= |ID|OPERATOR |NAME 
|EST.ROWS|EST.TIME(us)| --------------------------------------------------------- -|0 |SORT | |2 |4 | -|1 |└─NESTED-LOOP SEMI JOIN | |2 |3 | +|0 |SORT | |1 |3 | +|1 |└─NESTED-LOOP SEMI JOIN | |1 |3 | |2 | ├─TABLE FULL SCAN |xy_t2|5 |3 | |3 | └─MATERIAL | |8 |4 | |4 | └─TABLE FULL SCAN |xy_t1|8 |3 | @@ -3747,8 +3747,8 @@ Query Plan ==================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ---------------------------------------------------- -|0 |SORT | |2 |7 | -|1 |└─HASH JOIN | |2 |7 | +|0 |SORT | |1 |7 | +|1 |└─HASH JOIN | |1 |7 | |2 | ├─TABLE FULL SCAN|xy_t2|5 |3 | |3 | └─TABLE FULL SCAN|xy_t1|8 |3 | ==================================================== @@ -3776,7 +3776,7 @@ Query Plan ================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| -------------------------------------------------- -|0 |MERGE JOIN | |2 |6 | +|0 |MERGE JOIN | |1 |6 | |1 |├─TABLE FULL SCAN|xy_t2|5 |3 | |2 |└─TABLE FULL SCAN|xy_t1|8 |3 | ================================================== @@ -3832,8 +3832,8 @@ Query Plan ==================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ---------------------------------------------------- -|0 |SORT | |2 |8 | -|1 |└─HASH SEMI JOIN | |2 |7 | +|0 |SORT | |1 |7 | +|1 |└─HASH SEMI JOIN | |1 |7 | |2 | ├─TABLE FULL SCAN|xy_t2|5 |3 | |3 | └─TABLE FULL SCAN|xy_t1|8 |3 | ==================================================== @@ -3862,7 +3862,7 @@ Query Plan ==================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ---------------------------------------------------- -|0 |MERGE SEMI JOIN | |2 |7 | +|0 |MERGE SEMI JOIN | |1 |7 | |1 |├─TABLE FULL SCAN |xy_t2|5 |3 | |2 |└─SORT | |8 |4 | |3 | └─TABLE FULL SCAN|xy_t1|8 |3 | @@ -3893,7 +3893,7 @@ Query Plan ======================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------- -|0 
|NESTED-LOOP SEMI JOIN | |2 |3 | +|0 |NESTED-LOOP SEMI JOIN | |1 |3 | |1 |├─TABLE FULL SCAN |xy_t2|5 |3 | |2 |└─MATERIAL | |8 |4 | |3 | └─TABLE FULL SCAN |xy_t1|8 |3 | @@ -3952,8 +3952,8 @@ Query Plan ======================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| -------------------------------------------------------- -|0 |SORT | |7 |8 | -|1 |└─HASH RIGHT ANTI JOIN | |7 |7 | +|0 |SORT | |8 |8 | +|1 |└─HASH RIGHT ANTI JOIN | |8 |7 | |2 | ├─TABLE FULL SCAN |xy_t2|5 |3 | |3 | └─TABLE FULL SCAN |xy_t1|8 |3 | ======================================================== @@ -3988,8 +3988,8 @@ Query Plan ====================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------ -|0 |SORT | |7 |8 | -|1 |└─MERGE ANTI JOIN | |7 |7 | +|0 |SORT | |8 |8 | +|1 |└─MERGE ANTI JOIN | |8 |7 | |2 | ├─SORT | |8 |4 | |3 | │ └─TABLE FULL SCAN|xy_t1|8 |3 | |4 | └─SORT | |5 |3 | @@ -4031,8 +4031,8 @@ Query Plan ========================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| --------------------------------------------------------- -|0 |SORT | |7 |4 | -|1 |└─NESTED-LOOP ANTI JOIN | |7 |4 | +|0 |SORT | |8 |4 | +|1 |└─NESTED-LOOP ANTI JOIN | |8 |4 | |2 | ├─TABLE FULL SCAN |xy_t1|8 |3 | |3 | └─MATERIAL | |5 |3 | |4 | └─TABLE FULL SCAN |xy_t2|5 |3 | @@ -4108,8 +4108,8 @@ Query Plan ==================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ---------------------------------------------------- -|0 |SORT | |4 |7 | -|1 |└─HASH ANTI JOIN | |4 |7 | +|0 |SORT | |5 |7 | +|1 |└─HASH ANTI JOIN | |5 |7 | |2 | ├─TABLE FULL SCAN|xy_t2|5 |3 | |3 | └─TABLE FULL SCAN|xy_t1|8 |3 | ==================================================== @@ -4141,8 +4141,8 @@ Query Plan ====================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------ -|0 |SORT | 
|4 |7 | -|1 |└─MERGE ANTI JOIN | |4 |7 | +|0 |SORT | |5 |7 | +|1 |└─MERGE ANTI JOIN | |5 |7 | |2 | ├─SORT | |5 |3 | |3 | │ └─TABLE FULL SCAN|xy_t2|5 |3 | |4 | └─SORT | |8 |4 | @@ -4181,8 +4181,8 @@ Query Plan ========================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| --------------------------------------------------------- -|0 |SORT | |4 |4 | -|1 |└─NESTED-LOOP ANTI JOIN | |4 |3 | +|0 |SORT | |5 |4 | +|1 |└─NESTED-LOOP ANTI JOIN | |5 |3 | |2 | ├─TABLE FULL SCAN |xy_t2|5 |3 | |3 | └─MATERIAL | |8 |4 | |4 | └─TABLE FULL SCAN |xy_t1|8 |3 | @@ -4252,8 +4252,8 @@ Query Plan ==================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ---------------------------------------------------- -|0 |SORT | |4 |8 | -|1 |└─HASH ANTI JOIN | |4 |7 | +|0 |SORT | |5 |8 | +|1 |└─HASH ANTI JOIN | |5 |7 | |2 | ├─TABLE FULL SCAN|xy_t2|5 |3 | |3 | └─TABLE FULL SCAN|xy_t1|8 |3 | ==================================================== @@ -4286,7 +4286,7 @@ Query Plan ================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| -------------------------------------------------- -|0 |MERGE ANTI JOIN | |4 |6 | +|0 |MERGE ANTI JOIN | |5 |6 | |1 |├─TABLE FULL SCAN|xy_t2|5 |3 | |2 |└─TABLE FULL SCAN|xy_t1|8 |3 | ================================================== @@ -4318,7 +4318,7 @@ Query Plan ======================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| -------------------------------------------------------- -|0 |NESTED-LOOP ANTI JOIN | |4 |96 | +|0 |NESTED-LOOP ANTI JOIN | |5 |96 | |1 |├─TABLE FULL SCAN |xy_t2|5 |3 | |2 |└─DISTRIBUTED TABLE GET|xy_t1|1 |18 | ======================================================== @@ -4385,10 +4385,10 @@ Query Plan ======================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| -------------------------------------------------------- -|0 |SORT | |2 |7 | -|1 |└─HASH RIGHT ANTI JOIN 
| |2 |6 | -|2 | ├─SUBPLAN SCAN |VIEW1|3 |3 | -|3 | │ └─TABLE FULL SCAN |xy_t1|3 |3 | +|0 |SORT | |4 |6 | +|1 |└─HASH RIGHT ANTI JOIN | |4 |6 | +|2 | ├─SUBPLAN SCAN |VIEW1|1 |3 | +|3 | │ └─TABLE FULL SCAN |xy_t1|1 |3 | |4 | └─TABLE FULL SCAN |xy_t2|5 |3 | ======================================================== Outputs & filters: @@ -4422,11 +4422,11 @@ Query Plan ====================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------ -|0 |SORT | |2 |7 | -|1 |└─HASH ANTI JOIN | |2 |7 | +|0 |SORT | |4 |7 | +|1 |└─HASH ANTI JOIN | |4 |7 | |2 | ├─TABLE FULL SCAN |xy_t2|5 |3 | -|3 | └─SUBPLAN SCAN |VIEW1|3 |3 | -|4 | └─TABLE FULL SCAN|xy_t1|3 |3 | +|3 | └─SUBPLAN SCAN |VIEW1|1 |3 | +|4 | └─TABLE FULL SCAN|xy_t1|1 |3 | ====================================================== Outputs & filters: ------------------------------------- @@ -4459,11 +4459,11 @@ Query Plan ====================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------ -|0 |SORT | |2 |7 | -|1 |└─HASH ANTI JOIN | |2 |7 | +|0 |SORT | |4 |7 | +|1 |└─HASH ANTI JOIN | |4 |7 | |2 | ├─TABLE FULL SCAN |xy_t2|5 |3 | -|3 | └─SUBPLAN SCAN |VIEW1|3 |3 | -|4 | └─TABLE FULL SCAN|xy_t1|3 |3 | +|3 | └─SUBPLAN SCAN |VIEW1|1 |3 | +|4 | └─TABLE FULL SCAN|xy_t1|1 |3 | ====================================================== Outputs & filters: ------------------------------------- diff --git a/tools/deploy/mysql_test/test_suite/join/r/mysql/nested_loop_join_right_null.result b/tools/deploy/mysql_test/test_suite/join/r/mysql/nested_loop_join_right_null.result index bcdd38c550..4677923e01 100644 --- a/tools/deploy/mysql_test/test_suite/join/r/mysql/nested_loop_join_right_null.result +++ b/tools/deploy/mysql_test/test_suite/join/r/mysql/nested_loop_join_right_null.result @@ -110,10 +110,10 @@ Query Plan =========================================================== 
|ID|OPERATOR |NAME|EST.ROWS|EST.TIME(us)| ----------------------------------------------------------- -|0 |NESTED-LOOP JOIN CARTESIAN | |3 |3 | +|0 |NESTED-LOOP JOIN CARTESIAN | |0 |3 | |1 |├─TABLE FULL SCAN |b |3 |3 | -|2 |└─MATERIAL | |1 |3 | -|3 | └─TABLE RANGE SCAN |a |1 |3 | +|2 |└─MATERIAL | |0 |3 | +|3 | └─TABLE RANGE SCAN |a |0 |3 | =========================================================== Outputs & filters: ------------------------------------- diff --git a/tools/deploy/mysql_test/test_suite/join/r/mysql/nested_loop_join_right_null_joinon.result b/tools/deploy/mysql_test/test_suite/join/r/mysql/nested_loop_join_right_null_joinon.result index bf2afca710..c715cdcc50 100644 --- a/tools/deploy/mysql_test/test_suite/join/r/mysql/nested_loop_join_right_null_joinon.result +++ b/tools/deploy/mysql_test/test_suite/join/r/mysql/nested_loop_join_right_null_joinon.result @@ -112,10 +112,10 @@ Query Plan =========================================================== |ID|OPERATOR |NAME|EST.ROWS|EST.TIME(us)| ----------------------------------------------------------- -|0 |NESTED-LOOP JOIN CARTESIAN | |3 |3 | +|0 |NESTED-LOOP JOIN CARTESIAN | |0 |3 | |1 |├─TABLE FULL SCAN |b |3 |3 | -|2 |└─MATERIAL | |1 |3 | -|3 | └─TABLE RANGE SCAN |a |1 |3 | +|2 |└─MATERIAL | |0 |3 | +|3 | └─TABLE RANGE SCAN |a |0 |3 | =========================================================== Outputs & filters: ------------------------------------- diff --git a/tools/deploy/mysql_test/test_suite/join/r/mysql/nested_loop_join_right_null_joinon_where.result b/tools/deploy/mysql_test/test_suite/join/r/mysql/nested_loop_join_right_null_joinon_where.result index b5c6753491..d2e6e9ef82 100644 --- a/tools/deploy/mysql_test/test_suite/join/r/mysql/nested_loop_join_right_null_joinon_where.result +++ b/tools/deploy/mysql_test/test_suite/join/r/mysql/nested_loop_join_right_null_joinon_where.result @@ -112,10 +112,10 @@ Query Plan =========================================================== |ID|OPERATOR 
|NAME|EST.ROWS|EST.TIME(us)| ----------------------------------------------------------- -|0 |NESTED-LOOP JOIN CARTESIAN | |3 |3 | +|0 |NESTED-LOOP JOIN CARTESIAN | |0 |3 | |1 |├─TABLE FULL SCAN |b |3 |3 | -|2 |└─MATERIAL | |1 |3 | -|3 | └─TABLE RANGE SCAN |a |1 |3 | +|2 |└─MATERIAL | |0 |3 | +|3 | └─TABLE RANGE SCAN |a |0 |3 | =========================================================== Outputs & filters: ------------------------------------- diff --git a/tools/deploy/mysql_test/test_suite/px/r/mysql/alloc_material_for_producer_consumer_schedule_mode.result b/tools/deploy/mysql_test/test_suite/px/r/mysql/alloc_material_for_producer_consumer_schedule_mode.result index ea776aa6ff..e423b4e270 100644 --- a/tools/deploy/mysql_test/test_suite/px/r/mysql/alloc_material_for_producer_consumer_schedule_mode.result +++ b/tools/deploy/mysql_test/test_suite/px/r/mysql/alloc_material_for_producer_consumer_schedule_mode.result @@ -8,14 +8,14 @@ Query Plan ==================================================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------------------------------------ -|0 |TEMP TABLE TRANSFORMATION | |1 |6 | -|1 |├─PX COORDINATOR | |1 |4 | -|2 |│ └─EXCHANGE OUT DISTR |:EX10001 |1 |4 | -|3 |│ └─TEMP TABLE INSERT |TEMP1 |1 |4 | +|0 |TEMP TABLE TRANSFORMATION | |1 |5 | +|1 |├─PX COORDINATOR | |1 |3 | +|2 |│ └─EXCHANGE OUT DISTR |:EX10001 |1 |3 | +|3 |│ └─TEMP TABLE INSERT |TEMP1 |1 |3 | |4 |│ └─HASH GROUP BY | |1 |3 | -|5 |│ └─EXCHANGE IN DISTR | |2 |3 | -|6 |│ └─EXCHANGE OUT DISTR (HASH) |:EX10000 |2 |3 | -|7 |│ └─HASH GROUP BY | |2 |2 | +|5 |│ └─EXCHANGE IN DISTR | |1 |3 | +|6 |│ └─EXCHANGE OUT DISTR (HASH) |:EX10000 |1 |2 | +|7 |│ └─HASH GROUP BY | |1 |2 | |8 |│ └─PX BLOCK ITERATOR | |1 |2 | |9 |│ └─TABLE FULL SCAN |t1 |1 |2 | |10|└─PX COORDINATOR | |1 |2 | @@ -73,12 +73,12 @@ Query Plan ===================================================================================== 
|ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------------------------------------- -|0 |PX COORDINATOR | |1 |6 | -|1 |└─EXCHANGE OUT DISTR |:EX10003|1 |6 | -|2 | └─HASH GROUP BY | |1 |6 | -|3 | └─EXCHANGE IN DISTR | |2 |5 | -|4 | └─EXCHANGE OUT DISTR (HASH) |:EX10002|2 |5 | -|5 | └─HASH GROUP BY | |2 |4 | +|0 |PX COORDINATOR | |1 |5 | +|1 |└─EXCHANGE OUT DISTR |:EX10003|1 |5 | +|2 | └─HASH GROUP BY | |1 |5 | +|3 | └─EXCHANGE IN DISTR | |1 |5 | +|4 | └─EXCHANGE OUT DISTR (HASH) |:EX10002|1 |4 | +|5 | └─HASH GROUP BY | |1 |4 | |6 | └─SUBPLAN SCAN |VIEW1 |1 |4 | |7 | └─MERGE GROUP BY | |1 |4 | |8 | └─PARTITION SORT | |1 |4 | diff --git a/tools/deploy/mysql_test/test_suite/px/r/mysql/join_pwj.result b/tools/deploy/mysql_test/test_suite/px/r/mysql/join_pwj.result index 792976be88..e1d6142273 100644 --- a/tools/deploy/mysql_test/test_suite/px/r/mysql/join_pwj.result +++ b/tools/deploy/mysql_test/test_suite/px/r/mysql/join_pwj.result @@ -74,7 +74,7 @@ Query Plan |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------------- |0 |PX COORDINATOR | |28 |77 | -|1 |└─EXCHANGE OUT DISTR |:EX10000|28 |49 | +|1 |└─EXCHANGE OUT DISTR |:EX10000|28 |48 | |2 | └─PX PARTITION ITERATOR| |28 |17 | |3 | └─MERGE JOIN | |28 |17 | |4 | ├─TABLE FULL SCAN |score |28 |8 | diff --git a/tools/deploy/mysql_test/test_suite/skyline/r/mysql/skyline_basic_mysql.result b/tools/deploy/mysql_test/test_suite/skyline/r/mysql/skyline_basic_mysql.result index e38b5c2087..0e7fe1fff3 100644 --- a/tools/deploy/mysql_test/test_suite/skyline/r/mysql/skyline_basic_mysql.result +++ b/tools/deploy/mysql_test/test_suite/skyline/r/mysql/skyline_basic_mysql.result @@ -941,7 +941,7 @@ Query Plan ============================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------------- -|0 |MERGE UNION DISTINCT| |2 |5 | +|0 |MERGE UNION DISTINCT| |1 |5 | |1 
|├─TABLE FULL SCAN |t1(idx_a_b_c)|1 |3 | |2 |└─TABLE FULL SCAN |t2(idx_x_y_z)|1 |3 | ============================================================= @@ -961,7 +961,7 @@ Query Plan ============================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------------- -|0 |MERGE UNION DISTINCT| |2 |5 | +|0 |MERGE UNION DISTINCT| |1 |5 | |1 |├─TABLE FULL SCAN |t1(idx_a_b_c)|1 |3 | |2 |└─TABLE FULL SCAN |t2(idx_x_y_z)|1 |3 | ============================================================= @@ -981,7 +981,7 @@ Query Plan ============================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------------- -|0 |MERGE UNION DISTINCT| |2 |5 | +|0 |MERGE UNION DISTINCT| |1 |5 | |1 |├─TABLE RANGE SCAN |t1(idx_b_c_a)|1 |3 | |2 |└─TABLE RANGE SCAN |t2(idx_x_y_z)|1 |3 | ============================================================= @@ -1003,7 +1003,7 @@ Query Plan ============================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------------- -|0 |MERGE UNION DISTINCT| |2 |5 | +|0 |MERGE UNION DISTINCT| |1 |5 | |1 |├─TABLE RANGE SCAN |t1(idx_b_c_a)|1 |3 | |2 |└─TABLE RANGE SCAN |t2(idx_x_y_z)|1 |3 | ============================================================= @@ -1466,7 +1466,7 @@ Query Plan ================================================================ |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ---------------------------------------------------------------- -|0 |MERGE UNION DISTINCT | |2 |7 | +|0 |MERGE UNION DISTINCT | |1 |7 | |1 |├─TABLE FULL SCAN |t2(idx_x_y_z)|1 |3 | |2 |└─SORT | |1 |5 | |3 | └─MERGE JOIN | |1 |5 | @@ -3080,7 +3080,7 @@ Query Plan ================================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ----------------------------------------------------------------- -|0 |MERGE 
UNION DISTINCT| |2 |5 | +|0 |MERGE UNION DISTINCT| |1 |5 | |1 |├─TABLE RANGE SCAN |t6(idx_b_e_d_c_a)|1 |3 | |2 |└─TABLE FULL SCAN |tmp(idx_c1_c2_c3)|1 |3 | ================================================================= @@ -3101,7 +3101,7 @@ Query Plan ================================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ----------------------------------------------------------------- -|0 |MERGE UNION DISTINCT| |2 |5 | +|0 |MERGE UNION DISTINCT| |1 |5 | |1 |├─TABLE FULL SCAN |t6(idx_b_e_d_c_a)|1 |3 | |2 |└─TABLE FULL SCAN |tmp(idx_c1_c2_c3)|1 |3 | ================================================================= @@ -3121,7 +3121,7 @@ Query Plan ================================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ----------------------------------------------------------------- -|0 |MERGE UNION DISTINCT| |2 |10 | +|0 |MERGE UNION DISTINCT| |1 |10 | |1 |├─TABLE FULL SCAN |tmp(idx_c1_c2_c3)|1 |3 | |2 |└─SORT | |1 |8 | |3 | └─TABLE FULL SCAN |t6(idx_b_c) |1 |8 | @@ -3144,7 +3144,7 @@ Query Plan ================================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ----------------------------------------------------------------- -|0 |MERGE UNION DISTINCT| |2 |10 | +|0 |MERGE UNION DISTINCT| |1 |10 | |1 |├─TABLE FULL SCAN |tmp(idx_c1_c2_c3)|1 |3 | |2 |└─SORT | |1 |8 | |3 | └─TABLE FULL SCAN |t6(idx_b_a_c) |1 |8 | @@ -3167,7 +3167,7 @@ Query Plan ================================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ----------------------------------------------------------------- -|0 |MERGE UNION DISTINCT| |2 |5 | +|0 |MERGE UNION DISTINCT| |1 |5 | |1 |├─TABLE FULL SCAN |t6(idx_b_e_d_c_a)|1 |3 | |2 |└─TABLE FULL SCAN |tmp(idx_c1_c2_c3)|1 |3 | ================================================================= @@ -3764,6 +3764,7 @@ Optimization Info: unstable_index_name:[t10] stats version:0 dynamic sampling level:0 + 
estimation method:[DEFAULT, STORAGE] Plan Type: LOCAL Note: @@ -3813,6 +3814,7 @@ Optimization Info: unstable_index_name:[t10] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: LOCAL Note: @@ -3861,6 +3863,7 @@ Optimization Info: pruned_index_name:[t10i3] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: LOCAL Note: @@ -3909,6 +3912,7 @@ Optimization Info: pruned_index_name:[t10i3] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: LOCAL Note: @@ -3956,6 +3960,7 @@ Optimization Info: avaiable_index_name:[t10i1, t10i2, t10i3, t10] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: LOCAL Note: @@ -4006,6 +4011,7 @@ Optimization Info: unstable_index_name:[t10] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: LOCAL Note: @@ -4057,6 +4063,7 @@ Optimization Info: pruned_index_name:[t10i1, t10i3] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: LOCAL Note: @@ -4108,6 +4115,7 @@ Optimization Info: pruned_index_name:[t10i2, t10i3] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: LOCAL Note: @@ -4159,6 +4167,7 @@ Optimization Info: pruned_index_name:[t10i1, t10i3] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: LOCAL Note: @@ -4210,6 +4219,7 @@ Optimization Info: pruned_index_name:[t10i1, t10i2] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: LOCAL Note: @@ -4258,6 +4268,7 @@ Optimization Info: avaiable_index_name:[t11i1, t11] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: LOCAL Note: @@ -4306,6 +4317,7 @@ Optimization Info: avaiable_index_name:[t11i1, t11] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: LOCAL Note: @@ 
-4315,7 +4327,7 @@ Query Plan ================================================ |ID|OPERATOR |NAME|EST.ROWS|EST.TIME(us)| ------------------------------------------------ -|0 |TABLE RANGE SCAN|t11 |1 |3 | +|0 |TABLE RANGE SCAN|t11 |0 |3 | ================================================ Outputs & filters: ------------------------------------- @@ -4355,6 +4367,7 @@ Optimization Info: pruned_index_name:[t11i1] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: LOCAL Note: @@ -4364,7 +4377,7 @@ Query Plan ================================================ |ID|OPERATOR |NAME|EST.ROWS|EST.TIME(us)| ------------------------------------------------ -|0 |TABLE RANGE SCAN|t11 |1 |3 | +|0 |TABLE RANGE SCAN|t11 |0 |3 | ================================================ Outputs & filters: ------------------------------------- @@ -4404,6 +4417,7 @@ Optimization Info: pruned_index_name:[t11i1] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: LOCAL Note: @@ -4455,6 +4469,7 @@ Optimization Info: avaiable_index_name:[t11i1, t11] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: LOCAL Note: @@ -4464,7 +4479,7 @@ Query Plan ================================================ |ID|OPERATOR |NAME|EST.ROWS|EST.TIME(us)| ------------------------------------------------ -|0 |TABLE RANGE SCAN|t11 |1 |3 | +|0 |TABLE RANGE SCAN|t11 |0 |3 | ================================================ Outputs & filters: ------------------------------------- @@ -4505,6 +4520,7 @@ Optimization Info: pruned_index_name:[t11i1] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: LOCAL Note: @@ -4555,6 +4571,7 @@ Optimization Info: avaiable_index_name:[t11i1, t11] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: LOCAL Note: @@ -4603,6 +4620,7 @@ Optimization Info: pruned_index_name:[t11] stats version:0 dynamic sampling 
level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: LOCAL Note: @@ -4652,6 +4670,7 @@ Optimization Info: avaiable_index_name:[t11i1, t11] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: LOCAL Note: @@ -4663,8 +4682,8 @@ Query Plan ================================================== |ID|OPERATOR |NAME|EST.ROWS|EST.TIME(us)| -------------------------------------------------- -|0 |SORT | |1 |3 | -|1 |└─TABLE RANGE SCAN|t11 |1 |3 | +|0 |SORT | |0 |3 | +|1 |└─TABLE RANGE SCAN|t11 |0 |3 | ================================================== Outputs & filters: ------------------------------------- @@ -4706,6 +4725,7 @@ Optimization Info: pruned_index_name:[t11i1] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: LOCAL Note: @@ -4754,6 +4774,7 @@ Optimization Info: avaiable_index_name:[t12i1, t12i2, t12] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: LOCAL Note: @@ -4802,6 +4823,7 @@ Optimization Info: pruned_index_name:[t12i1, t12i2] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: LOCAL Note: @@ -4850,6 +4872,7 @@ Optimization Info: pruned_index_name:[t12i1] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: LOCAL Note: @@ -4898,6 +4921,7 @@ Optimization Info: pruned_index_name:[t12i1, t12i2] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: LOCAL Note: diff --git a/tools/deploy/mysql_test/test_suite/skyline/r/mysql/skyline_complicate_mysql.result b/tools/deploy/mysql_test/test_suite/skyline/r/mysql/skyline_complicate_mysql.result index c7d64f5483..c3d5f81a3c 100644 --- a/tools/deploy/mysql_test/test_suite/skyline/r/mysql/skyline_complicate_mysql.result +++ b/tools/deploy/mysql_test/test_suite/skyline/r/mysql/skyline_complicate_mysql.result @@ -928,7 +928,7 @@ Query Plan 
============================================================================ |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ---------------------------------------------------------------------------- -|0 |MERGE UNION DISTINCT| |2 |5 | +|0 |MERGE UNION DISTINCT| |1 |5 | |1 |├─TABLE FULL SCAN |skyline_int(idx_v3_v4_v5_v2)|1 |3 | |2 |└─TABLE FULL SCAN |other |1 |3 | ============================================================================ @@ -971,7 +971,7 @@ Query Plan =============================================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------------------------------- -|0 |MERGE UNION DISTINCT| |2 |5 | +|0 |MERGE UNION DISTINCT| |1 |5 | |1 |├─TABLE FULL SCAN |skyline_int(idx_v4_v5_v6_v2_v3)|1 |3 | |2 |└─TABLE FULL SCAN |skyline_int(idx_v2_v3_v4) |1 |3 | =============================================================================== @@ -1350,7 +1350,7 @@ Query Plan =============================================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------------------------------- -|0 |MERGE UNION DISTINCT| |2 |7 | +|0 |MERGE UNION DISTINCT| |1 |7 | |1 |├─MERGE JOIN | |1 |5 | |2 |│ ├─TABLE FULL SCAN |skyline_int(idx_v4_v5_v6_v2_v3)|1 |3 | |3 |│ └─TABLE FULL SCAN |other |1 |3 | @@ -1414,7 +1414,7 @@ Query Plan ===================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ----------------------------------------------------- -|0 |MERGE UNION DISTINCT| |2 |5 | +|0 |MERGE UNION DISTINCT| |1 |5 | |1 |├─TABLE FULL SCAN |other|1 |3 | |2 |└─TABLE FULL SCAN |other|1 |3 | ===================================================== diff --git a/tools/deploy/mysql_test/test_suite/static_engine/r/mysql/hash_distinct.result b/tools/deploy/mysql_test/test_suite/static_engine/r/mysql/hash_distinct.result index 20c53d4d15..1c222839c7 100644 --- 
a/tools/deploy/mysql_test/test_suite/static_engine/r/mysql/hash_distinct.result +++ b/tools/deploy/mysql_test/test_suite/static_engine/r/mysql/hash_distinct.result @@ -5964,8 +5964,7 @@ Outputs & filters: ------------------------------------- 0 - output([T_FUN_COUNT(*)]), filter(nil), rowset=256 group(nil), agg_func([T_FUN_COUNT(*)]) - 1 - output(nil), filter([concat(result.t1c0, '') = 'q6h]zjLt)|[?S*C'], [result.t0c1 = 398204275], [result.t2c0 = 1489403758], [concat(result.t0c0, '') - IS NULL]), rowset=256 + 1 - output(nil), filter([concat(result.t1c0, '') = 'q6h]zjLt)|[?S*C'], [result.t2c0 = 1489403758], [concat(result.t0c0, '') IS NULL], [result.t0c1 = 398204275]), rowset=256 access([result.t0c0], [result.t0c1], [result.t2c0], [result.t1c0]) 2 - output([t0.c0], [t0.c1], [t2.c0], [t1.c0]), filter(nil), rowset=256 limit(2147483647), offset(0) @@ -6021,8 +6020,7 @@ Outputs & filters: ------------------------------------- 0 - output([T_FUN_COUNT(*)]), filter(nil), rowset=256 group(nil), agg_func([T_FUN_COUNT(*)]) - 1 - output(nil), filter([concat(result.t1c0, '') = 'q6h]zjLt)|[?S*C'], [result.t0c1 = 398204275], [result.t2c0 = 1489403758], [concat(result.t0c0, '') - IS NULL]), rowset=256 + 1 - output(nil), filter([concat(result.t1c0, '') = 'q6h]zjLt)|[?S*C'], [result.t2c0 = 1489403758], [concat(result.t0c0, '') IS NULL], [result.t0c1 = 398204275]), rowset=256 access([result.t0c0], [result.t0c1], [result.t2c0], [result.t1c0]) 2 - output([t0.c0], [t0.c1], [t2.c0], [t1.c0]), filter(nil), rowset=256 limit(2147483647), offset(0) diff --git a/tools/deploy/mysql_test/test_suite/static_engine/r/mysql/subplan_filter.result b/tools/deploy/mysql_test/test_suite/static_engine/r/mysql/subplan_filter.result index eba25bca79..fbfe110b76 100644 --- a/tools/deploy/mysql_test/test_suite/static_engine/r/mysql/subplan_filter.result +++ b/tools/deploy/mysql_test/test_suite/static_engine/r/mysql/subplan_filter.result @@ -1777,6 +1777,7 @@ Optimization Info: avaiable_index_name:[t1] stats 
version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING FULL] t2: table_rows:22 physical_range_rows:7 @@ -1787,7 +1788,8 @@ Optimization Info: dop_method:DAS DOP avaiable_index_name:[t2] stats version:0 - dynamic sampling level:1 + dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: DISTRIBUTED Note: @@ -1879,6 +1881,7 @@ Optimization Info: avaiable_index_name:[t1] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING FULL] t2: table_rows:22 physical_range_rows:7 @@ -1889,7 +1892,8 @@ Optimization Info: dop_method:DAS DOP avaiable_index_name:[t2] stats version:0 - dynamic sampling level:1 + dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: DISTRIBUTED Note: @@ -1981,6 +1985,7 @@ Optimization Info: avaiable_index_name:[t1] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING FULL] t2: table_rows:22 physical_range_rows:1 @@ -1992,6 +1997,7 @@ Optimization Info: avaiable_index_name:[t2] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: DISTRIBUTED Note: @@ -2091,6 +2097,7 @@ Optimization Info: avaiable_index_name:[t1] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING FULL] t2: table_rows:22 physical_range_rows:7 @@ -2101,7 +2108,8 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[t2] stats version:0 - dynamic sampling level:1 + dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] t2: table_rows:22 physical_range_rows:7 @@ -2112,7 +2120,8 @@ Optimization Info: dop_method:DAS DOP avaiable_index_name:[t2] stats version:0 - dynamic sampling level:1 + dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: DISTRIBUTED Note: @@ -2212,6 +2221,7 @@ Optimization Info: avaiable_index_name:[t1] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING FULL] t2: table_rows:22 physical_range_rows:7 @@ -2222,7 +2232,8 @@ Optimization 
Info: dop_method:Table DOP avaiable_index_name:[t2] stats version:0 - dynamic sampling level:1 + dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] t2: table_rows:22 physical_range_rows:7 @@ -2233,7 +2244,8 @@ Optimization Info: dop_method:DAS DOP avaiable_index_name:[t2] stats version:0 - dynamic sampling level:1 + dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: DISTRIBUTED Note: @@ -2333,6 +2345,7 @@ Optimization Info: avaiable_index_name:[t1] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING FULL] t2: table_rows:22 physical_range_rows:7 @@ -2343,7 +2356,8 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[t2] stats version:0 - dynamic sampling level:1 + dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] t2: table_rows:22 physical_range_rows:7 @@ -2354,7 +2368,8 @@ Optimization Info: dop_method:DAS DOP avaiable_index_name:[t2] stats version:0 - dynamic sampling level:1 + dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: DISTRIBUTED Note: @@ -2454,6 +2469,7 @@ Optimization Info: avaiable_index_name:[t1] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING FULL] t2: table_rows:22 physical_range_rows:7 @@ -2464,7 +2480,8 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[t2] stats version:0 - dynamic sampling level:1 + dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] t2: table_rows:22 physical_range_rows:7 @@ -2475,7 +2492,8 @@ Optimization Info: dop_method:DAS DOP avaiable_index_name:[t2] stats version:0 - dynamic sampling level:1 + dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: DISTRIBUTED Note: @@ -2584,6 +2602,7 @@ Optimization Info: avaiable_index_name:[t1] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING FULL] t2: table_rows:22 physical_range_rows:7 @@ -2594,7 +2613,8 @@ Optimization Info: dop_method:Table DOP 
avaiable_index_name:[t2] stats version:0 - dynamic sampling level:1 + dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] t2: table_rows:22 physical_range_rows:7 @@ -2605,7 +2625,8 @@ Optimization Info: dop_method:DAS DOP avaiable_index_name:[t2] stats version:0 - dynamic sampling level:1 + dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] t2: table_rows:22 physical_range_rows:1 @@ -2617,6 +2638,7 @@ Optimization Info: avaiable_index_name:[t2] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: DISTRIBUTED Note: @@ -2767,6 +2789,7 @@ Optimization Info: avaiable_index_name:[t1] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING FULL] t2: table_rows:22 physical_range_rows:1 @@ -2778,6 +2801,7 @@ Optimization Info: avaiable_index_name:[t2] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] t2: table_rows:22 physical_range_rows:7 @@ -2788,7 +2812,8 @@ Optimization Info: dop_method:DAS DOP avaiable_index_name:[t2] stats version:0 - dynamic sampling level:1 + dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] t2: table_rows:22 physical_range_rows:1 @@ -2800,6 +2825,7 @@ Optimization Info: avaiable_index_name:[t2] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] t2: table_rows:22 physical_range_rows:1 @@ -2811,6 +2837,7 @@ Optimization Info: avaiable_index_name:[t2] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] t2: table_rows:22 physical_range_rows:7 @@ -2821,7 +2848,8 @@ Optimization Info: dop_method:DAS DOP avaiable_index_name:[t2] stats version:0 - dynamic sampling level:1 + dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] t2: table_rows:22 physical_range_rows:1 @@ -2833,6 +2861,7 @@ Optimization Info: avaiable_index_name:[t2] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] t2: table_rows:22 physical_range_rows:7 @@ -2843,7 
+2872,8 @@ Optimization Info: dop_method:DAS DOP avaiable_index_name:[t2] stats version:0 - dynamic sampling level:1 + dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: DISTRIBUTED Note: @@ -2994,6 +3024,7 @@ Optimization Info: avaiable_index_name:[t1] stats version:0 dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING FULL] t2: table_rows:22 physical_range_rows:7 @@ -3004,7 +3035,8 @@ Optimization Info: dop_method:DAS DOP avaiable_index_name:[t2] stats version:0 - dynamic sampling level:1 + dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] t2: table_rows:22 physical_range_rows:7 @@ -3015,7 +3047,8 @@ Optimization Info: dop_method:DAS DOP avaiable_index_name:[t2] stats version:0 - dynamic sampling level:1 + dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] t2: table_rows:22 physical_range_rows:1 @@ -3027,6 +3060,7 @@ Optimization Info: avaiable_index_name:[t2] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] t2: table_rows:22 physical_range_rows:1 @@ -3038,6 +3072,7 @@ Optimization Info: avaiable_index_name:[t2] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] t2: table_rows:22 physical_range_rows:7 @@ -3048,7 +3083,8 @@ Optimization Info: dop_method:DAS DOP avaiable_index_name:[t2] stats version:0 - dynamic sampling level:1 + dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] t2: table_rows:22 physical_range_rows:1 @@ -3060,6 +3096,7 @@ Optimization Info: avaiable_index_name:[t2] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] t2: table_rows:22 physical_range_rows:7 @@ -3070,7 +3107,8 @@ Optimization Info: dop_method:DAS DOP avaiable_index_name:[t2] stats version:0 - dynamic sampling level:1 + dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: DISTRIBUTED Note: @@ -3221,6 +3259,7 @@ Optimization Info: avaiable_index_name:[t1] stats version:0 dynamic sampling level:1 + 
estimation method:[DYNAMIC SAMPLING FULL] t2: table_rows:22 physical_range_rows:7 @@ -3231,7 +3270,8 @@ Optimization Info: dop_method:DAS DOP avaiable_index_name:[t2] stats version:0 - dynamic sampling level:1 + dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] t2: table_rows:22 physical_range_rows:7 @@ -3242,7 +3282,8 @@ Optimization Info: dop_method:DAS DOP avaiable_index_name:[t2] stats version:0 - dynamic sampling level:1 + dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] t2: table_rows:22 physical_range_rows:1 @@ -3254,6 +3295,7 @@ Optimization Info: avaiable_index_name:[t2] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] t2: table_rows:22 physical_range_rows:1 @@ -3265,6 +3307,7 @@ Optimization Info: avaiable_index_name:[t2] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] t2: table_rows:22 physical_range_rows:7 @@ -3275,7 +3318,8 @@ Optimization Info: dop_method:DAS DOP avaiable_index_name:[t2] stats version:0 - dynamic sampling level:1 + dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] t2: table_rows:22 physical_range_rows:1 @@ -3287,6 +3331,7 @@ Optimization Info: avaiable_index_name:[t2] stats version:0 dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] t2: table_rows:22 physical_range_rows:7 @@ -3297,7 +3342,8 @@ Optimization Info: dop_method:DAS DOP avaiable_index_name:[t2] stats version:0 - dynamic sampling level:1 + dynamic sampling level:0 + estimation method:[DEFAULT, STORAGE] Plan Type: DISTRIBUTED Note: diff --git a/tools/deploy/mysql_test/test_suite/static_engine/r/mysql/table_scan.result b/tools/deploy/mysql_test/test_suite/static_engine/r/mysql/table_scan.result index 70c20c7370..dfd7303a62 100644 --- a/tools/deploy/mysql_test/test_suite/static_engine/r/mysql/table_scan.result +++ b/tools/deploy/mysql_test/test_suite/static_engine/r/mysql/table_scan.result @@ -73,7 +73,7 @@ Query Plan 
=============================================== |ID|OPERATOR |NAME|EST.ROWS|EST.TIME(us)| ----------------------------------------------- -|0 |TABLE FULL SCAN|t1 |3 |3 | +|0 |TABLE FULL SCAN|t1 |1 |3 | =============================================== Outputs & filters: ------------------------------------- diff --git a/tools/deploy/mysql_test/test_suite/subquery/r/mysql/optimizer_subquery_bug.result b/tools/deploy/mysql_test/test_suite/subquery/r/mysql/optimizer_subquery_bug.result index 0e8c991efc..d2b7e4039d 100644 --- a/tools/deploy/mysql_test/test_suite/subquery/r/mysql/optimizer_subquery_bug.result +++ b/tools/deploy/mysql_test/test_suite/subquery/r/mysql/optimizer_subquery_bug.result @@ -1067,7 +1067,7 @@ Query Plan ============================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------------- -|0 |SUBPLAN FILTER | |0 |9 | +|0 |SUBPLAN FILTER | |1 |9 | |1 |├─TABLE FULL SCAN |table1|1 |3 | |2 |└─SUBPLAN FILTER | |1 |7 | |3 | ├─SCALAR GROUP BY | |1 |5 | diff --git a/tools/deploy/mysql_test/test_suite/subquery/r/mysql/spf_bug13044302.result b/tools/deploy/mysql_test/test_suite/subquery/r/mysql/spf_bug13044302.result index eedf962cd4..e01834e798 100644 --- a/tools/deploy/mysql_test/test_suite/subquery/r/mysql/spf_bug13044302.result +++ b/tools/deploy/mysql_test/test_suite/subquery/r/mysql/spf_bug13044302.result @@ -151,8 +151,8 @@ Query Plan ============================================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ----------------------------------------------------------------------------- -|0 |PX COORDINATOR | |2 |43 | -|1 |└─EXCHANGE OUT DISTR |:EX10003|2 |41 | +|0 |PX COORDINATOR | |2 |41 | +|1 |└─EXCHANGE OUT DISTR |:EX10003|2 |40 | |2 | └─HASH UNION DISTINCT | |2 |38 | |3 | ├─HASH JOIN | |1 |18 | |4 | │ ├─PX PARTITION ITERATOR | |1 |11 | diff --git a/tools/deploy/mysql_test/test_suite/subquery/r/mysql/subquery.result 
b/tools/deploy/mysql_test/test_suite/subquery/r/mysql/subquery.result index d1f5cd647a..2963dbabe8 100644 --- a/tools/deploy/mysql_test/test_suite/subquery/r/mysql/subquery.result +++ b/tools/deploy/mysql_test/test_suite/subquery/r/mysql/subquery.result @@ -419,11 +419,11 @@ Query Plan =================================================== |ID|OPERATOR |NAME|EST.ROWS|EST.TIME(us)| --------------------------------------------------- -|0 |UNION ALL | |2 |7 | +|0 |UNION ALL | |3 |7 | |1 |├─TABLE FULL SCAN |t1 |1 |3 | -|2 |└─MERGE JOIN | |1 |5 | +|2 |└─MERGE JOIN | |2 |5 | |3 | ├─TABLE FULL SCAN|t2 |2 |3 | -|4 | └─TABLE FULL SCAN|t1 |1 |3 | +|4 | └─TABLE FULL SCAN|t1 |2 |3 | =================================================== Outputs & filters: ------------------------------------- @@ -445,61 +445,47 @@ Outputs & filters: range_key([t1.c1]), range(MIN ; MAX)always true explain select * from t1 where t1.c2 = 5 or exists (select 1 from t2 where t1.c1 > t2.c1); Query Plan -================================================================ -|ID|OPERATOR |NAME|EST.ROWS|EST.TIME(us)| ----------------------------------------------------------------- -|0 |UNION ALL | |2 |24 | -|1 |├─TABLE FULL SCAN |t1 |1 |3 | -|2 |└─NESTED-LOOP SEMI JOIN | |1 |21 | -|3 | ├─TABLE FULL SCAN |t1 |1 |3 | -|4 | └─DISTRIBUTED TABLE RANGE SCAN|t2 |1 |18 | -================================================================ +============================================================== +|ID|OPERATOR |NAME|EST.ROWS|EST.TIME(us)| +-------------------------------------------------------------- +|0 |SUBPLAN FILTER | |2 |39 | +|1 |├─TABLE FULL SCAN |t1 |2 |3 | +|2 |└─DISTRIBUTED TABLE RANGE SCAN|t2 |1 |18 | +============================================================== Outputs & filters: ------------------------------------- - 0 - output([UNION([1])], [UNION([2])], [UNION([3])]), filter(nil), rowset=16 - 1 - output([t1.c1], [t1.c2], [t1.c3]), filter([t1.c2 = 5]), rowset=16 + 0 - output([t1.c1], [t1.c2], 
[t1.c3]), filter([t1.c2 = 5 OR (T_OP_EXISTS, subquery(1))]), rowset=16 + exec_params_([t1.c1(:0)]), onetime_exprs_(nil), init_plan_idxs_(nil), use_batch=true + 1 - output([t1.c1], [t1.c2], [t1.c3]), filter(nil), rowset=16 access([t1.c1], [t1.c2], [t1.c3]), partitions(p0) - is_index_back=false, is_global_index=false, filter_before_indexback[false], - range_key([t1.c1]), range(MIN ; MAX)always true - 2 - output([t1.c1], [t1.c2], [t1.c3]), filter(nil), rowset=16 - conds(nil), nl_params_([t1.c1(:0)]), use_batch=false - 3 - output([t1.c1], [t1.c2], [t1.c3]), filter([lnnvl(cast(t1.c2 = 5, TINYINT(-1, 0)))]), rowset=16 - access([t1.c1], [t1.c2], [t1.c3]), partitions(p0) - is_index_back=false, is_global_index=false, filter_before_indexback[false], - range_key([t1.c1]), range(MIN ; MAX)always true - 4 - output(nil), filter(nil), rowset=16 - access(nil), partitions(p0) is_index_back=false, is_global_index=false, - range_key([t2.c1]), range(MIN ; MAX), + range_key([t1.c1]), range(MIN ; MAX)always true + 2 - output([1]), filter(nil), rowset=16 + access([GROUP_ID]), partitions(p0) + limit(1), offset(nil), is_index_back=false, is_global_index=false, + range_key([t2.c1]), range(MIN ; MAX)always true, range_cond([:0 > t2.c1]) explain select * from t1 where t1.c2 = 5 or exists (select 1 from t2 where t1.c1 < t2.c1); Query Plan -================================================================ -|ID|OPERATOR |NAME|EST.ROWS|EST.TIME(us)| ----------------------------------------------------------------- -|0 |UNION ALL | |2 |24 | -|1 |├─TABLE FULL SCAN |t1 |1 |3 | -|2 |└─NESTED-LOOP SEMI JOIN | |1 |21 | -|3 | ├─TABLE FULL SCAN |t1 |1 |3 | -|4 | └─DISTRIBUTED TABLE RANGE SCAN|t2 |1 |18 | -================================================================ +============================================================== +|ID|OPERATOR |NAME|EST.ROWS|EST.TIME(us)| +-------------------------------------------------------------- +|0 |SUBPLAN FILTER | |2 |39 | +|1 |├─TABLE FULL SCAN |t1 |2 |3 | +|2 
|└─DISTRIBUTED TABLE RANGE SCAN|t2 |1 |18 | +============================================================== Outputs & filters: ------------------------------------- - 0 - output([UNION([1])], [UNION([2])], [UNION([3])]), filter(nil), rowset=16 - 1 - output([t1.c1], [t1.c2], [t1.c3]), filter([t1.c2 = 5]), rowset=16 + 0 - output([t1.c1], [t1.c2], [t1.c3]), filter([t1.c2 = 5 OR (T_OP_EXISTS, subquery(1))]), rowset=16 + exec_params_([t1.c1(:0)]), onetime_exprs_(nil), init_plan_idxs_(nil), use_batch=true + 1 - output([t1.c1], [t1.c2], [t1.c3]), filter(nil), rowset=16 access([t1.c1], [t1.c2], [t1.c3]), partitions(p0) - is_index_back=false, is_global_index=false, filter_before_indexback[false], - range_key([t1.c1]), range(MIN ; MAX)always true - 2 - output([t1.c1], [t1.c2], [t1.c3]), filter(nil), rowset=16 - conds(nil), nl_params_([t1.c1(:0)]), use_batch=false - 3 - output([t1.c1], [t1.c2], [t1.c3]), filter([lnnvl(cast(t1.c2 = 5, TINYINT(-1, 0)))]), rowset=16 - access([t1.c1], [t1.c2], [t1.c3]), partitions(p0) - is_index_back=false, is_global_index=false, filter_before_indexback[false], - range_key([t1.c1]), range(MIN ; MAX)always true - 4 - output(nil), filter(nil), rowset=16 - access(nil), partitions(p0) is_index_back=false, is_global_index=false, - range_key([t2.c1]), range(MIN ; MAX), + range_key([t1.c1]), range(MIN ; MAX)always true + 2 - output([1]), filter(nil), rowset=16 + access([GROUP_ID]), partitions(p0) + limit(1), offset(nil), is_index_back=false, is_global_index=false, + range_key([t2.c1]), range(MIN ; MAX)always true, range_cond([:0 < t2.c1]) explain select * from t1, t2 where t1.c1 > exists(select c1 from t2 where t2.c1 = t1.c1); Query Plan @@ -621,8 +607,8 @@ Query Plan ======================================================= |ID|OPERATOR |NAME|EST.ROWS|EST.TIME(us)| ------------------------------------------------------- -|0 |UNION ALL | |5 |11 | -|1 |├─HASH JOIN | |4 |6 | +|0 |UNION ALL | |3 |11 | +|1 |├─HASH JOIN | |2 |6 | |2 |│ ├─TABLE FULL SCAN |t1 |2 
|3 | |3 |│ └─TABLE FULL SCAN |t2 |2 |3 | |4 |└─MERGE SEMI JOIN | |1 |5 | @@ -1000,27 +986,27 @@ Query Plan =============================================== |ID|OPERATOR |NAME|EST.ROWS|EST.TIME(us)| ----------------------------------------------- -|0 |TABLE FULL SCAN|t1 |1 |3 | +|0 |TABLE FULL SCAN|t1 |0 |3 | =============================================== Outputs & filters: ------------------------------------- 0 - output([t1.a], [t1.b]), filter(nil), startup_filter([0]), rowset=16 access([t1.a], [t1.b]), partitions(p0) is_index_back=false, is_global_index=false, - range_key([t1.__pk_increment]), range(MIN ; MAX)always true + range_key([t1.__pk_increment]), range(MAX ; MIN)always false explain select * from t1 where not exists (select max(a) from t2); Query Plan =============================================== |ID|OPERATOR |NAME|EST.ROWS|EST.TIME(us)| ----------------------------------------------- -|0 |TABLE FULL SCAN|t1 |1 |3 | +|0 |TABLE FULL SCAN|t1 |0 |3 | =============================================== Outputs & filters: ------------------------------------- 0 - output([t1.a], [t1.b]), filter(nil), startup_filter([0]), rowset=16 access([t1.a], [t1.b]), partitions(p0) is_index_back=false, is_global_index=false, - range_key([t1.__pk_increment]), range(MIN ; MAX)always true + range_key([t1.__pk_increment]), range(MAX ; MIN)always false explain select * from t1 where not exists (select group_concat(a, b) from t2 group by a); Query Plan ================================================= @@ -1047,14 +1033,14 @@ Query Plan =============================================== |ID|OPERATOR |NAME|EST.ROWS|EST.TIME(us)| ----------------------------------------------- -|0 |TABLE FULL SCAN|t1 |1 |3 | +|0 |TABLE FULL SCAN|t1 |0 |3 | =============================================== Outputs & filters: ------------------------------------- 0 - output([t1.a], [t1.b]), filter(nil), startup_filter([0]), rowset=16 access([t1.a], [t1.b]), partitions(p0) is_index_back=false, 
is_global_index=false, - range_key([t1.__pk_increment]), range(MIN ; MAX)always true + range_key([t1.__pk_increment]), range(MAX ; MIN)always false explain select * from t1 where not exists (select max(a), sum(a), count(a) from t2 group by a); Query Plan ================================================= diff --git a/tools/deploy/mysql_test/test_suite/window_function/r/mysql/farm.result b/tools/deploy/mysql_test/test_suite/window_function/r/mysql/farm.result index 5be62df65e..0e1453588f 100644 --- a/tools/deploy/mysql_test/test_suite/window_function/r/mysql/farm.result +++ b/tools/deploy/mysql_test/test_suite/window_function/r/mysql/farm.result @@ -88,16 +88,16 @@ w3 as (order by c_customer_sk) ) v where rn = 1; c_birth_month c_birth_year c_customer_sk c_first_name group_concat(c_customer_sk) over w2 nv rn -12 1936 1 Javier 6,44,16,1 1 1 12 1925 6 Brunilda 6 2 1 -12 1963 11 Betty 6,44,16,1,8,39,11 3 1 -12 1933 16 Margie 6,44,16 4 1 -5 1956 21 Naomi 23,5,21 5 1 6 1991 26 Monique 41,24,35,12,49,31,4,26 6 1 +12 1936 1 Javier 6,44,16,1 1 1 +4 1990 46 Jane 2,7,46 10 1 +5 1956 21 Naomi 23,5,21 5 1 +12 1933 16 Margie 6,44,16 4 1 +6 1939 41 Maxine 41 9 1 6 1964 31 William 41,24,35,12,49,31 7 1 11 1968 36 Anthony 50,36 8 1 -6 1939 41 Maxine 41 9 1 -4 1990 46 Jane 2,7,46 10 1 +12 1963 11 Betty 6,44,16,1,8,39,11 3 1 select c_birth_month, c_birth_year, c_customer_sk, c_first_name, count(1) over w, max(c_birth_year) over w,