diff --git a/src/observer/dbms_scheduler/ob_dbms_sched_table_operator.cpp b/src/observer/dbms_scheduler/ob_dbms_sched_table_operator.cpp index 52bf2ffc14..d540e0ddd6 100644 --- a/src/observer/dbms_scheduler/ob_dbms_sched_table_operator.cpp +++ b/src/observer/dbms_scheduler/ob_dbms_sched_table_operator.cpp @@ -320,7 +320,8 @@ int ObDBMSSchedTableOperator::update_for_end(ObDBMSSchedJobInfo &job_info, int e OZ (_build_job_drop_dml(now, job_info, sql1)); } else { OX (job_info.failures_ = (err == 0) ? 0 : (job_info.failures_ + 1)); - OX (job_info.flag_ = job_info.failures_ > 15 ? (job_info.flag_ | 0x1) : (job_info.flag_ & 0xfffffffffffffffE)); + // + OX (job_info.flag_ = (job_info.failures_ > 15 && !ObDbmsStatsMaintenanceWindow::is_stats_job(job_info.get_job_name())) ? (job_info.flag_ | 0x1) : (job_info.flag_ & 0xfffffffffffffffE)); OX (job_info.total_ += (job_info.this_date_ > 0 ? now - job_info.this_date_ : 0)); if (OB_SUCC(ret) && ((job_info.flag_ & 0x1) != 0)) { // when if failures > 16 then set broken state. 
diff --git a/src/pl/ob_pl_interface_pragma.h b/src/pl/ob_pl_interface_pragma.h index a7c8675e18..e17c96c19b 100644 --- a/src/pl/ob_pl_interface_pragma.h +++ b/src/pl/ob_pl_interface_pragma.h @@ -351,6 +351,7 @@ INTERFACE_DEF(INTERFACE_DBMS_STATS_GATHER_SYSTEM_STATS, "GATHER_SYSTEM_STATS", (ObDbmsStats::gather_system_stats)) INTERFACE_DEF(INTERFACE_DBMS_STATS_DELETE_SYSTEM_STATS, "DELETE_SYSTEM_STATS", (ObDbmsStats::delete_system_stats)) INTERFACE_DEF(INTERFACE_DBMS_STATS_SET_SYSTEM_STATS, "SET_SYSTEM_STATS", (ObDbmsStats::set_system_stats)) + INTERFACE_DEF(INTERFACE_DBMS_STATS_ASYNC_GATHER_STATS_JOB_PROC, "ASYNC_GATHER_STATS_JOB_PROC", (ObDbmsStats::async_gather_stats_job_proc)) //end of dbms_stat #ifdef OB_BUILD_ORACLE_PL diff --git a/src/pl/sys_package/ob_dbms_stats.cpp b/src/pl/sys_package/ob_dbms_stats.cpp index 7abc5a1b64..8c88848e7e 100644 --- a/src/pl/sys_package/ob_dbms_stats.cpp +++ b/src/pl/sys_package/ob_dbms_stats.cpp @@ -33,6 +33,7 @@ #include "sql/engine/expr/ob_expr_uuid.h" #include "sql/privilege_check/ob_ora_priv_check.h" #include "sql/ob_result_set.h" +#include "share/stat/ob_dbms_stats_maintenance_window.h" namespace oceanbase { @@ -59,8 +60,8 @@ namespace pl { * 12. no_invalidate BOOLEAN DEFAULT to_no_invalidate_type(get_param('NO_INVALIDATE')), * 13. stattype VARCHAR2 DEFAULT 'DATA', * 14. force BOOLEAN DEFAULT false, - * 15. context DBMS_STATS.CONTEXT DEFAULT NULL, - * 16. options VARCHAR2 DEFAULT 'GATHER' + * 15. hist_est_percent NUMBER DEFAULT AUTO_SAMPLE_SIZE + * 16. 
hist_block_sample BOOLEAN DEFAULT FALSE, * @param result * @return */ @@ -79,9 +80,11 @@ int ObDbmsStats::gather_table_stats(ObExecContext &ctx, ParamStore ¶ms, ObOb LOG_WARN("failed to check tenant is restore", K(ret)); } else if (OB_FAIL(ObDbmsStatsUtils::implicit_commit_before_gather_stats(ctx))) { LOG_WARN("failed to implicit commit before gather stats", K(ret)); - } else if (OB_ISNULL(ctx.get_my_session()) || OB_ISNULL(ctx.get_task_executor_ctx())) { + } else if (OB_ISNULL(ctx.get_my_session())) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected error", K(ret), K(ctx.get_my_session()), K(ctx.get_task_executor_ctx())); + LOG_WARN("get unexpected error", K(ret), K(ctx.get_my_session())); + } else if (OB_FAIL(ObDbmsStatsUtils::cancel_async_gather_stats(ctx))) { + LOG_WARN("failed to cancel async gather stats", K(ret)); } else if (OB_FAIL(init_gather_task_info(ctx, ObOptStatGatherType::MANUAL_GATHER, start_time, task_cnt, task_info))) { LOG_WARN("failed to init gather task info", K(ret)); } else { @@ -106,6 +109,8 @@ int ObDbmsStats::gather_table_stats(ObExecContext &ctx, ParamStore ¶ms, ObOb params.at(8), params.at(12), params.at(14), + params.count() > 15 ? ¶ms.at(15) : NULL, + params.count() > 16 ? 
¶ms.at(16) : NULL, stat_param))) { LOG_WARN("failed to parse stat optitions", K(ret)); } else if (OB_FAIL(running_monitor.add_table_info(stat_param))) { @@ -181,13 +186,15 @@ int ObDbmsStats::gather_schema_stats(ObExecContext &ctx, ParamStore ¶ms, ObO LOG_WARN("failed to check tenant is restore", K(ret)); } else if (OB_FAIL(ObDbmsStatsUtils::implicit_commit_before_gather_stats(ctx))) { LOG_WARN("failed to implicit commit before gather stats", K(ret)); - } else if (OB_ISNULL(ctx.get_my_session()) || OB_ISNULL(ctx.get_task_executor_ctx())) { + } else if (OB_ISNULL(ctx.get_my_session())) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected error", K(ret), K(ctx.get_my_session()), K(ctx.get_task_executor_ctx())); + LOG_WARN("get unexpected error", K(ret), K(ctx.get_my_session())); } else if (ctx.get_my_session()->get_is_in_retry()) { ret = OB_ERR_DBMS_STATS_PL; LOG_WARN("retry gather schema stats is not allowed", K(ret)); LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL,"retry gather schema stats is not allowed"); + } else if (OB_FAIL(ObDbmsStatsUtils::cancel_async_gather_stats(ctx))) { + LOG_WARN("failed to cancel async gather stats", K(ret)); } else if (OB_FAIL(ObOptStatMonitorManager::flush_database_monitoring_info(ctx, false, true))) { LOG_WARN("failed to do flush database monitoring info", K(ret)); } else if (OB_FAIL(get_all_table_ids_in_database(ctx, params.at(0), global_param, table_ids))) { @@ -228,6 +235,8 @@ int ObDbmsStats::gather_schema_stats(ObExecContext &ctx, ParamStore ¶ms, ObO params.at(6), params.at(10), params.at(12), + NULL/*hist_est_percent*/, + NULL/*hist_block_sample*/, stat_param))) { LOG_WARN("failed to parse stat optitions", K(ret)); } else if (OB_FAIL(running_monitor.add_table_info(stat_param))) { @@ -342,6 +351,8 @@ int ObDbmsStats::gather_index_stats(ObExecContext &ctx, ParamStore ¶ms, ObOb empty_cascade, params.at(9), params.at(10), + NULL/*hist_est_percent*/, + NULL/*hist_block_sample*/, ind_stat_param))) { LOG_WARN("failed to parse stat optitions", 
K(ret)); } else if (ObDbmsStatsUtils::is_virtual_index_table(ind_stat_param.table_id_)) {//not gather virtual table index. @@ -2851,6 +2862,7 @@ int ObDbmsStats::get_prefs(sql::ObExecContext &ctx, ObTableStatParam param; param.allocator_ = &ctx.get_allocator(); ObStatPrefs *stat_pref = NULL; + uint64_t tenant_id = ctx.get_my_session()->get_effective_tenant_id(); if (OB_FAIL(check_statistic_table_writeable(ctx))) { LOG_WARN("failed to check tenant is restore", K(ret)); } else if (!params.at(0).is_null() && OB_FAIL(params.at(0).get_string(opt_name))) { @@ -2869,7 +2881,9 @@ int ObDbmsStats::get_prefs(sql::ObExecContext &ctx, ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(ret), K(stat_pref)); } else if (FALSE_IT(stat_pref->set_is_global_prefs(true))) { - } else if (OB_FAIL(ObDbmsStatsPreferences::get_prefs(ctx, param, opt_name, result))) { + } else if (OB_FAIL(ObDbmsStatsPreferences::get_prefs(ctx.get_sql_proxy(), ctx.get_allocator(), + tenant_id, param.table_id_, + opt_name, result))) { LOG_WARN("failed to get prefs", K(ret)); } else {/*do nothing*/} return ret; @@ -3016,6 +3030,7 @@ int ObDbmsStats::set_table_prefs(sql::ObExecContext &ctx, ObSEArray table_ids; ObStatPrefs *stat_pref = NULL; bool use_size_auto = false; + bool is_async_gather = false; if (OB_FAIL(check_statistic_table_writeable(ctx))) { LOG_WARN("failed to check tenant is restore", K(ret)); } else if (OB_FAIL(ObDbmsStatsUtils::implicit_commit_before_gather_stats(ctx))) { @@ -3048,7 +3063,7 @@ int ObDbmsStats::set_table_prefs(sql::ObExecContext &ctx, } else if (OB_FAIL(stat_pref->dump_pref_name_and_value(opt_name, opt_value))) { LOG_WARN("failed to dump pref name and value"); } else if (0 == opt_name.case_compare("METHOD_OPT") && - OB_FAIL(parse_method_opt(ctx, param.allocator_, param.column_params_, opt_value, use_size_auto))) { + OB_FAIL(parse_method_opt(ctx, param.allocator_, param.column_params_, opt_value, is_async_gather, use_size_auto))) { LOG_WARN("failed to parse method opt", 
K(ret)); } else if (OB_FAIL(ObDbmsStatsPreferences::set_prefs(ctx, table_ids, opt_name, opt_value))) { LOG_WARN("failed to set prefs", K(ret)); @@ -3188,6 +3203,87 @@ int ObDbmsStats::cancel_gather_stats(sql::ObExecContext &ctx, return ret; } +/** + * @brief ObDbmsStats::async_gather_stats_job_proc + * @param ctx + * @param params + * 0. duration NUMBER + * @param result + * @return int + */ +int ObDbmsStats::async_gather_stats_job_proc(sql::ObExecContext &ctx, + sql::ParamStore ¶ms, + common::ObObj &result) +{ + int ret = OB_SUCCESS; + UNUSED(result); + const int64_t start_time = ObTimeUtility::current_time(); + ObOptStatTaskInfo task_info; + number::ObNumber num_duration; + int64_t duration_time = -1; + int64_t succeed_cnt = 0; + bool no_async_gather = (OB_E(EventTable::EN_LEADER_STORAGE_ESTIMATION) OB_SUCCESS) != OB_SUCCESS; + ObSQLSessionInfo *session = ctx.get_my_session(); + uint64_t tenant_id = session->get_effective_tenant_id(); + ObSQLSessionInfo::LockGuard query_lock_guard(session->get_query_lock()); + uint64_t data_version = 0; + if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, data_version))) { + LOG_WARN("fail to get tenant data version", KR(ret), K(tenant_id), K(data_version)); + } else if (data_version < MOCK_DATA_VERSION_4_2_4_0 || + (data_version >= DATA_VERSION_4_3_0_0 && data_version < DATA_VERSION_4_3_3_0)) { + //do nothing + } else if (OB_FAIL(check_statistic_table_writeable(ctx))) { + ret = OB_SUCCESS; + LOG_INFO("async gather stats abort because of statistic table is unwriteable"); + } else if (OB_FAIL(ObDbmsStatsUtils::implicit_commit_before_gather_stats(ctx))) { + LOG_WARN("failed to implicit commit before gather stats", K(ret)); + } else if (!session->is_user_session() && no_async_gather) { + //do nothing + LOG_INFO("async gather stats abort because of the trace point and not user seesion", K(session->is_user_session()), K(no_async_gather)); + } else if (is_virtual_tenant_id(tenant_id)) { + // do nothing + } else if (GCONF.in_upgrade_mode()) { 
+ //in upgrade, don't async gather table stats + } else if (lib::is_oracle_mode() && !params.empty() && !params.at(0).is_null() && + OB_FAIL(params.at(0).get_number(num_duration))) { + LOG_WARN("failed to get duration", K(ret), K(params.at(0))); + } else if (lib::is_oracle_mode() && !params.empty() && !params.at(0).is_null() && + OB_FAIL(num_duration.extract_valid_int64_with_trunc(duration_time))) { + LOG_WARN("extract_valid_int64_with_trunc failed", K(ret), K(num_duration)); + } else if (lib::is_mysql_mode() && !params.empty() && !params.at(0).is_null() && + OB_FAIL(params.at(0).get_int(duration_time))) { + LOG_WARN("failed to get duration", K(ret), K(params.at(0))); + } else { + bool is_can_async_gather = true; + if (duration_time > 0) { + THIS_WORKER.set_timeout_ts(duration_time + ObTimeUtility::current_time()); + } + if (OB_FAIL(init_gather_task_info(ctx, ObOptStatGatherType::AYSNC_GATHER, start_time, 0, task_info))) { + LOG_WARN("failed to init gather task info", K(ret)); + } else if (OB_FAIL(ObDbmsStatsUtils::check_can_async_gather_stats(ctx))) { + LOG_WARN("failed to check can async gather stats", K(ret)); + is_can_async_gather = (ret != OB_ERR_DBMS_STATS_PL); + } else if (OB_FAIL(ObOptStatMonitorManager::flush_database_monitoring_info(ctx))) { + LOG_WARN("failed to flush database monitoring info", K(ret)); + } else if (OB_FAIL(async_gather_table_stats(ctx, duration_time, succeed_cnt, task_info))) { + LOG_WARN("failed to gather table stats", K(ret)); + } else {/*do nothing*/} + const int64_t exe_time = ObTimeUtility::current_time() - start_time; + LOG_INFO("have been async gathered stats job", + "the total used time:", exe_time, + "the max duration time:", duration_time, + "the toatal gather table cnt:", task_info.task_table_count_, + "the succeed to gather table cnt:", succeed_cnt, + "the failed to gather table cnt:", task_info.failed_count_, K(ret)); + //reset the error code, the reason is that the total gather time is reach the duration time. 
+ ret = ret == OB_TIMEOUT ? OB_SUCCESS : ret; + task_info.task_end_time_ = ObTimeUtility::current_time(); + task_info.ret_code_ = is_can_async_gather ? ret : OB_ERR_QUERY_INTERRUPTED; + update_optimizer_gather_stat_info(&task_info, NULL); + } + return ret; +} + int ObDbmsStats::update_stat_cache(const uint64_t rpc_tenant_id, const ObTableStatParam ¶m, ObOptStatRunningMonitor *running_monitor/*default null*/) @@ -3219,66 +3315,78 @@ int ObDbmsStats::update_stat_cache(const uint64_t rpc_tenant_id, } } if (OB_SUCC(ret)) { - LOG_TRACE("update stat cache", K(stat_arg)); - bool evict_plan_failed = false; - int64_t timeout = -1; - ObSEArray all_server_arr; - bool has_read_only_zone = false; // UNUSED; - if (OB_ISNULL(GCTX.srv_rpc_proxy_) || OB_ISNULL(GCTX.locality_manager_)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("rpc_proxy or session is null", K(ret), K(GCTX.srv_rpc_proxy_), K(GCTX.locality_manager_)); - } else if (OB_FAIL(GCTX.locality_manager_->get_server_locality_array(all_server_arr, - has_read_only_zone))) { - LOG_WARN("fail to get server locality", K(ret)); - } else { - ObSEArray failed_server_arr; - for (int64_t i = 0; OB_SUCC(ret) && i < all_server_arr.count(); i++) { - if (!all_server_arr.at(i).is_active() - || ObServerStatus::OB_SERVER_ACTIVE != all_server_arr.at(i).get_server_status() - || 0 == all_server_arr.at(i).get_start_service_time() - || 0 != all_server_arr.at(i).get_server_stop_time()) { - //server may not serving - } else if (0 >= (timeout = THIS_WORKER.get_timeout_remain())) { - ret = OB_TIMEOUT; - LOG_WARN("query timeout is reached", K(ret), K(timeout)); - } else if (OB_FAIL(GCTX.srv_rpc_proxy_->to(all_server_arr.at(i).get_addr()) - .timeout(timeout) - .by(rpc_tenant_id) - .update_local_stat_cache(stat_arg))) { - LOG_WARN("failed to update local stat cache caused by unknow error", - K(ret), K(all_server_arr.at(i).get_addr()), K(stat_arg)); - if (OB_FAIL(failed_server_arr.push_back(all_server_arr.at(i)))) { - LOG_WARN("failed to push back", 
K(ret)); - } + if (OB_FAIL(update_stat_cache(rpc_tenant_id, stat_arg, running_monitor))) { + LOG_WARN("failed to update stat cache", K(ret)); + } + } + return ret; +} + +int ObDbmsStats::update_stat_cache(const uint64_t tenant_id, + obrpc::ObUpdateStatCacheArg &stat_arg, + ObOptStatRunningMonitor *running_monitor/*default null*/) +{ + int ret = OB_SUCCESS; + LOG_TRACE("update stat cache", K(stat_arg)); + bool evict_plan_failed = false; + int64_t timeout = -1; + ObSEArray all_server_arr; + bool has_read_only_zone = false; // UNUSED; + if (OB_ISNULL(GCTX.srv_rpc_proxy_) || OB_ISNULL(GCTX.locality_manager_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("rpc_proxy or session is null", K(ret), K(GCTX.srv_rpc_proxy_), K(GCTX.locality_manager_)); + } else if (OB_FAIL(GCTX.locality_manager_->get_server_locality_array(all_server_arr, + has_read_only_zone))) { + LOG_WARN("fail to get server locality", K(ret)); + } else { + ObSEArray failed_server_arr; + for (int64_t i = 0; OB_SUCC(ret) && i < all_server_arr.count(); i++) { + timeout = std::min(MAX_OPT_STATS_PROCESS_RPC_TIMEOUT, THIS_WORKER.get_timeout_remain()); + if (!all_server_arr.at(i).is_active() + || ObServerStatus::OB_SERVER_ACTIVE != all_server_arr.at(i).get_server_status() + || 0 == all_server_arr.at(i).get_start_service_time() + || 0 != all_server_arr.at(i).get_server_stop_time()) { + //server may not serving + } else if (0 >=(timeout)) { + ret = OB_TIMEOUT; + LOG_WARN("query timeout is reached", K(ret), K(timeout)); + } else if (OB_FAIL(GCTX.srv_rpc_proxy_->to(all_server_arr.at(i).get_addr()) + .timeout(timeout) + .by(tenant_id) + .update_local_stat_cache(stat_arg))) { + LOG_WARN("failed to update local stat cache caused by unknow error", + K(ret), K(all_server_arr.at(i).get_addr()), K(stat_arg)); + if (OB_FAIL(failed_server_arr.push_back(all_server_arr.at(i)))) { + LOG_WARN("failed to push back", K(ret)); } } - LOG_TRACE("update stat cache", K(param), K(stat_arg), K(failed_server_arr), K(all_server_arr)); - if 
(OB_SUCC(ret) && !failed_server_arr.empty() && running_monitor != NULL) { - ObSqlString tmp_str; - char *buf = NULL; - if (failed_server_arr.count() * (common::MAX_IP_ADDR_LENGTH + 1) <= common::MAX_VALUE_LENGTH) { - for (int64_t i = 0; OB_SUCC(ret) && i < failed_server_arr.count(); ++i) { - char svr_buf[common::MAX_IP_ADDR_LENGTH] = {0}; - failed_server_arr.at(i).get_addr().to_string(svr_buf, common::MAX_IP_ADDR_LENGTH); - if (OB_FAIL(tmp_str.append_fmt("%s%s", svr_buf, i == 0 ? "" : ","))) { - LOG_WARN("failed to append fmt", K(ret)); - } + } + LOG_TRACE("update stat cache", K(stat_arg), K(failed_server_arr), K(all_server_arr)); + if (OB_SUCC(ret) && !failed_server_arr.empty() && running_monitor != NULL) { + ObSqlString tmp_str; + char *buf = NULL; + if (failed_server_arr.count() * (common::MAX_IP_ADDR_LENGTH + 1) <= common::MAX_VALUE_LENGTH) { + for (int64_t i = 0; OB_SUCC(ret) && i < failed_server_arr.count(); ++i) { + char svr_buf[common::MAX_IP_ADDR_LENGTH] = {0}; + failed_server_arr.at(i).get_addr().to_string(svr_buf, common::MAX_IP_ADDR_LENGTH); + if (OB_FAIL(tmp_str.append_fmt("%s%s", svr_buf, i == 0 ? 
"" : ","))) { + LOG_WARN("failed to append fmt", K(ret)); } - } else if (OB_FAIL(tmp_str.append_fmt("more than %ld servers refresh stat cache failed", - failed_server_arr.count()))) { - LOG_WARN("failed to append fmt", K(ret)); - } - if (OB_FAIL(ret)) { - //do nothing - } else if (OB_ISNULL(buf = static_cast(running_monitor->allocator_.alloc(tmp_str.length())))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("memory is not enough", K(ret), K(tmp_str)); - } else { - MEMCPY(buf, tmp_str.ptr(), tmp_str.length()); - ObString tmp_failed_list(tmp_str.length(), buf); - ObOptStatGatherStatList::instance().update_gather_stat_refresh_failed_list(tmp_failed_list, - running_monitor->opt_stat_gather_stat_); } + } else if (OB_FAIL(tmp_str.append_fmt("more than %ld servers refresh stat cache failed", + failed_server_arr.count()))) { + LOG_WARN("failed to append fmt", K(ret)); + } + if (OB_FAIL(ret)) { + //do nothing + } else if (OB_ISNULL(buf = static_cast(running_monitor->allocator_.alloc(tmp_str.length())))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("memory is not enough", K(ret), K(tmp_str)); + } else { + MEMCPY(buf, tmp_str.ptr(), tmp_str.length()); + ObString tmp_failed_list(tmp_str.length(), buf); + ObOptStatGatherStatList::instance().update_gather_stat_refresh_failed_list(tmp_failed_list, + running_monitor->opt_stat_gather_stat_); } } } @@ -4019,6 +4127,8 @@ int ObDbmsStats::parse_gather_stat_options(ObExecContext &ctx, const ObObjParam &cascade, const ObObjParam &no_invalidate, const ObObjParam &force, + const ObObjParam *hist_est_percent, + const ObObjParam *hist_block_sample, ObTableStatParam ¶m) { int ret = OB_SUCCESS; @@ -4122,6 +4232,41 @@ int ObDbmsStats::parse_gather_stat_options(ObExecContext &ctx, } } + if (OB_SUCC(ret)) { + if (hist_est_percent != NULL) { + double percent = 0.0; + number::ObNumber num_hist_est_percent; + if (hist_est_percent->is_null()) { + param.hist_sample_info_.set_percent(100.0); + } else if 
(OB_FAIL(hist_est_percent->get_number(num_hist_est_percent))) { + LOG_WARN("failed to get number", K(ret)); + } else if (OB_FAIL(ObDbmsStatsUtils::cast_number_to_double(num_hist_est_percent, percent))) { + LOG_WARN("failed to cast number to double" , K(ret)); + } else if (percent == 0.0) { + stat_options |= StatOptionFlags::OPT_HIST_EST_PERCENT; + } else if (OB_UNLIKELY(percent < 0.000001 || percent > 100.0)) { + ret = OB_ERR_DBMS_STATS_PL; + LOG_WARN("Illegal sample percent: must be in the range[0.000001,100]", K(ret)); + LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL, "Illegal sample percent: must be in the range[0.000001,100]"); + } else { + param.hist_sample_info_.set_percent(percent); + } + } + } + + if (OB_SUCC(ret)) { + if (hist_block_sample != NULL) { + bool is_block_sample = false; + if (hist_block_sample->is_null()) { + stat_options |= StatOptionFlags::OPT_HIST_BLOCK_SAMPLE; + } else if (OB_FAIL(hist_block_sample->get_bool(is_block_sample))) { + LOG_WARN("failed to get block sample", K(ret)); + } else { + param.hist_sample_info_.set_is_block_sample(is_block_sample); + } + } + } + if (OB_SUCC(ret)) { if (stat_options > 0 && OB_FAIL(get_default_stat_options(ctx, stat_options, param))) { LOG_WARN("failed to get default stat options", K(ret)); @@ -4133,7 +4278,6 @@ int ObDbmsStats::parse_gather_stat_options(ObExecContext &ctx, } int ObDbmsStats::use_default_gather_stat_options(ObExecContext &ctx, - const StatTable &stat_table, ObTableStatParam ¶m) { int ret = OB_SUCCESS; @@ -4243,6 +4387,38 @@ int ObDbmsStats::get_default_stat_options(ObExecContext &ctx, LOG_WARN("failed to push back", K(ret)); } } + if (OB_SUCC(ret) && param.is_async_gather_ && stat_options & StatOptionFlags::OPT_ASYNC_GATHER_SAMPLE_SIZE) { + ObAsyncGatherSampleSizePrefs *tmp_pref = NULL; + if (OB_FAIL(new_stat_prefs(*param.allocator_, ctx.get_my_session(), ObString(), tmp_pref))) { + LOG_WARN("failed to new stat prefs", K(ret)); + } else if (OB_FAIL(stat_prefs.push_back(tmp_pref))) { + 
LOG_WARN("failed to push back", K(ret)); + } + } + if (OB_SUCC(ret) && param.is_async_gather_ && stat_options & StatOptionFlags::OPT_ASYNC_GATHER_FULL_TABLE_SIZE) { + ObAsyncGatherFullTableSizePrefs *tmp_pref = NULL; + if (OB_FAIL(new_stat_prefs(*param.allocator_, ctx.get_my_session(), ObString(), tmp_pref))) { + LOG_WARN("failed to new stat prefs", K(ret)); + } else if (OB_FAIL(stat_prefs.push_back(tmp_pref))) { + LOG_WARN("failed to push back", K(ret)); + } + } + if (OB_SUCC(ret) && stat_options & StatOptionFlags::OPT_HIST_EST_PERCENT) { + ObHistEstPercentPrefs *tmp_pref = NULL; + if (OB_FAIL(new_stat_prefs(*param.allocator_, ctx.get_my_session(), ObString(), tmp_pref))) { + LOG_WARN("failed to new stat prefs", K(ret)); + } else if (OB_FAIL(stat_prefs.push_back(tmp_pref))) { + LOG_WARN("failed to push back", K(ret)); + } + } + if (OB_SUCC(ret) && stat_options & StatOptionFlags::OPT_HIST_BLOCK_SAMPLE) { + ObHistBlockSamplePrefs *tmp_pref = NULL; + if (OB_FAIL(new_stat_prefs(*param.allocator_, ctx.get_my_session(), ObString(), tmp_pref))) { + LOG_WARN("failed to new stat prefs", K(ret)); + } else if (OB_FAIL(stat_prefs.push_back(tmp_pref))) { + LOG_WARN("failed to push back", K(ret)); + } + } if (OB_SUCC(ret)) { if (OB_FAIL(ObDbmsStatsPreferences::get_sys_default_stat_options(ctx, stat_prefs, param))) { LOG_WARN("failed to get sys default stat options", K(ret)); @@ -4276,6 +4452,7 @@ int ObDbmsStats::parse_granularity_and_method_opt(ObExecContext &ctx, if (OB_FAIL(ObDbmsStats::parse_method_opt(ctx, param.allocator_, param.column_params_, param.method_opt_, + param.is_async_gather_, use_size_auto))) { LOG_WARN("failed to parse method opt", K(ret)); } @@ -4454,6 +4631,7 @@ int ObDbmsStats::parse_method_opt(sql::ObExecContext &ctx, ObIAllocator *allocator, ObIArray &column_params, const ObString &method_opt, + const bool is_async_gather, bool &use_size_auto) { int ret = OB_SUCCESS; @@ -4487,7 +4665,7 @@ int ObDbmsStats::parse_method_opt(sql::ObExecContext &ctx, ret = 
OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(ret), K(child_node)); } else if (T_FOR_ALL == child_node->type_) { - if (OB_FAIL(parser_for_all_clause(child_node, column_params, use_size_auto))) { + if (OB_FAIL(parser_for_all_clause(child_node, column_params, is_async_gather, use_size_auto))) { LOG_WARN("failed to parser for all clause", K(ret)); } else {/*do nothing*/} } else if (T_FOR_COLUMNS == child_node->type_) { @@ -4505,6 +4683,7 @@ int ObDbmsStats::parse_method_opt(sql::ObExecContext &ctx, int ObDbmsStats::parser_for_all_clause(const ParseNode *for_all_node, ObIArray &column_params, + const bool is_async_gather, bool &use_size_auto) { int ret = OB_SUCCESS; @@ -4534,6 +4713,9 @@ int ObDbmsStats::parser_for_all_clause(const ParseNode *for_all_node, LOG_WARN("failed to parse size clause", K(ret)); } else { use_size_auto = size_conf.is_auto(); + if (is_async_gather && size_conf.is_auto()) {//async gather don't gather histogram default + size_conf.set_manual(1); + } } } for (int64_t i = 0; OB_SUCC(ret) && i < column_params.count(); ++i) { @@ -5448,6 +5630,8 @@ int ObDbmsStats::gather_database_stats_job_proc(sql::ObExecContext &ctx, //do nothing LOG_INFO("auto gather stat abort because of the trace point and not user seesion", K(ctx.get_my_session()->is_user_session()), K(no_auto_gather)); + } else if (OB_FAIL(ObDbmsStatsUtils::cancel_async_gather_stats(ctx))) { + LOG_WARN("failed to cancel async gather stats", K(ret)); } else if (lib::is_oracle_mode() && !params.empty() && !params.at(0).is_null() && OB_FAIL(params.at(0).get_number(num_duration))) { LOG_WARN("failed to get duration", K(ret), K(params.at(0))); @@ -5567,18 +5751,26 @@ int ObDbmsStats::do_gather_table_stats(sql::ObExecContext &ctx, // 1. user table // 2. valid sys table // 3. 
virtual table + if (OB_LIKELY(task_info.task_table_count_ > 0)) { + -- task_info.task_table_count_; + } } else if (OB_FAIL(schema_guard->get_table_schema(tenant_id, table_id, table_schema))) { LOG_WARN("failed to get table schema", K(ret)); } else if (OB_ISNULL(table_schema)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null", K(ret)); + //table may be droped during auto table statistic gathering, caller should ignore this err code + ret = OB_TABLE_NOT_EXIST; + if (OB_LIKELY(task_info.task_table_count_ > 0)) { + -- task_info.task_table_count_; + } } else if (is_recyclebin_database_id(table_schema->get_database_id()) || (lib::is_oracle_mode() && is_oceanbase_sys_database_id(table_schema->get_database_id()))) { - //do nothing + if (OB_LIKELY(task_info.task_table_count_ > 0)) { + -- task_info.task_table_count_; + } } else { StatTable stat_table(table_schema->get_database_id(), table_id); double stale_percent_threshold = OPT_DEFAULT_STALE_PERCENT; - if (OB_FAIL(get_table_stale_percent_threshold(ctx, + if (OB_FAIL(get_table_stale_percent_threshold(ctx.get_sql_proxy(), tenant_id, table_schema->get_table_id(), stale_percent_threshold))) { @@ -5662,7 +5854,7 @@ int ObDbmsStats::get_common_table_stale_percent(sql::ObExecContext &ctx, uint64_t table_id = share::is_oracle_mapping_real_virtual_table(table_schema.get_table_id()) ? share::get_real_table_mappings_tid(table_schema.get_table_id()) : table_schema.get_table_id(); - const int64_t part_id = PARTITION_LEVEL_ZERO == table_schema.get_part_level() ? table_id : -1; + const int64_t part_id = PARTITION_LEVEL_ZERO == table_schema.get_part_level() ? 
table_schema.get_table_id() : -1; int64_t inc_modified_count = 0; int64_t row_cnt = 0; if (OB_UNLIKELY(table_schema.is_user_table() && -1 == part_id)) { @@ -5755,6 +5947,8 @@ int ObDbmsStats::gather_table_stats_with_default_param(ObExecContext &ctx, ObTableStatParam stat_param; stat_param.allocator_ = &tmp_alloc; stat_param.db_id_ = stat_table.database_id_; + stat_param.is_async_gather_ = stat_table.is_async_gather_; + stat_param.async_partition_ids_ = &stat_table.async_partition_ids_; bool is_all_fast_gather = false; ObSEArray no_gather_index_ids; ObOptStatGatherStat gather_stat(task_info); @@ -5769,9 +5963,13 @@ int ObDbmsStats::gather_table_stats_with_default_param(ObExecContext &ctx, LOG_WARN("failed to get valid duration time", K(ret)); } else if (OB_FAIL(parse_table_part_info(ctx, stat_table, stat_param, true))) { LOG_WARN("failed to parse owner", K(ret)); - } else if (OB_FAIL(use_default_gather_stat_options(ctx, stat_table, stat_param))) { + } else if (OB_FAIL(use_default_gather_stat_options(ctx, stat_param))) { LOG_WARN("failed to use default gather stat optitions", K(ret)); - } else if (OB_FAIL(adjust_auto_gather_stat_option(stat_table.partition_stat_infos_, stat_param))) { + } else if (!stat_table.is_async_gather_ && + OB_FAIL(adjust_auto_gather_stat_option(stat_table.partition_stat_infos_, stat_param))) { + LOG_WARN("failed to use default gather stat optitions", K(ret)); + } else if (stat_table.is_async_gather_ && + OB_FAIL(adjust_async_gather_stat_option(ctx, stat_table.async_partition_ids_, stat_param))) { LOG_WARN("failed to use default gather stat optitions", K(ret)); } else if (!stat_param.need_gather_stats()) { //do nothing @@ -5933,6 +6131,48 @@ int ObDbmsStats::get_new_stat_pref(ObExecContext &ctx, } else { stat_pref = tmp_pref; } + } else if (0 == opt_name.case_compare("ASYNC_GATHER_STALE_RATIO")) { + ObAsyncGatherStaleRatioPrefs *tmp_pref = NULL; + if (OB_FAIL(new_stat_prefs(allocator, ctx.get_my_session(), opt_value, tmp_pref))) { + 
LOG_WARN("failed to new stat prefs", K(ret)); + } else { + stat_pref = tmp_pref; + } + } else if (0 == opt_name.case_compare("ASYNC_GATHER_SAMPLE_SIZE")) { + ObAsyncGatherSampleSizePrefs *tmp_pref = NULL; + if (OB_FAIL(new_stat_prefs(allocator, ctx.get_my_session(), opt_value, tmp_pref))) { + LOG_WARN("failed to new stat prefs", K(ret)); + } else { + stat_pref = tmp_pref; + } + } else if (0 == opt_name.case_compare("ASYNC_GATHER_FULL_TABLE_SIZE")) { + ObAsyncGatherFullTableSizePrefs *tmp_pref = NULL; + if (OB_FAIL(new_stat_prefs(allocator, ctx.get_my_session(), opt_value, tmp_pref))) { + LOG_WARN("failed to new stat prefs", K(ret)); + } else { + stat_pref = tmp_pref; + } + } else if (0 == opt_name.case_compare("ASYNC_STALE_MAX_TABLE_SIZE")) { + ObAsyncStaleMaxTableSizePrefs *tmp_pref = NULL; + if (OB_FAIL(new_stat_prefs(allocator, ctx.get_my_session(), opt_value, tmp_pref))) { + LOG_WARN("failed to new stat prefs", K(ret)); + } else { + stat_pref = tmp_pref; + } + } else if (0 == opt_name.case_compare("HIST_EST_PERCENT")) { + ObHistEstPercentPrefs *tmp_pref = NULL; + if (OB_FAIL(new_stat_prefs(allocator, ctx.get_my_session(), opt_value, tmp_pref))) { + LOG_WARN("failed to new stat prefs", K(ret)); + } else { + stat_pref = tmp_pref; + } + } else if (0 == opt_name.case_compare("HIST_BLOCK_SAMPLE")) { + ObHistBlockSamplePrefs *tmp_pref = NULL; + if (OB_FAIL(new_stat_prefs(allocator, ctx.get_my_session(), opt_value, tmp_pref))) { + LOG_WARN("failed to new stat prefs", K(ret)); + } else { + stat_pref = tmp_pref; + } } else if (0 == opt_name.case_compare("ONLINE_ESTIMATE_PERCENT")) { ObOnlineEstimatePercentPrefs *tmp_pref = NULL; if (OB_FAIL(new_stat_prefs(allocator, ctx.get_my_session(), opt_value, tmp_pref))) { @@ -5944,15 +6184,17 @@ int ObDbmsStats::get_new_stat_pref(ObExecContext &ctx, ret = OB_ERR_DBMS_STATS_PL; LOG_WARN("Invalid input values for pname", K(ret), K(opt_name)); LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL, "Invalid input values for pname, Only Support CASCADE 
|"\ - "DEGREE | ESTIMATE_PERCENT | GRANULARITY | INCREMENTAL |"\ - "INCREMENTAL_LEVEL | METHOD_OPT | NO_INVALIDATE | OPTIONS |"\ - "STALE_PERCENT | ESTIMATE_BLOCK | BLOCK_SAMPLE |"\ - "APPROXIMATE_NDV(global prefs unique) | ONLINE_ESTIMATE_PERCENT prefs"); + "DEGREE | ESTIMATE_PERCENT | GRANULARITY | INCREMENTAL |"\ + "INCREMENTAL_LEVEL | METHOD_OPT | NO_INVALIDATE | OPTIONS |"\ + "STALE_PERCENT | ESTIMATE_BLOCK | ASYNC_GATHER_STALE_RATIO |"\ + "ASYNC_GATHER_SAMPLE_SIZE | ASYNC_GATHER_FULL_TABLE_SIZE |"\ + "ASYNC_STALE_MAX_TABLE_SIZE | HIST_EST_PERCENT | HIST_BLOCK_SAMPLE |"\ + "APPROXIMATE_NDV(global prefs unique) | ONLINE_ESTIMATE_PERCENT prefs"); } return ret; } -int ObDbmsStats::get_table_stale_percent_threshold(sql::ObExecContext &ctx, +int ObDbmsStats::get_table_stale_percent_threshold(ObMySQLProxy *mysql_proxy, const uint64_t tenant_id, const uint64_t table_id, double &stale_percent_threshold) @@ -5962,14 +6204,12 @@ int ObDbmsStats::get_table_stale_percent_threshold(sql::ObExecContext &ctx, ObTableStatParam param; ObString opt_name("STALE_PERCENT"); ObArenaAllocator tmp_alloc("OptStatPrefs", OB_MALLOC_NORMAL_BLOCK_SIZE, tenant_id); - param.tenant_id_ = tenant_id; - param.table_id_ = table_id; - param.allocator_ = &tmp_alloc; - if (OB_FAIL(ObDbmsStatsPreferences::get_prefs(ctx, param, opt_name, result))) { + if (OB_FAIL(ObDbmsStatsPreferences::get_prefs(mysql_proxy, tmp_alloc, + tenant_id, table_id, + opt_name, result))) { LOG_WARN("failed to get prefs", K(ret)); } else if (!result.is_null()) { - ObArenaAllocator calc_buf(ObModIds::OB_SQL_PARSER); - ObCastCtx cast_ctx(&calc_buf, NULL, CM_NONE, ObCharset::get_system_collation()); + ObCastCtx cast_ctx(&tmp_alloc, NULL, CM_NONE, ObCharset::get_system_collation()); ObObj dest_obj; if (OB_FAIL(ObObjCaster::to_type(ObDoubleType, cast_ctx, result, dest_obj))) { LOG_WARN("failed to cast number to double type", K(ret)); @@ -6208,9 +6448,7 @@ int ObDbmsStats::resovle_granularity(ObGranularityType granu_type, 
ObTableStatParam &param)
 {
   int ret = OB_SUCCESS;
-  bool is_specify_sample = param.sample_info_.is_sample_ &&
-                           param.sample_info_.sample_value_ >= 0.000001 &&
-                           param.sample_info_.sample_value_ < 100.0;
+  bool is_specify_sample = param.is_specify_sample();
   if (ObGranularityType::GRANULARITY_AUTO == granu_type) {
     param.global_stat_param_.set_gather_stat(param.part_name_.empty() && !is_specify_sample);
     param.part_stat_param_.set_gather_stat(param.part_name_.empty() &&
@@ -6870,5 +7108,265 @@ void ObDbmsStats::update_optimizer_gather_stat_info(const ObOptStatTaskInfo *tas
   THIS_WORKER.set_timeout_ts(origin_timeout);
 }
 
+/**
+ * @brief Async-gather statistics for the stale tables of the session's effective tenant.
+ *
+ * Stale tables are fetched in slices of at most 1000 through
+ * ObBasicStatsEstimator::get_async_gather_stats_tables() and passed one by one to
+ * do_async_gather_table_stats(); fetching stops once a slice comes back short.
+ * Nothing is gathered for virtual tenants or while GCONF reports upgrade mode.
+ *
+ * @param ctx            execution context providing the session
+ * @param duration_time  forwarded unchanged to do_async_gather_table_stats()
+ * @param succeed_cnt    incremented for every table gathered successfully
+ * @param task_info      task counters; task_table_count_ grows by each fetched slice
+ * @return OB_SUCCESS, or the first error that is not tolerated
+ */
+int ObDbmsStats::async_gather_table_stats(sql::ObExecContext &ctx,
+                                          const int64_t duration_time,
+                                          int64_t &succeed_cnt,
+                                          ObOptStatTaskInfo &task_info)
+{
+  int ret = OB_SUCCESS;
+  ObSQLSessionInfo *session = ctx.get_my_session();
+  uint64_t tenant_id = OB_INVALID_ID;
+  if (OB_ISNULL(session)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("get unexpected null", K(ret), K(session));
+  } else if (OB_FALSE_IT(tenant_id = session->get_effective_tenant_id())) {
+  } else if (is_virtual_tenant_id(tenant_id)) {
+    // do nothing
+  } else if (GCONF.in_upgrade_mode()) {
+    //in upgrade, don't async gather table stats
+  } else {
+    int64_t slice_cnt = 1000; // maximum tables we can async gather stats at each iteration
+    ObSEArray async_stat_tables;
+    // NOTE(review): nothing in this function clears the stale flag of tables that are
+    // skipped or fail, so a full slice of such tables could be fetched again on the next
+    // round - confirm the loop is bounded by check_status() / the gather itself.
+    do {
+      async_stat_tables.reuse();
+      if (OB_FAIL(THIS_WORKER.check_status())) {
+        LOG_WARN("check status failed", KR(ret));
+      } else if (OB_FAIL(ObBasicStatsEstimator::get_async_gather_stats_tables(ctx, tenant_id, slice_cnt, async_stat_tables))) {
+        LOG_WARN("failed to get async gather stats tables", K(ret));
+      } else {
+        task_info.task_table_count_ += async_stat_tables.count();
+        for (int64_t i = 0; OB_SUCC(ret) && i < async_stat_tables.count(); ++i) {
+          if (OB_FAIL(refresh_tenant_schema_guard(ctx, tenant_id))) {
+            LOG_WARN("refresh tenant schema guard failed", K(ret));
+          } else if (OB_FAIL(do_async_gather_table_stats(ctx, tenant_id, async_stat_tables.at(i),
+                                                         duration_time, succeed_cnt, task_info))) {
+            if (OB_TABLE_NOT_EXIST == ret) {
+              // the table was dropped while the job was running; this is expected, so skip it
+              // and keep gathering the remaining tables instead of aborting the whole job.
+              ret = OB_SUCCESS;
+            } else {
+              LOG_WARN("failed to do async gather table stats", K(ret));
+            }
+          }
+        }
+      }
+    } while (OB_SUCC(ret) && async_stat_tables.count() == slice_cnt);
+  }
+  return ret;
+}
+
+/**
+ * @brief Async-gather statistics for one table taken from the stale list.
+ *
+ * Tables rejected by ObDbmsStatsUtils::check_is_stat_table(), tables in the recyclebin
+ * database and (in Oracle mode) tables in the oceanbase sys database are skipped and
+ * subtracted from task_info.task_table_count_. Gather failures other than
+ * OB_ERR_QUERY_INTERRUPTED are counted in task_info.failed_count_ and swallowed so that
+ * one bad table does not fail the task.
+ *
+ * @param ctx            execution context; its virtual table ctx must carry a schema guard
+ * @param tenant_id      tenant the table belongs to
+ * @param async_table    table id and partition ids to gather
+ * @param duration_time  forwarded to gather_table_stats_with_default_param()
+ * @param succeed_cnt    incremented when the table is gathered successfully
+ * @param task_info      running task counters, updated in place
+ * @return OB_SUCCESS; OB_TABLE_NOT_EXIST when the table schema is gone (to be tolerated by
+ *         the caller); otherwise the schema-lookup error or OB_ERR_QUERY_INTERRUPTED
+ */
+int ObDbmsStats::do_async_gather_table_stats(sql::ObExecContext &ctx,
+                                             const uint64_t tenant_id,
+                                             const AsyncStatTable &async_table,
+                                             const int64_t duration_time,
+                                             int64_t &succeed_cnt,
+                                             ObOptStatTaskInfo &task_info)
+{
+  int ret = OB_SUCCESS;
+  bool is_valid = false;
+  const ObTableSchema *table_schema = NULL;
+  share::schema::ObSchemaGetterGuard *schema_guard = ctx.get_virtual_table_ctx().schema_guard_;
+  if (OB_ISNULL(schema_guard)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("get unexpected error", K(ret), K(schema_guard));
+  } else if (OB_FAIL(ObDbmsStatsUtils::check_is_stat_table(*schema_guard, tenant_id,
+                                                           async_table.table_id_, is_valid))) {
+    LOG_WARN("failed to check sy table validity", K(ret));
+  } else if (!is_valid) {
+    // only gather statistics for following tables:
+    // 1. user table
+    // 2. valid sys table
+    // 3. virtual table
+    if (OB_LIKELY(task_info.task_table_count_ > 0)) {
+      -- task_info.task_table_count_;
+    }
+  } else if (OB_FAIL(schema_guard->get_table_schema(tenant_id, async_table.table_id_, table_schema))) {
+    LOG_WARN("failed to get table schema", K(ret));
+  } else if (OB_ISNULL(table_schema)) {
+    //table may be dropped during async table statistic gathering, the caller ignores this err code
+    ret = OB_TABLE_NOT_EXIST;
+    if (OB_LIKELY(task_info.task_table_count_ > 0)) {
+      -- task_info.task_table_count_;
+    }
+  } else if (is_recyclebin_database_id(table_schema->get_database_id()) ||
+             (lib::is_oracle_mode() && is_oceanbase_sys_database_id(table_schema->get_database_id()))) {
+    if (OB_LIKELY(task_info.task_table_count_ > 0)) {
+      -- task_info.task_table_count_;
+    }
+  } else {
+    //begin async gather table stats
+    StatTable stat_table(table_schema->get_database_id(), async_table.table_id_, true/*is_async_gather*/);
+    if (OB_FAIL(append(stat_table.async_partition_ids_, async_table.partition_ids_))) {
+      LOG_WARN("failed to append", K(ret));
+    } else if (OB_FAIL(gather_table_stats_with_default_param(ctx, duration_time, stat_table, task_info))) {
+      LOG_WARN("failed to gather table stats with default param", K(ret));
+    }
+    if (OB_FAIL(ret)) {
+      if (OB_ERR_QUERY_INTERRUPTED == ret) {
+        LOG_WARN("query interrupted", K(ret));
+      } else if (OB_TABLE_NOT_EXIST == ret || OB_TIMEOUT == ret) {
+        ++task_info.failed_count_;
+        // do nothing
+        ret = OB_SUCCESS;
+      } else {
+        ++task_info.failed_count_;
+        LOG_WARN("failed to gather table stats with some unknown reason", K(ret));
+        ret = OB_SUCCESS;
+      }
+    } else {
+      ++succeed_cnt;
+    }
+  }
+  return ret;
+}
+
+/**
+ * @brief Restrict an async gather to the partitions listed in async_partition_ids.
+ *
+ * Sub-partitions and partitions whose part_id_ is not listed are removed from
+ * param.subpart_infos_ / param.part_infos_ and recorded in param.no_regather_partition_ids_.
+ * A first-level partition owning a listed sub-partition goes to param.approx_part_infos_
+ * instead, provided can_use_approx_ is set, sub-partition stats are still being modified
+ * and the table is two-level partitioned. need_modify_ / can_use_approx_ are then
+ * refreshed from the pruned lists. When async_full_table_size_ is 0 the sub-partition,
+ * partition and global gather settings are reset first.
+ *
+ * @param ctx                  execution context (not referenced by this function)
+ * @param async_partition_ids  ids of the partitions / sub-partitions to regather
+ * @param param                gather options, adjusted in place
+ * @return OB_SUCCESS, or the error of the first failing array operation
+ */
+int ObDbmsStats::adjust_async_gather_stat_option(ObExecContext &ctx,
+                                                 const ObIArray &async_partition_ids,
+                                                 ObTableStatParam &param)
+{
+  int ret = OB_SUCCESS;
+  ObSEArray approx_first_part_ids;
+  //If the value of async_full_table_size_ is 0, it means that the table no need to async gather stats.
+  if (param.async_full_table_size_ == 0) {
+    param.subpart_stat_param_.reset_gather_stat();
+    param.part_stat_param_.reset_gather_stat();
+    param.global_stat_param_.reset_gather_stat();
+  }
+  if (param.subpart_stat_param_.need_modify_) {
+    ObSEArray new_subpart_infos;
+    for (int64_t i = 0; OB_SUCC(ret) && i < param.subpart_infos_.count(); ++i) {
+      bool found_it = false;
+      int64_t first_part_id = 0;
+      for (int64_t j = 0; OB_SUCC(ret) && !found_it && j < async_partition_ids.count(); ++j) {
+        if (async_partition_ids.at(j) == param.subpart_infos_.at(i).part_id_) {
+          if (OB_FAIL(new_subpart_infos.push_back(param.subpart_infos_.at(i)))) {
+            LOG_WARN("failed to push back", K(ret));
+          } else {
+            found_it = true;
+            first_part_id = param.subpart_infos_.at(i).first_part_id_;
+          }
+        }
+      }
+      if (OB_SUCC(ret)) {
+        if (found_it) {//check first partition id need approx regather
+          bool has_it = false;
+          for (int64_t j = 0; !has_it && j < approx_first_part_ids.count(); ++j) {
+            has_it = (first_part_id == approx_first_part_ids.at(j));
+          }
+          if (!has_it) {
+            if (OB_FAIL(add_var_to_array_no_dup(approx_first_part_ids, first_part_id))) {
+              LOG_WARN("failed to add var to array no dup", K(ret));
+            }
+          }
+        } else if (OB_FAIL(param.no_regather_partition_ids_.push_back(param.subpart_infos_.at(i).part_id_))) {
+          LOG_WARN("failed to push back", K(ret));
+        }
+      }
+    }
+    if (OB_SUCC(ret)) {
+      if (OB_FAIL(param.subpart_infos_.assign(new_subpart_infos))) {
+        LOG_WARN("failed to assign", K(ret));
+      } else {
+        param.subpart_stat_param_.need_modify_ = !new_subpart_infos.empty();
+      }
+    }
+  }
+  if (OB_SUCC(ret) && param.part_stat_param_.need_modify_) {
+    ObSEArray new_part_infos;
+    for (int64_t i = 0; OB_SUCC(ret) && i < param.part_infos_.count(); ++i) {
+      bool gather_part = false;
+      bool approx_found_it = false;
+      for (int64_t j = 0; OB_SUCC(ret) && !approx_found_it && j < approx_first_part_ids.count(); ++j) {
+        if (approx_first_part_ids.at(j) == param.part_infos_.at(i).part_id_) {
+          approx_found_it = true;
+          if (param.part_stat_param_.can_use_approx_ &&
+              param.subpart_stat_param_.need_modify_ &&
+              param.part_level_ == share::schema::ObPartitionLevel::PARTITION_LEVEL_TWO) {
+            if (OB_FAIL(param.approx_part_infos_.push_back(param.part_infos_.at(i)))) {
+              LOG_WARN("failed to push back", K(ret));
+            } else {
+              gather_part = true;
+            }
+          } else {/*do nothing*/}
+        }
+      }
+      for (int64_t j = 0; OB_SUCC(ret) && !gather_part && j < async_partition_ids.count(); ++j) {
+        if (async_partition_ids.at(j) == param.part_infos_.at(i).part_id_) {
+          gather_part = true;
+          if (OB_FAIL(new_part_infos.push_back(param.part_infos_.at(i)))) {
+            LOG_WARN("failed to push back", K(ret));
+          } else {/*do nothing*/}
+        }
+      }
+      if (OB_SUCC(ret) && !gather_part) {
+        if (OB_FAIL(param.no_regather_partition_ids_.push_back(param.part_infos_.at(i).part_id_))) {
+          LOG_WARN("failed to push back", K(ret));
+        }
+      }
+    }
+    if (OB_SUCC(ret)) {
+      if (OB_FAIL(param.part_infos_.assign(new_part_infos))) {
+        LOG_WARN("failed to assign", K(ret));
+      } else {
+        param.part_stat_param_.can_use_approx_ = !param.approx_part_infos_.empty();
+        param.part_stat_param_.need_modify_ = !new_part_infos.empty() || !param.approx_part_infos_.empty();
+      }
+    }
+  }
+  LOG_TRACE("succeed to adjust auto gather stat option", K(async_partition_ids), K(param));
+  return ret;
+}
+
 }
 }
diff --git a/src/pl/sys_package/ob_dbms_stats.h b/src/pl/sys_package/ob_dbms_stats.h
index f5b3ab9bb4..c81fd0369e 100644
--- a/src/pl/sys_package/ob_dbms_stats.h
+++ b/src/pl/sys_package/ob_dbms_stats.h
@@ -50,6 +50,7 @@ struct MethodOptSizeConf
   inline bool is_repeat() const { return mode_ == 0 && val_ == 1; }
   inline bool is_skewonly() const { return mode_ == 0 && val_ == 2; }
   inline bool is_manual() const {return mode_ == 1; }
+  inline void set_manual(int32_t bucket_size) { mode_ = 1; val_ = bucket_size; }
 
   int32_t mode_;
   int32_t val_;
@@ -246,14 +247,20 @@ public:
                                  sql::ParamStore &params,
                                  common::ObObj &result);
 
+  static int 
async_gather_stats_job_proc(sql::ObExecContext &ctx, + sql::ParamStore ¶ms, + common::ObObj &result); + static int parse_method_opt(sql::ObExecContext &ctx, ObIAllocator *allocator, ObIArray &column_params, const ObString &method_opt, + const bool is_async_gather, bool &use_size_auto); static int parser_for_all_clause(const ParseNode *for_all_node, ObIArray &column_params, + const bool is_async_gather, bool &use_size_auto); static int parser_for_columns_clause(const ParseNode *for_col_node, @@ -340,10 +347,11 @@ public: const ObObjParam &cascade, const ObObjParam &no_invalidate, const ObObjParam &force, + const ObObjParam *hist_est_percent, + const ObObjParam *hist_block_sample, ObTableStatParam ¶m); static int use_default_gather_stat_options(ObExecContext &ctx, - const StatTable &stat_table, ObTableStatParam ¶m); static int get_default_stat_options(ObExecContext &ctx, @@ -396,6 +404,10 @@ public: const ObTableStatParam ¶m, ObOptStatRunningMonitor *running_monitor = NULL); + static int update_stat_cache(const uint64_t tenant_id, + obrpc::ObUpdateStatCacheArg &stat_arg, + ObOptStatRunningMonitor *running_monitor = NULL); + static int parse_set_table_stat_options(ObExecContext &ctx, const ObObjParam &stattab, const ObObjParam &statid, @@ -499,7 +511,7 @@ public: int64_t task_table_count, ObOptStatTaskInfo &task_info); - static int get_table_stale_percent_threshold(sql::ObExecContext &ctx, + static int get_table_stale_percent_threshold(ObMySQLProxy *mysql_proxy, const uint64_t tenant_id, const uint64_t table_id, double &stale_percent_threshold); @@ -626,6 +638,21 @@ private: static int check_system_stat_table_ready(int64_t tenant_id); + static int async_gather_table_stats(sql::ObExecContext &ctx, + const int64_t duration_time, + int64_t &succeed_cnt, + ObOptStatTaskInfo &task_info); + + static int do_async_gather_table_stats(sql::ObExecContext &ctx, + const uint64_t tenant_id, + const AsyncStatTable &async_table, + const int64_t duration_time, + int64_t &succeed_cnt, 
+ ObOptStatTaskInfo &task_info); + + static int adjust_async_gather_stat_option(ObExecContext &ctx, + const ObIArray &async_partition_ids, + ObTableStatParam ¶m); }; } diff --git a/src/rootserver/ob_ddl_operator.cpp b/src/rootserver/ob_ddl_operator.cpp index d56cc05696..de500493b2 100644 --- a/src/rootserver/ob_ddl_operator.cpp +++ b/src/rootserver/ob_ddl_operator.cpp @@ -5703,6 +5703,10 @@ int ObDDLOperator::init_tenant_env( LOG_WARN("insert default tablegroup failed", K(tenant_id), K(ret)); } else if (OB_FAIL(init_tenant_databases(tenant_schema, sys_variable, trans))) { LOG_WARN("insert default databases failed,", K(tenant_id), K(ret)); + } else if (OB_FAIL(init_tenant_optimizer_stats_info(sys_variable, tenant_id, trans))) { + LOG_WARN("failed to init tenant optimizer stats info", K(tenant_id), K(ret)); + } else if (OB_FAIL(init_tenant_spm_configure(tenant_id, trans))) { + LOG_WARN("failed to init tenant spm configure", K(tenant_id), K(ret)); } else if (OB_FAIL(init_tenant_profile(tenant_id, sys_variable, trans))) { LOG_WARN("fail to init tenant profile", K(tenant_id), K(ret)); } else if (OB_FAIL(init_tenant_users(tenant_schema, sys_variable, trans))) { @@ -5895,10 +5899,6 @@ int ObDDLOperator::init_tenant_databases(const ObTenantSchema &tenant_schema, OB_PUBLIC_SCHEMA_ID, "public schema", trans, is_oracle_mode))) { RS_LOG(WARN, "insert public schema failed", K(tenant_id), K(ret)); - } else if (OB_FAIL(init_tenant_optimizer_stats_info(sys_variable, tenant_id, trans))) { - RS_LOG(WARN, "init tenant tenant optimizer control table", K(tenant_id), K(ret)); - } else if (OB_FAIL(init_tenant_spm_configure(tenant_id, trans))) { - RS_LOG(WARN, "init tenant spm configure failed", K(tenant_id), K(ret)); } else { if (!is_oracle_mode) { if (OB_FAIL(init_tenant_database(tenant_schema, mysql_schema, diff --git a/src/share/inner_table/ob_inner_table_schema.21351_21400.cpp b/src/share/inner_table/ob_inner_table_schema.21351_21400.cpp index 8dc49828d0..cc47533d56 100644 --- 
a/src/share/inner_table/ob_inner_table_schema.21351_21400.cpp +++ b/src/share/inner_table/ob_inner_table_schema.21351_21400.cpp @@ -1160,7 +1160,7 @@ int ObInnerTableSchema::gv_ob_opt_stat_gather_monitor_schema(ObTableSchema &tabl table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__(SELECT CAST(TENANT_ID AS SIGNED) AS TENANT_ID, CAST(SVR_IP AS CHAR(46)) AS SVR_IP, CAST(SVR_PORT AS SIGNED) AS SVR_PORT, CAST(SESSION_ID AS SIGNED) AS SESSION_ID, CAST(TRACE_ID AS CHAR(64)) AS TRACE_ID, CAST(TASK_ID AS CHAR(36)) AS TASK_ID, CAST((CASE WHEN TYPE = 0 THEN 'MANUAL GATHER' ELSE (CASE WHEN TYPE = 1 THEN 'AUTO GATHER' ELSE 'UNDEFINED GATHER' END) END) AS CHAR(16)) AS TYPE, CAST(TASK_START_TIME AS DATETIME(6)) AS TASK_START_TIME, CAST(TASK_DURATION_TIME AS SIGNED) AS TASK_DURATION_TIME, CAST(TASK_TABLE_COUNT AS SIGNED) AS TASK_TABLE_COUNT, CAST(COMPLETED_TABLE_COUNT AS SIGNED) AS COMPLETED_TABLE_COUNT, CAST(RUNNING_TABLE_OWNER AS CHAR(128)) AS RUNNING_TABLE_OWNER, CAST(RUNNING_TABLE_NAME AS CHAR(256)) AS RUNNING_TABLE_NAME, CAST(RUNNING_TABLE_DURATION_TIME AS SIGNED) AS RUNNING_TABLE_DURATION_TIME, CAST(SPARE2 AS CHAR(256)) AS RUNNING_TABLE_PROGRESS FROM oceanbase.__all_virtual_opt_stat_gather_monitor )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__(SELECT CAST(TENANT_ID AS SIGNED) AS TENANT_ID, CAST(SVR_IP AS CHAR(46)) AS SVR_IP, CAST(SVR_PORT AS SIGNED) AS SVR_PORT, CAST(SESSION_ID AS SIGNED) AS SESSION_ID, CAST(TRACE_ID AS CHAR(64)) AS TRACE_ID, CAST(TASK_ID AS CHAR(36)) AS TASK_ID, CAST((CASE WHEN TYPE = 0 THEN 'MANUAL GATHER' ELSE (CASE WHEN TYPE = 1 THEN 'AUTO GATHER' ELSE (CASE WHEN TYPE = 2 THEN 'ASYNC GATHER' ELSE 'UNDEFINED GATHER' END) END) END) AS CHAR(16)) AS TYPE, CAST(TASK_START_TIME AS DATETIME(6)) AS TASK_START_TIME, CAST(TASK_DURATION_TIME AS SIGNED) AS TASK_DURATION_TIME, CAST(TASK_TABLE_COUNT AS SIGNED) AS TASK_TABLE_COUNT, 
CAST(COMPLETED_TABLE_COUNT AS SIGNED) AS COMPLETED_TABLE_COUNT, CAST(RUNNING_TABLE_OWNER AS CHAR(128)) AS RUNNING_TABLE_OWNER, CAST(RUNNING_TABLE_NAME AS CHAR(256)) AS RUNNING_TABLE_NAME, CAST(RUNNING_TABLE_DURATION_TIME AS SIGNED) AS RUNNING_TABLE_DURATION_TIME, CAST(SPARE2 AS CHAR(256)) AS RUNNING_TABLE_PROGRESS FROM oceanbase.__all_virtual_opt_stat_gather_monitor )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } @@ -1260,7 +1260,7 @@ int ObInnerTableSchema::dba_ob_task_opt_stat_gather_history_schema(ObTableSchema table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(TENANT_ID AS SIGNED) AS TENANT_ID, CAST(TASK_ID AS CHAR(36)) AS TASK_ID, CAST((CASE WHEN type = 0 THEN 'MANUAL GATHER' ELSE ( CASE WHEN type = 1 THEN 'AUTO GATHER' ELSE ( CASE WHEN type IS NULL THEN NULL ELSE 'UNDEFINED GATHER' END )END ) END ) AS CHAR(16)) AS TYPE, CAST((CASE WHEN RET_CODE = 0 THEN 'SUCCESS' ELSE (CASE WHEN RET_CODE IS NULL THEN NULL ELSE (CASE WHEN RET_CODE = -5065 THEN 'CANCELED' ELSE 'FAILED' END) END) END) AS CHAR(8)) AS STATUS, CAST(TABLE_COUNT AS SIGNED) AS TABLE_COUNT, CAST(FAILED_COUNT AS SIGNED) AS FAILED_COUNT, CAST(START_TIME AS DATETIME(6)) AS START_TIME, CAST(END_TIME AS DATETIME(6)) AS END_TIME FROM oceanbase.__all_virtual_task_opt_stat_gather_history WHERE TENANT_ID = EFFECTIVE_TENANT_ID() )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(TENANT_ID AS SIGNED) AS TENANT_ID, CAST(TASK_ID AS CHAR(36)) AS TASK_ID, CAST((CASE WHEN type = 0 THEN 'MANUAL GATHER' ELSE (CASE WHEN type = 1 THEN 'AUTO GATHER' ELSE (CASE WHEN type = 2 THEN 'ASYNC GATHER' ELSE (CASE WHEN type IS NULL THEN NULL ELSE 'UNDEFINED GATHER' END )END ) END ) END) AS CHAR(16)) AS TYPE, CAST((CASE WHEN RET_CODE = 0 THEN 'SUCCESS' ELSE (CASE WHEN RET_CODE IS NULL THEN NULL ELSE (CASE WHEN RET_CODE = -5065 THEN 'CANCELED' ELSE 'FAILED' END) 
END) END) AS CHAR(8)) AS STATUS, CAST(TABLE_COUNT AS SIGNED) AS TABLE_COUNT, CAST(FAILED_COUNT AS SIGNED) AS FAILED_COUNT, CAST(START_TIME AS DATETIME(6)) AS START_TIME, CAST(END_TIME AS DATETIME(6)) AS END_TIME FROM oceanbase.__all_virtual_task_opt_stat_gather_history WHERE TENANT_ID = EFFECTIVE_TENANT_ID() )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } diff --git a/src/share/inner_table/ob_inner_table_schema.25201_25250.cpp b/src/share/inner_table/ob_inner_table_schema.25201_25250.cpp index e08bda9562..7c2c19dd0b 100644 --- a/src/share/inner_table/ob_inner_table_schema.25201_25250.cpp +++ b/src/share/inner_table/ob_inner_table_schema.25201_25250.cpp @@ -1310,7 +1310,7 @@ int ObInnerTableSchema::dba_ob_task_opt_stat_gather_history_ora_schema(ObTableSc table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(TENANT_ID AS NUMBER) AS TENANT_ID, CAST(TASK_ID AS VARCHAR2(36)) AS TASK_ID, CAST((CASE WHEN type = 0 THEN 'MANUAL GATHER' ELSE ( CASE WHEN type = 1 THEN 'AUTO GATHER' ELSE ( CASE WHEN type IS NULL THEN NULL ELSE 'UNDEFINED GATHER' END )END ) END ) AS VARCHAR2(16)) AS TYPE, CAST((CASE WHEN RET_CODE = 0 THEN 'SUCCESS' ELSE (CASE WHEN RET_CODE IS NULL THEN NULL ELSE (CASE WHEN RET_CODE = -5065 THEN 'CANCELED' ELSE 'FAILED' END) END) END) AS VARCHAR2(8)) AS STATUS, CAST(TABLE_COUNT AS NUMBER) AS TASK_TABLE_COUNT, CAST(FAILED_COUNT AS NUMBER) AS FAILED_COUNT, CAST(START_TIME AS TIMESTAMP(6)) AS TASK_START_TIME, CAST(END_TIME AS TIMESTAMP(6)) AS TASK_END_TIME FROM SYS.ALL_VIRTUAL_TASK_OPT_STAT_GATHER_HISTORY WHERE TENANT_ID = EFFECTIVE_TENANT_ID() )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(TENANT_ID AS NUMBER) AS TENANT_ID, CAST(TASK_ID AS VARCHAR2(36)) AS TASK_ID, CAST((CASE WHEN type = 0 THEN 'MANUAL GATHER' ELSE (CASE WHEN type = 1 THEN 'AUTO GATHER' ELSE (CASE WHEN type = 2 THEN 'ASYNC 
GATHER' ELSE (CASE WHEN type IS NULL THEN NULL ELSE 'UNDEFINED GATHER' END )END ) END ) END) AS VARCHAR2(16)) AS TYPE, CAST((CASE WHEN RET_CODE = 0 THEN 'SUCCESS' ELSE (CASE WHEN RET_CODE IS NULL THEN NULL ELSE (CASE WHEN RET_CODE = -5065 THEN 'CANCELED' ELSE 'FAILED' END) END) END) AS VARCHAR2(8)) AS STATUS, CAST(TABLE_COUNT AS NUMBER) AS TASK_TABLE_COUNT, CAST(FAILED_COUNT AS NUMBER) AS FAILED_COUNT, CAST(START_TIME AS TIMESTAMP(6)) AS TASK_START_TIME, CAST(END_TIME AS TIMESTAMP(6)) AS TASK_END_TIME FROM SYS.ALL_VIRTUAL_TASK_OPT_STAT_GATHER_HISTORY WHERE TENANT_ID = EFFECTIVE_TENANT_ID() )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } diff --git a/src/share/inner_table/ob_inner_table_schema.28151_28200.cpp b/src/share/inner_table/ob_inner_table_schema.28151_28200.cpp index 686830ab3e..6d7fd69480 100644 --- a/src/share/inner_table/ob_inner_table_schema.28151_28200.cpp +++ b/src/share/inner_table/ob_inner_table_schema.28151_28200.cpp @@ -910,7 +910,7 @@ int ObInnerTableSchema::gv_ob_opt_stat_gather_monitor_ora_schema(ObTableSchema & table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__(SELECT CAST(TENANT_ID AS NUMBER) AS TENANT_ID, CAST(SVR_IP AS VARCHAR2(46)) AS SVR_IP, CAST(SVR_PORT AS NUMBER) AS SVR_PORT, CAST(SESSION_ID AS NUMBER) AS SESSION_ID, CAST(TRACE_ID AS VARCHAR2(64)) AS TRACE_ID, CAST(TASK_ID AS VARCHAR(36)) AS TASK_ID, CAST(DECODE(TYPE, 0, 'MANUAL GATHER', 1, 'AUTO GATHER', 'UNDEFINED GATHER') AS VARCHAR2(16)) AS TYPE, CAST(TASK_START_TIME AS TIMESTAMP(6)) AS TASK_START_TIME, CAST(TASK_DURATION_TIME AS NUMBER) AS TASK_DURATION_TIME, CAST(TASK_TABLE_COUNT AS NUMBER) AS TASK_TABLE_COUNT, CAST(COMPLETED_TABLE_COUNT AS NUMBER) AS COMPLETED_TABLE_COUNT, CAST(RUNNING_TABLE_OWNER AS VARCHAR2(128)) AS RUNNING_TABLE_OWNER, CAST(RUNNING_TABLE_NAME AS VARCHAR2(256)) AS RUNNING_TABLE_NAME, CAST(RUNNING_TABLE_DURATION_TIME AS 
NUMBER) AS RUNNING_TABLE_DURATION_TIME, CAST(SPARE2 AS VARCHAR2(256)) AS RUNNING_TABLE_PROGRESS FROM SYS.ALL_VIRTUAL_OPT_STAT_GATHER_MONITOR )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__(SELECT CAST(TENANT_ID AS NUMBER) AS TENANT_ID, CAST(SVR_IP AS VARCHAR2(46)) AS SVR_IP, CAST(SVR_PORT AS NUMBER) AS SVR_PORT, CAST(SESSION_ID AS NUMBER) AS SESSION_ID, CAST(TRACE_ID AS VARCHAR2(64)) AS TRACE_ID, CAST(TASK_ID AS VARCHAR(36)) AS TASK_ID, CAST(DECODE(TYPE, 0, 'MANUAL GATHER', 1, 'AUTO GATHER', 2, 'ASYNC GATHER', 'UNDEFINED GATHER') AS VARCHAR2(16)) AS TYPE, CAST(TASK_START_TIME AS TIMESTAMP(6)) AS TASK_START_TIME, CAST(TASK_DURATION_TIME AS NUMBER) AS TASK_DURATION_TIME, CAST(TASK_TABLE_COUNT AS NUMBER) AS TASK_TABLE_COUNT, CAST(COMPLETED_TABLE_COUNT AS NUMBER) AS COMPLETED_TABLE_COUNT, CAST(RUNNING_TABLE_OWNER AS VARCHAR2(128)) AS RUNNING_TABLE_OWNER, CAST(RUNNING_TABLE_NAME AS VARCHAR2(256)) AS RUNNING_TABLE_NAME, CAST(RUNNING_TABLE_DURATION_TIME AS NUMBER) AS RUNNING_TABLE_DURATION_TIME, CAST(SPARE2 AS VARCHAR2(256)) AS RUNNING_TABLE_PROGRESS FROM SYS.ALL_VIRTUAL_OPT_STAT_GATHER_MONITOR )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } diff --git a/src/share/inner_table/ob_inner_table_schema_def.py b/src/share/inner_table/ob_inner_table_schema_def.py index c8d9677115..2c6ff5023e 100644 --- a/src/share/inner_table/ob_inner_table_schema_def.py +++ b/src/share/inner_table/ob_inner_table_schema_def.py @@ -30663,7 +30663,8 @@ def_table_schema( CAST(TRACE_ID AS CHAR(64)) AS TRACE_ID, CAST(TASK_ID AS CHAR(36)) AS TASK_ID, CAST((CASE WHEN TYPE = 0 THEN 'MANUAL GATHER' ELSE - (CASE WHEN TYPE = 1 THEN 'AUTO GATHER' ELSE 'UNDEFINED GATHER' END) END) AS CHAR(16)) AS TYPE, + (CASE WHEN TYPE = 1 THEN 'AUTO GATHER' ELSE + (CASE WHEN TYPE = 2 THEN 'ASYNC GATHER' ELSE 'UNDEFINED GATHER' END) END) END) AS CHAR(16)) AS TYPE, CAST(TASK_START_TIME AS DATETIME(6)) AS TASK_START_TIME, CAST(TASK_DURATION_TIME AS SIGNED) AS TASK_DURATION_TIME, 
CAST(TASK_TABLE_COUNT AS SIGNED) AS TASK_TABLE_COUNT, @@ -30718,9 +30719,10 @@ def_table_schema( CAST(TENANT_ID AS SIGNED) AS TENANT_ID, CAST(TASK_ID AS CHAR(36)) AS TASK_ID, CAST((CASE WHEN type = 0 THEN 'MANUAL GATHER' - ELSE ( CASE WHEN type = 1 THEN 'AUTO GATHER' - ELSE ( CASE WHEN type IS NULL THEN NULL - ELSE 'UNDEFINED GATHER' END )END ) END ) AS CHAR(16)) AS TYPE, + ELSE (CASE WHEN type = 1 THEN 'AUTO GATHER' + ELSE (CASE WHEN type = 2 THEN 'ASYNC GATHER' + ELSE (CASE WHEN type IS NULL THEN NULL + ELSE 'UNDEFINED GATHER' END )END ) END ) END) AS CHAR(16)) AS TYPE, CAST((CASE WHEN RET_CODE = 0 THEN 'SUCCESS' ELSE (CASE WHEN RET_CODE IS NULL THEN NULL ELSE (CASE WHEN RET_CODE = -5065 THEN 'CANCELED' ELSE 'FAILED' END) END) END) AS CHAR(8)) AS STATUS, @@ -53607,9 +53609,10 @@ def_table_schema( CAST(TENANT_ID AS NUMBER) AS TENANT_ID, CAST(TASK_ID AS VARCHAR2(36)) AS TASK_ID, CAST((CASE WHEN type = 0 THEN 'MANUAL GATHER' - ELSE ( CASE WHEN type = 1 THEN 'AUTO GATHER' - ELSE ( CASE WHEN type IS NULL THEN NULL - ELSE 'UNDEFINED GATHER' END )END ) END ) AS VARCHAR2(16)) AS TYPE, + ELSE (CASE WHEN type = 1 THEN 'AUTO GATHER' + ELSE (CASE WHEN type = 2 THEN 'ASYNC GATHER' + ELSE (CASE WHEN type IS NULL THEN NULL + ELSE 'UNDEFINED GATHER' END )END ) END ) END) AS VARCHAR2(16)) AS TYPE, CAST((CASE WHEN RET_CODE = 0 THEN 'SUCCESS' ELSE (CASE WHEN RET_CODE IS NULL THEN NULL ELSE (CASE WHEN RET_CODE = -5065 THEN 'CANCELED' ELSE 'FAILED' END) END) END) AS VARCHAR2(8)) AS STATUS, @@ -62868,7 +62871,7 @@ def_table_schema( CAST(SESSION_ID AS NUMBER) AS SESSION_ID, CAST(TRACE_ID AS VARCHAR2(64)) AS TRACE_ID, CAST(TASK_ID AS VARCHAR(36)) AS TASK_ID, - CAST(DECODE(TYPE, 0, 'MANUAL GATHER', 1, 'AUTO GATHER', 'UNDEFINED GATHER') AS VARCHAR2(16)) AS TYPE, + CAST(DECODE(TYPE, 0, 'MANUAL GATHER', 1, 'AUTO GATHER', 2, 'ASYNC GATHER', 'UNDEFINED GATHER') AS VARCHAR2(16)) AS TYPE, CAST(TASK_START_TIME AS TIMESTAMP(6)) AS TASK_START_TIME, CAST(TASK_DURATION_TIME AS NUMBER) AS 
TASK_DURATION_TIME, CAST(TASK_TABLE_COUNT AS NUMBER) AS TASK_TABLE_COUNT, diff --git a/src/share/inner_table/sys_package/dbms_stats_body_mysql.sql b/src/share/inner_table/sys_package/dbms_stats_body_mysql.sql index 20b965f228..c63d78c6ed 100644 --- a/src/share/inner_table/sys_package/dbms_stats_body_mysql.sql +++ b/src/share/inner_table/sys_package/dbms_stats_body_mysql.sql @@ -17,7 +17,9 @@ CREATE OR REPLACE PACKAGE BODY dbms_stats statown VARCHAR(65535) DEFAULT NULL, no_invalidate BOOLEAN DEFAULT FALSE, stattype VARCHAR(65535) DEFAULT 'DATA', - force BOOLEAN DEFAULT FALSE + force BOOLEAN DEFAULT FALSE, + hist_est_percent DECIMAL DEFAULT AUTO_SAMPLE_SIZE, + hist_block_sample BOOLEAN DEFAULT NULL ); PRAGMA INTERFACE(C, GATHER_TABLE_STATS); @@ -450,6 +452,7 @@ CREATE OR REPLACE PACKAGE BODY dbms_stats taskid VARCHAR(65535) ); PRAGMA INTERFACE(C, CANCEL_GATHER_STATS); + PROCEDURE GATHER_SYSTEM_STATS(); PRAGMA INTERFACE(C, GATHER_SYSTEM_STATS); @@ -462,4 +465,6 @@ CREATE OR REPLACE PACKAGE BODY dbms_stats ); PRAGMA INTERFACE(C, SET_SYSTEM_STATS); + PROCEDURE async_gather_stats_job_proc (duration BIGINT DEFAULT NULL); + PRAGMA INTERFACE(C, ASYNC_GATHER_STATS_JOB_PROC); END dbms_stats; diff --git a/src/share/inner_table/sys_package/dbms_stats_mysql.sql b/src/share/inner_table/sys_package/dbms_stats_mysql.sql index 9534762185..67908c912a 100644 --- a/src/share/inner_table/sys_package/dbms_stats_mysql.sql +++ b/src/share/inner_table/sys_package/dbms_stats_mysql.sql @@ -23,7 +23,9 @@ create or replace PACKAGE dbms_stats AUTHID CURRENT_USER statown VARCHAR(65535) DEFAULT NULL, no_invalidate BOOLEAN DEFAULT FALSE, stattype VARCHAR(65535) DEFAULT 'DATA', - force BOOLEAN DEFAULT FALSE + force BOOLEAN DEFAULT FALSE, + hist_est_percent DECIMAL DEFAULT AUTO_SAMPLE_SIZE, + hist_block_sample BOOLEAN DEFAULT NULL ); PROCEDURE gather_schema_stats ( @@ -405,6 +407,7 @@ create or replace PACKAGE dbms_stats AUTHID CURRENT_USER PROCEDURE cancel_gather_stats ( taskid VARCHAR(65535) ); + 
PROCEDURE GATHER_SYSTEM_STATS();
 
   PROCEDURE DELETE_SYSTEM_STATS();
@@ -414,4 +417,5 @@ create or replace PACKAGE dbms_stats AUTHID CURRENT_USER
     pvalue DECIMAL(20, 10)
   );
 
+  PROCEDURE async_gather_stats_job_proc (duration BIGINT DEFAULT NULL);
 END dbms_stats;
diff --git a/src/share/ob_upgrade_utils.cpp b/src/share/ob_upgrade_utils.cpp
index f7607390cb..d264065f17 100755
--- a/src/share/ob_upgrade_utils.cpp
+++ b/src/share/ob_upgrade_utils.cpp
@@ -26,6 +26,8 @@
 #include "share/ob_tenant_info_proxy.h"//update max ls id
 #include "ob_upgrade_utils.h"
 #include "share/config/ob_config_helper.h"
+#include "share/stat/ob_dbms_stats_maintenance_window.h"
+#include "share/stat/ob_dbms_stats_preferences.h"
 
 namespace oceanbase
 {
@@ -1460,6 +1462,8 @@ int ObUpgradeFor4330Processor::post_upgrade()
     LOG_WARN("fail to check inner stat", KR(ret));
   } else if (OB_FAIL(post_upgrade_for_external_table_flag())) {
     LOG_WARN("fail to alter log external table flag", KR(ret));
+  } else if (OB_FAIL(post_upgrade_for_optimizer_stats())) {
+    LOG_WARN("fail to upgrade optimizer stats", KR(ret));
   }
   return ret;
 }
@@ -1477,6 +1481,52 @@ int ObUpgradeFor4330Processor::post_upgrade_for_external_table_flag()
   }
   return ret;
 }
+
+/**
+ * @brief Post-upgrade step of the 4330 processor for optimizer statistics.
+ *
+ * On a primary tenant this writes the statement built by
+ * ObDbmsStatsPreferences::get_extra_stats_perfs_for_upgrade() and then the statement built by
+ * ObDbmsStatsMaintenanceWindow::get_async_gather_stats_job_for_upgrade(). Non-primary tenants
+ * are skipped; an empty job statement is only logged.
+ *
+ * @return OB_SUCCESS, OB_ERR_UNEXPECTED when sql_proxy_ is null, or the first failing step's code
+ */
+int ObUpgradeFor4330Processor::post_upgrade_for_optimizer_stats()
+{
+  int ret = OB_SUCCESS;
+  ObSqlString extra_stats_perfs_sql;
+  ObSqlString add_async_stats_job_sql;
+  int64_t affected_rows = 0;
+  bool is_primary_tenant = false;
+  if (OB_ISNULL(sql_proxy_)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("sql_proxy is null", K(ret), K(tenant_id_));
+  } else if (OB_FAIL(ObAllTenantInfoProxy::is_primary_tenant(sql_proxy_, tenant_id_, is_primary_tenant))) {
+    LOG_WARN("check is primary tenant failed", K(ret), K(tenant_id_));
+  } else if (!is_primary_tenant) {
+    LOG_INFO("tenant isn't primary tenant, no need to gather stats, skip", K(tenant_id_));
+  } else if (OB_FAIL(ObDbmsStatsPreferences::get_extra_stats_perfs_for_upgrade(extra_stats_perfs_sql))) {
+    LOG_WARN("failed to get extra stats perfs for upgrade", K(ret));
+  } else if (OB_FAIL(sql_proxy_->write(tenant_id_, extra_stats_perfs_sql.ptr(), affected_rows))) {
+    LOG_WARN("failed to write", K(ret));
+  } else if (OB_FAIL(ObDbmsStatsMaintenanceWindow::get_async_gather_stats_job_for_upgrade(sql_proxy_,
+                                                                                          tenant_id_,
+                                                                                          add_async_stats_job_sql))) {
+    LOG_WARN("failed to get async gather stats job for upgrade", K(ret));
+  } else if (OB_UNLIKELY(add_async_stats_job_sql.empty())) {
+    LOG_INFO("failed to add async stats job in upgrade, perhaps the job already exists, need check after the upgrade.");
+  } else if (OB_FAIL(sql_proxy_->write(tenant_id_, add_async_stats_job_sql.ptr(), affected_rows))) {
+    LOG_WARN("failed to write", K(ret));
+  }
+  if (OB_FAIL(ret)) {
+    LOG_WARN("[UPGRADE] post upgrade for optimizer stats failed", KR(ret), K_(tenant_id));
+  } else {
+    LOG_INFO("[UPGRADE] post upgrade for optimizer stats succeed", K_(tenant_id));
+  }
+  return ret;
+}
+
 /* =========== 4330 upgrade processor end ============= */
 
 /* =========== special upgrade processor end ============= */
diff --git a/src/share/ob_upgrade_utils.h b/src/share/ob_upgrade_utils.h
index 510e7cdee3..2a2fcd52a1 100755
--- a/src/share/ob_upgrade_utils.h
+++ b/src/share/ob_upgrade_utils.h
@@ -259,7 +259,6 @@ private:
   int post_upgrade_for_spm();
   int post_upgrade_for_online_estimate_percent();
 };
-
 DEF_SIMPLE_UPGRARD_PROCESSER(4, 3, 2, 1)
 
 class ObUpgradeFor4330Processor : public ObBaseUpgradeProcessor
@@ -271,6 +270,7 @@ public:
   virtual int post_upgrade() override;
 private:
   int post_upgrade_for_external_table_flag();
+  int post_upgrade_for_optimizer_stats();
 };
 
 /* =========== special upgrade processor end ============= */
diff --git a/src/share/stat/ob_basic_stats_estimator.cpp b/src/share/stat/ob_basic_stats_estimator.cpp
index 4e561c8c4a..ea13427a6e 100644
--- a/src/share/stat/ob_basic_stats_estimator.cpp
+++ b/src/share/stat/ob_basic_stats_estimator.cpp
@@ -1407,5 +1407,94 @@ int ObBasicStatsEstimator::get_gather_table_type_list(ObSqlString &gather_table_
   return ret;
 }
 
+
+
+/**
+ * @brief Fetch tables whose statistics are stale and not locked, with their partition ids.
+ *
+ * Selects (table_id, partition_id) from share::OB_ALL_TABLE_STAT_TNAME where stale_stats = 1
+ * and stattype_locked = 0, limited to max_table_cnt distinct tables and ordered by both
+ * columns, so that consecutive rows of one table fold into a single AsyncStatTable.
+ *
+ * @param ctx            execution context supplying the sql proxy
+ * @param tenant_id      tenant whose statistics rows are scanned
+ * @param max_table_cnt  maximum number of distinct tables to return
+ * @param stat_tables    output; entries are appended, one per table with its partition ids
+ * @return OB_SUCCESS, OB_ERR_UNEXPECTED on a null proxy or result, or the query/array error
+ */
+int ObBasicStatsEstimator::get_async_gather_stats_tables(ObExecContext &ctx,
+                                                         const int64_t tenant_id,
+                                                         const int64_t max_table_cnt,
+                                                         ObIArray &stat_tables)
+{
+  int ret = OB_SUCCESS;
+  ObSqlString select_sql;
+  ObCommonSqlProxy *sql_proxy = ctx.get_sql_proxy();
+  if (OB_ISNULL(sql_proxy)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("get unexpected null", K(ret), K(sql_proxy));
+  } else if (OB_FAIL(select_sql.append_fmt("SELECT table_id, "\
+                                           " partition_id "\
+                                           " FROM %s"\
+                                           " WHERE table_id IN (SELECT DISTINCT table_id "\
+                                           " FROM %s "\
+                                           " WHERE tenant_id = %lu "\
+                                           " AND stale_stats = 1 "\
+                                           " AND stattype_locked = 0 limit %ld) "\
+                                           " AND tenant_id = %lu "\
+                                           " AND stale_stats = 1 "\
+                                           " AND stattype_locked = 0 order by 1, 2",
+                                           share::OB_ALL_TABLE_STAT_TNAME,
+                                           share::OB_ALL_TABLE_STAT_TNAME,
+                                           share::schema::ObSchemaUtils::get_extract_tenant_id(tenant_id, tenant_id),
+                                           max_table_cnt, // int64_t, hence %ld above
+                                           share::schema::ObSchemaUtils::get_extract_tenant_id(tenant_id, tenant_id)))) {
+    LOG_WARN("failed to append fmt", K(ret));
+  } else {
+    SMART_VAR(ObMySQLProxy::MySQLResult, proxy_result) {
+      sqlclient::ObMySQLResult *client_result = NULL;
+      ObSQLClientRetryWeak sql_client_retry_weak(sql_proxy);
+      if (OB_FAIL(sql_client_retry_weak.read(proxy_result, tenant_id, select_sql.ptr()))) {
+        LOG_WARN("failed to execute sql", K(ret), K(select_sql));
+      } else if (OB_ISNULL(client_result = proxy_result.get_result())) {
+        ret = OB_ERR_UNEXPECTED;
+        LOG_WARN("failed to execute sql", K(ret));
+      } else {
+        while (OB_SUCC(ret) && OB_SUCC(client_result->next())) {
+          int64_t idx_col1 = 0;
+          int64_t idx_col2 = 1;
+          ObObj obj;
+          int64_t table_id = 0;
+          int64_t partition_id = 0;
+          if (OB_FAIL(client_result->get_obj(idx_col1, obj))) {
+            LOG_WARN("failed to get object", K(ret));
+          } else if (OB_FAIL(obj.get_int(table_id))) {
+            LOG_WARN("failed to get int", K(ret), K(obj));
+          } else if (OB_FAIL(client_result->get_obj(idx_col2, obj))) {
+            LOG_WARN("failed to get object", K(ret));
+          } else if (OB_FAIL(obj.get_int(partition_id))) {
+            LOG_WARN("failed to get int", K(ret), K(obj));
+          } else if ((stat_tables.empty() || table_id != (stat_tables.at(stat_tables.count() - 1).table_id_)) &&
+                     OB_FAIL(stat_tables.push_back(AsyncStatTable(table_id)))) {
+            LOG_WARN("failed to push back", K(ret));
+          } else if (OB_FAIL(stat_tables.at(stat_tables.count() - 1).partition_ids_.push_back(partition_id))) {
+            LOG_WARN("failed to push back", K(ret));
+          }
+        }
+        ret = OB_ITER_END == ret ? OB_SUCCESS : ret;
+      }
+      int tmp_ret = OB_SUCCESS;
+      if (NULL != client_result) {
+        if (OB_SUCCESS != (tmp_ret = client_result->close())) {
+          LOG_WARN("close result set failed", K(ret), K(tmp_ret));
+          ret = COVER_SUCC(tmp_ret);
+        }
+      }
+    }
+    LOG_TRACE("succeed to get async gather stats tables", K(ret), K(stat_tables));
+  }
+  return ret;
+}
+
 } // end of common
 } // end of oceanbase
diff --git a/src/share/stat/ob_basic_stats_estimator.h b/src/share/stat/ob_basic_stats_estimator.h
index 5727d10ba8..e8b71f4a99 100644
--- a/src/share/stat/ob_basic_stats_estimator.h
+++ b/src/share/stat/ob_basic_stats_estimator.h
@@ -142,6 +142,13 @@ public:
                                         const int64_t slice_cnt,
                                         ObIArray &table_ids);
 
+  static int get_async_gather_stats_tables(ObExecContext &ctx,
+                                           const int64_t tenant_id,
+                                           const int64_t max_table_cnt,
+                                           ObIArray &stat_tables);
+
+  static int check_async_gather_need_sample(ObExecContext &ctx, ObTableStatParam &param);
+
   int estimate(const ObOptStatGatherParam &param,
                ObIArray &dst_opt_stats);
 
diff --git a/src/share/stat/ob_dbms_stats_executor.cpp b/src/share/stat/ob_dbms_stats_executor.cpp
index 2cf03b09e0..a761a27f86 100644
--- a/src/share/stat/ob_dbms_stats_executor.cpp
+++ b/src/share/stat/ob_dbms_stats_executor.cpp
@@ -273,7 +273,8 @@ int ObDbmsStatsExecutor::no_split_gather_stats(ObExecContext &ctx,
 
 /** @brief ObDbmsStatsExecutor::prepare_gather_stats used to prepare gather table stats, including:
  * 1.estimate block count;
- * 2.get the maximum num of partitions and columns for each stat gather.
+ * 2.adjust async gather param base on the estimate rowcnt info. + * 3.get the maximum num of partitions and columns for each stat gather. */ int ObDbmsStatsExecutor::prepare_gather_stats(ObExecContext &ctx, ObMySQLTransaction &trans, @@ -294,6 +295,11 @@ int ObDbmsStatsExecutor::prepare_gather_stats(ObExecContext &ctx, gather_helper.use_column_store_, gather_helper.use_split_part_))) { LOG_WARN("failed to estimate block count", K(ret)); + } else if (!gather_helper.use_split_part_ && + OB_FAIL(adjsut_async_gather_param(partition_id_block_map, + const_cast(param), + gather_helper.use_split_part_))) { + LOG_WARN("failed to adjsut async gather param", K(ret)); } else if (OB_FAIL(check_need_split_gather(param, gather_helper))) { LOG_WARN("failed to check need split gather", K(ret)); } else { @@ -1801,6 +1807,163 @@ int ObDbmsStatsExecutor::set_system_stats(ObExecContext &ctx, const ObSetSystemS return ret; } +int ObDbmsStatsExecutor::adjsut_async_gather_param(const PartitionIdBlockMap &partition_id_block_map, + ObTableStatParam ¶m, + bool &need_split_part) +{ + int ret = OB_SUCCESS; + need_split_part = false; + if (param.is_async_gather_) { + LOG_TRACE("begin to adjsut async gather param", K(param)); + if (param.part_level_ == share::schema::ObPartitionLevel::PARTITION_LEVEL_ZERO) { + //do nohting + } else { + BlockNumStat *block_num_stat = NULL; + if (OB_FAIL(partition_id_block_map.get_refactored(param.global_part_id_, block_num_stat))) { + if (OB_LIKELY(OB_HASH_NOT_EXIST == ret)) { + ret = OB_SUCCESS; + } else { + LOG_WARN("failed to get refactored", K(ret)); + } + } else if (OB_ISNULL(block_num_stat)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected error", K(ret), K(block_num_stat)); + } else { + int64_t total_row_cnt = block_num_stat->sstable_row_cnt_ + block_num_stat->memtable_row_cnt_; + if (total_row_cnt < param.async_full_table_size_) { + //do nothing + } else if (param.part_level_ == share::schema::ObPartitionLevel::PARTITION_LEVEL_ONE || + 
param.part_level_ == share::schema::ObPartitionLevel::PARTITION_LEVEL_TWO) { + bool can_derive = true; + int64_t max_part_scan_row_cnt = DEFAULT_ASYNC_MAX_SCAN_ROWCOUNT / 2; + if (param.part_level_ == share::schema::ObPartitionLevel::PARTITION_LEVEL_TWO && + param.subpart_stat_param_.need_modify_) { + int64_t gather_scan_row_cnt = 0; + int64_t i = 0; + ObSEArray no_derive_part_ids; + for (; OB_SUCC(ret) && i < param.subpart_infos_.count() && gather_scan_row_cnt < max_part_scan_row_cnt; ++i) { + if (OB_FAIL(partition_id_block_map.get_refactored(param.subpart_infos_.at(i).part_id_, block_num_stat))) { + if (OB_LIKELY(OB_HASH_NOT_EXIST == ret)) { + ret = OB_SUCCESS; + } else { + LOG_WARN("failed to get refactored", K(ret)); + } + } else if (OB_ISNULL(block_num_stat)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected error", K(ret), K(block_num_stat)); + } else { + int64_t row_cnt = block_num_stat->sstable_row_cnt_ + block_num_stat->memtable_row_cnt_; + gather_scan_row_cnt += row_cnt; + if (row_cnt > param.async_full_table_size_) { + need_split_part = true; + if (OB_FAIL(add_var_to_array_no_dup(no_derive_part_ids, + param.subpart_infos_.at(i).first_part_id_))) { + LOG_WARN("failed to push back", K(ret)); + } + } + } + } + if (OB_SUCC(ret) && (i < param.subpart_infos_.count() || !no_derive_part_ids.empty())) { + while (OB_SUCC(ret) && i < param.subpart_infos_.count()) { + int64_t idx = param.subpart_infos_.count() - 1; + if (OB_FAIL(param.no_regather_partition_ids_.push_back(param.subpart_infos_.at(idx).part_id_))) { + LOG_WARN("failed to push back", K(ret)); + } else { + param.subpart_infos_.pop_back(); + } + } + if (OB_SUCC(ret)) { + ObSEArray new_approx_part_infos; + for (int64_t j = 0; OB_SUCC(ret) && j < param.approx_part_infos_.count(); ++j) { + for (int64_t k = 0; can_derive && k < no_derive_part_ids.count(); ++k) { + if (no_derive_part_ids.at(k) == param.approx_part_infos_.at(j).part_id_) { + can_derive = false; + } + } + bool found_it = false; + for 
(int64_t k = 0; can_derive && !found_it && k < param.subpart_infos_.count(); ++k) { + found_it = param.subpart_infos_.at(k).first_part_id_ == param.approx_part_infos_.at(j).part_id_; + } + if (!found_it || !can_derive) { + if (!can_derive && is_async_gather_partition_id(param.approx_part_infos_.at(j).part_id_, param.async_partition_ids_)) { + if (OB_FAIL(param.part_infos_.push_back(param.approx_part_infos_.at(j)))) { + LOG_WARN("failed to push back", K(ret)); + } + } else if (OB_FAIL(param.no_regather_partition_ids_.push_back(param.approx_part_infos_.at(j).part_id_))) { + LOG_WARN("failed to push back", K(ret)); + } + } else if (OB_FAIL(new_approx_part_infos.push_back(param.approx_part_infos_.at(j)))) { + LOG_WARN("failed to push back", K(ret)); + } + } + if (OB_SUCC(ret)) { + if (OB_FAIL(param.approx_part_infos_.assign(new_approx_part_infos))) { + LOG_WARN("failed to assign", K(ret)); + } + } + } + } + } + if (OB_SUCC(ret) && param.part_stat_param_.need_modify_) { + int64_t gather_scan_row_cnt = 0; + int64_t i = 0; + for (; OB_SUCC(ret) && i < param.part_infos_.count() && gather_scan_row_cnt < max_part_scan_row_cnt; ++i) { + if (OB_FAIL(partition_id_block_map.get_refactored(param.part_infos_.at(i).part_id_, block_num_stat))) { + if (OB_LIKELY(OB_HASH_NOT_EXIST == ret)) { + ret = OB_SUCCESS; + } else { + LOG_WARN("failed to get refactored", K(ret)); + } + } else if (OB_ISNULL(block_num_stat)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected error", K(ret), K(block_num_stat)); + } else { + int64_t row_cnt = block_num_stat->sstable_row_cnt_ + block_num_stat->memtable_row_cnt_; + gather_scan_row_cnt += row_cnt; + if (row_cnt > param.async_full_table_size_) { + need_split_part = true; + can_derive = false; + } + } + } + if (OB_SUCC(ret) && i < param.part_infos_.count()) { + while (OB_SUCC(ret) && i < param.part_infos_.count()) { + int64_t idx = param.part_infos_.count() - 1; + if 
(OB_FAIL(param.no_regather_partition_ids_.push_back(param.part_infos_.at(idx).part_id_))) { + LOG_WARN("failed to push back", K(ret)); + } else { + param.part_infos_.pop_back(); + } + } + } + } + if (OB_SUCC(ret) && param.global_stat_param_.need_modify_ && !can_derive) { + if (is_async_gather_partition_id(param.global_part_id_, param.async_partition_ids_)) { + param.global_stat_param_.gather_approx_ = false; + } else { + param.global_stat_param_.reset_gather_stat(); + } + } + } + } + } + LOG_TRACE("end to adjsut async gather param", K(param)); + } + return ret; +} + +bool ObDbmsStatsExecutor::is_async_gather_partition_id(const int64_t partition_id, + const ObIArray *async_partition_ids) +{ + bool is_found = false; + if (async_partition_ids != NULL) { + for (int64_t i = 0; !is_found && i < async_partition_ids->count(); ++i) { + is_found = partition_id == async_partition_ids->at(i); + } + } + return is_found; +} + } // namespace common } // namespace oceanbase diff --git a/src/share/stat/ob_dbms_stats_executor.h b/src/share/stat/ob_dbms_stats_executor.h index b74b72f3fc..0eb7f2431f 100644 --- a/src/share/stat/ob_dbms_stats_executor.h +++ b/src/share/stat/ob_dbms_stats_executor.h @@ -202,6 +202,13 @@ private: char *&svr_ip, int32_t &svr_port); + static int adjsut_async_gather_param(const PartitionIdBlockMap &partition_id_block_map, + ObTableStatParam ¶m, + bool &need_split_part); + + static bool is_async_gather_partition_id(const int64_t partition_id, + const ObIArray *async_partition_ids); + }; diff --git a/src/share/stat/ob_dbms_stats_gather.cpp b/src/share/stat/ob_dbms_stats_gather.cpp index 5b4c25660c..bbef2a5b7d 100644 --- a/src/share/stat/ob_dbms_stats_gather.cpp +++ b/src/share/stat/ob_dbms_stats_gather.cpp @@ -42,6 +42,8 @@ int ObDbmsStatsGather::gather_stats(ObExecContext &ctx, LOG_WARN("get unexpected error", K(ret), K(param.allocator_)); } else if (OB_FAIL(init_opt_stats(*param.allocator_, param, opt_stats))) { LOG_WARN("failed to init opt stats", K(ret)); 
+ } else if (OB_FAIL(refine_sample_block_for_async_gather(opt_stats, const_cast(param)))) { + LOG_WARN("failed to refine sample block for async gather", K(ret)); } else if (!opt_stats.empty()) { //1.firstly esimate basic stat ObBasicStatsEstimator basic_est(ctx, *param.allocator_); @@ -250,5 +252,40 @@ int ObDbmsStatsGather::gather_index_stats(ObExecContext &ctx, return ret; } +int ObDbmsStatsGather::refine_sample_block_for_async_gather(const ObIArray &opt_stats, + ObOptStatGatherParam ¶m) +{ + int ret = OB_SUCCESS; + if (param.is_async_gather_ && !param.sample_info_.is_specify_sample()) { + int64_t sstable_row_cnt = 0; + int64_t memtable_row_cnt = 0; + for (int64_t i = 0; OB_SUCC(ret) && i < opt_stats.count(); ++i) { + if (OB_ISNULL(opt_stats.at(i).table_stat_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected error", K(ret), K(opt_stats.at(i).table_stat_)); + } else { + sstable_row_cnt += opt_stats.at(i).table_stat_->get_sstable_row_count(); + memtable_row_cnt += opt_stats.at(i).table_stat_->get_memtable_row_count(); + } + } + if (OB_UNLIKELY(opt_stats.count() > 1 && + sstable_row_cnt + memtable_row_cnt > DEFAULT_ASYNC_MAX_SCAN_ROWCOUNT)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected error", K(sstable_row_cnt), K(memtable_row_cnt), K(opt_stats), K(param)); + } else if (opt_stats.count() == 1) { + if (param.async_full_table_size_ < sstable_row_cnt + memtable_row_cnt) { + double sample_ratio = 100.0; + sample_ratio = 1.0 * param.async_gather_sample_size_ / (sstable_row_cnt + memtable_row_cnt) * 100.0; + if (sample_ratio > 0.0 && sample_ratio < 100.0) { + param.sample_info_.set_percent(sample_ratio); + param.sample_info_.set_is_block_sample(true); + } + LOG_INFO("decide async gather stats need sample", K(param), K(opt_stats)); + } + } + } + return ret; +} + } // namespace common } // namespace oceanbase diff --git a/src/share/stat/ob_dbms_stats_gather.h b/src/share/stat/ob_dbms_stats_gather.h index 22eca959cc..9ee73747ea 100644 --- 
a/src/share/stat/ob_dbms_stats_gather.h +++ b/src/share/stat/ob_dbms_stats_gather.h @@ -48,6 +48,9 @@ private: static int classfy_column_histogram(const ObOptStatGatherParam ¶m, ObOptStat &opt_stat); + static int refine_sample_block_for_async_gather(const ObIArray &opt_stats, + ObOptStatGatherParam ¶m); + }; } // end of sql diff --git a/src/share/stat/ob_dbms_stats_history_manager.cpp b/src/share/stat/ob_dbms_stats_history_manager.cpp index cb32d0f4cc..f2367e04f6 100644 --- a/src/share/stat/ob_dbms_stats_history_manager.cpp +++ b/src/share/stat/ob_dbms_stats_history_manager.cpp @@ -221,12 +221,12 @@ int ObDbmsStatsHistoryManager::backup_table_stats(ObExecContext &ctx, int ret = OB_SUCCESS; ObSEArray no_stat_part_ids; ObSEArray have_stat_part_ids; - bool is_specify_partition_gather = param.is_specify_partition_gather(); + bool is_specify_partition = param.is_specify_partition(); if (part_ids.empty()) { } else if (OB_FAIL(calssify_table_stat_part_ids(ctx, param.tenant_id_, param.table_id_, - is_specify_partition_gather, + is_specify_partition, part_ids, no_stat_part_ids, have_stat_part_ids))) { @@ -234,7 +234,7 @@ int ObDbmsStatsHistoryManager::backup_table_stats(ObExecContext &ctx, } else if (OB_FAIL(backup_having_table_part_stats(trans, param.tenant_id_, param.table_id_, - (is_specify_partition_gather || have_stat_part_ids.count() != part_ids.count()), + (is_specify_partition || have_stat_part_ids.count() != part_ids.count()), have_stat_part_ids, saving_time))) { LOG_WARN("failed to backup having table part stats", K(ret)); @@ -247,7 +247,7 @@ int ObDbmsStatsHistoryManager::backup_table_stats(ObExecContext &ctx, int ObDbmsStatsHistoryManager::calssify_table_stat_part_ids(ObExecContext &ctx, const uint64_t tenant_id, const uint64_t table_id, - const bool is_specify_partition_gather, + const bool is_specify_partition, const ObIArray &partition_ids, ObIArray &no_stat_part_ids, ObIArray &have_stat_part_ids) @@ -261,17 +261,17 @@ int 
ObDbmsStatsHistoryManager::calssify_table_stat_part_ids(ObExecContext &ctx, if (OB_ISNULL(mysql_proxy) || OB_ISNULL(session) || OB_UNLIKELY(partition_ids.empty())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected error", K(ret), K(mysql_proxy), K(session), K(partition_ids)); - } else if (is_specify_partition_gather && + } else if (is_specify_partition && OB_FAIL(gen_partition_list(partition_ids, partition_list))) { LOG_WARN("failed to gen partition list", K(ret)); - } else if (is_specify_partition_gather && + } else if (is_specify_partition && OB_FAIL(extra_where_str.append_fmt(" and partition_id in %s", partition_list.ptr()))) { LOG_WARN("failed to append fmt", K(ret)); } else if (OB_FAIL(raw_sql.append_fmt(CHECK_TABLE_STAT, share::OB_ALL_TABLE_STAT_TNAME, share::schema::ObSchemaUtils::get_extract_tenant_id(tenant_id, tenant_id), share::schema::ObSchemaUtils::get_extract_schema_id(tenant_id, table_id), - is_specify_partition_gather ? extra_where_str.ptr() : " "))) { + is_specify_partition ? 
extra_where_str.ptr() : " "))) { LOG_WARN("failed to append fmt", K(ret)); } else { SMART_VAR(ObMySQLProxy::MySQLResult, proxy_result) { @@ -338,7 +338,7 @@ int ObDbmsStatsHistoryManager::calssify_table_stat_part_ids(ObExecContext &ctx, int ObDbmsStatsHistoryManager::backup_having_table_part_stats(ObMySQLTransaction &trans, const uint64_t tenant_id, const uint64_t table_id, - const bool is_specify_partition_gather, + const bool is_specify_partition, const ObIArray &partition_ids, const int64_t saving_time) { @@ -349,10 +349,10 @@ int ObDbmsStatsHistoryManager::backup_having_table_part_stats(ObMySQLTransaction ObSqlString select_sql; int64_t affected_rows = 0; if (partition_ids.empty()) { - } else if (is_specify_partition_gather && + } else if (is_specify_partition && OB_FAIL(gen_partition_list(partition_ids, partition_list))) { LOG_WARN("failed to gen partition list", K(ret)); - } else if (is_specify_partition_gather && + } else if (is_specify_partition && OB_FAIL(extra_where_str.append_fmt(" and partition_id in %s", partition_list.ptr()))) { LOG_WARN("failed to append fmt", K(ret)); } else if (OB_FAIL(select_sql.append_fmt(SELECT_TABLE_STAT, @@ -360,7 +360,7 @@ int ObDbmsStatsHistoryManager::backup_having_table_part_stats(ObMySQLTransaction share::OB_ALL_TABLE_STAT_TNAME, share::schema::ObSchemaUtils::get_extract_tenant_id(tenant_id, tenant_id), share::schema::ObSchemaUtils::get_extract_schema_id(tenant_id, table_id), - is_specify_partition_gather ? extra_where_str.ptr() : " "))) { + is_specify_partition ? 
extra_where_str.ptr() : " "))) { LOG_WARN("failed to append fmt", K(ret)); } else if (OB_FAIL(raw_sql.append_fmt(INSERT_TABLE_STAT_HISTORY, share::OB_ALL_TABLE_STAT_HISTORY_TNAME, @@ -432,8 +432,8 @@ int ObDbmsStatsHistoryManager::backup_column_stats(ObExecContext &ctx, int ret = OB_SUCCESS; hash::ObHashMap having_stat_part_col_map; int64_t map_size = part_ids.count() * column_ids.count(); - bool is_specify_partition_gather = param.is_specify_partition_gather(); - bool is_specify_column_gather = param.is_specify_column_gather(); + bool is_specify_partition = param.is_specify_partition(); + bool is_specify_column = param.is_specify_column(); if (part_ids.empty() || column_ids.empty()) { } else if (OB_FAIL(having_stat_part_col_map.create(map_size, "PartColHashMap", @@ -443,14 +443,14 @@ int ObDbmsStatsHistoryManager::backup_column_stats(ObExecContext &ctx, } else if (OB_FAIL(generate_having_stat_part_col_map(ctx, param.tenant_id_, param.table_id_, - is_specify_partition_gather, - is_specify_column_gather, + is_specify_partition, + is_specify_column, part_ids, column_ids, having_stat_part_col_map))) { LOG_WARN("failed to calssify table stat part ids", K(ret)); } else if (OB_FAIL(backup_having_column_stats(trans, param.tenant_id_, param.table_id_, - is_specify_partition_gather || is_specify_column_gather, + is_specify_partition || is_specify_column, part_ids, column_ids, having_stat_part_col_map, saving_time))) { @@ -461,8 +461,8 @@ int ObDbmsStatsHistoryManager::backup_column_stats(ObExecContext &ctx, saving_time))) { LOG_WARN("failed to backup column part stats", K(ret)); } else if (OB_FAIL(backup_histogram_stats(trans, param.tenant_id_, param.table_id_, - is_specify_partition_gather, - is_specify_column_gather, + is_specify_partition, + is_specify_column, part_ids, column_ids, having_stat_part_col_map, saving_time))) { @@ -474,8 +474,8 @@ int ObDbmsStatsHistoryManager::backup_column_stats(ObExecContext &ctx, int 
ObDbmsStatsHistoryManager::generate_having_stat_part_col_map(ObExecContext &ctx, const uint64_t tenant_id, const uint64_t table_id, - const bool is_specify_partition_gather, - const bool is_specify_column_gather, + const bool is_specify_partition, + const bool is_specify_column, const ObIArray &partition_ids, const ObIArray &column_ids, hash::ObHashMap &have_stat_part_col_map) @@ -493,28 +493,28 @@ int ObDbmsStatsHistoryManager::generate_having_stat_part_col_map(ObExecContext & OB_UNLIKELY(partition_ids.empty() || column_ids.empty())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected error", K(ret), K(mysql_proxy), K(session), K(partition_ids), K(column_ids)); - } else if (is_specify_partition_gather && + } else if (is_specify_partition && OB_FAIL(gen_partition_list(partition_ids, partition_list))) { LOG_WARN("failed to gen partition list", K(ret)); - } else if (is_specify_partition_gather && + } else if (is_specify_partition && OB_FAIL(extra_partition_str.append_fmt(" and partition_id in %s", partition_list.ptr()))) { LOG_WARN("failed to append fmt", K(ret)); - } else if (is_specify_column_gather && + } else if (is_specify_column && OB_FAIL(gen_column_list(column_ids, column_list))) { LOG_WARN("failed to gen column list", K(ret)); - } else if (is_specify_column_gather && + } else if (is_specify_column && OB_FAIL(extra_column_str.append_fmt(" and column_id in %s", column_list.ptr()))) { LOG_WARN("failed to append fmt", K(ret)); - } else if ((is_specify_partition_gather || is_specify_column_gather) && + } else if ((is_specify_partition || is_specify_column) && OB_FAIL(extra_where_str.append_fmt("%s%s", - is_specify_partition_gather ? extra_partition_str.ptr() : " ", - is_specify_column_gather ? extra_column_str.ptr() : " "))) { + is_specify_partition ? extra_partition_str.ptr() : " ", + is_specify_column ? 
extra_column_str.ptr() : " "))) { LOG_WARN("failed to append fmt", K(ret)); } else if (OB_FAIL(raw_sql.append_fmt(CHECK_COLUMN_STAT, share::OB_ALL_COLUMN_STAT_TNAME, share::schema::ObSchemaUtils::get_extract_tenant_id(tenant_id, tenant_id), share::schema::ObSchemaUtils::get_extract_schema_id(tenant_id, table_id), - (is_specify_partition_gather || is_specify_column_gather) ? extra_where_str.ptr() : " "))) { + (is_specify_partition || is_specify_column) ? extra_where_str.ptr() : " "))) { LOG_WARN("failed to append fmt", K(ret)); } else { SMART_VAR(ObMySQLProxy::MySQLResult, proxy_result) { @@ -810,8 +810,8 @@ int ObDbmsStatsHistoryManager::backup_no_column_stats(ObMySQLTransaction &trans, int ObDbmsStatsHistoryManager::backup_histogram_stats(ObMySQLTransaction &trans, const uint64_t tenant_id, const uint64_t table_id, - const bool is_specify_partition_gather, - const bool is_specify_column_gather, + const bool is_specify_partition, + const bool is_specify_column, const ObIArray &partition_ids, const ObIArray &column_ids, hash::ObHashMap &having_stat_part_col_map, @@ -827,25 +827,25 @@ int ObDbmsStatsHistoryManager::backup_histogram_stats(ObMySQLTransaction &trans, ObSqlString partition_list; ObSqlString column_list; int64_t affected_rows = 0; - if (is_specify_partition_gather && OB_FAIL(gen_partition_list(partition_ids, partition_list))) { + if (is_specify_partition && OB_FAIL(gen_partition_list(partition_ids, partition_list))) { LOG_WARN("failed to gen partition list", K(ret)); - } else if (is_specify_partition_gather && + } else if (is_specify_partition && OB_FAIL(extra_partition_str.append_fmt(" and partition_id in %s", partition_list.ptr()))) { LOG_WARN("failed to append fmt", K(ret)); - } else if (is_specify_column_gather && OB_FAIL(gen_column_list(column_ids, column_list))) { + } else if (is_specify_column && OB_FAIL(gen_column_list(column_ids, column_list))) { LOG_WARN("failed to gen column list", K(ret)); - } else if (is_specify_column_gather && + } else if 
(is_specify_column && OB_FAIL(extra_column_str.append_fmt(" and column_id in %s", column_list.ptr()))) { LOG_WARN("failed to append fmt", K(ret)); - } else if ((is_specify_partition_gather || is_specify_column_gather) && + } else if ((is_specify_partition || is_specify_column) && OB_FAIL(extra_where_str.append_fmt("%s%s", - is_specify_partition_gather ? extra_partition_str.ptr() : " ", - is_specify_column_gather ? extra_column_str.ptr() : " "))) { + is_specify_partition ? extra_partition_str.ptr() : " ", + is_specify_column ? extra_column_str.ptr() : " "))) { LOG_WARN("failed to append fmt", K(ret)); } else if (OB_FAIL(where_str.append_fmt(" tenant_id = %lu and table_id = %lu %s", share::schema::ObSchemaUtils::get_extract_tenant_id(tenant_id, tenant_id), share::schema::ObSchemaUtils::get_extract_schema_id(tenant_id, table_id), - (is_specify_partition_gather || is_specify_column_gather) ? extra_where_str.ptr() : " "))) { + (is_specify_partition || is_specify_column) ? extra_where_str.ptr() : " "))) { LOG_WARN("failed to append fmt", K(ret)); } else if (OB_FAIL(raw_sql.append_fmt(INSERT_HISTOGRAM_STAT_HISTORY, share::OB_ALL_HISTOGRAM_STAT_HISTORY_TNAME, diff --git a/src/share/stat/ob_dbms_stats_history_manager.h b/src/share/stat/ob_dbms_stats_history_manager.h index 8777cbffa1..b9facad416 100644 --- a/src/share/stat/ob_dbms_stats_history_manager.h +++ b/src/share/stat/ob_dbms_stats_history_manager.h @@ -119,7 +119,7 @@ private: static int calssify_table_stat_part_ids(ObExecContext &ctx, const uint64_t tenant_id, const uint64_t table_id, - const bool is_specify_partition_gather, + const bool is_specify_partition, const ObIArray &partition_ids, ObIArray &no_stat_part_ids, ObIArray &have_stat_part_ids); @@ -127,7 +127,7 @@ private: static int backup_having_table_part_stats(ObMySQLTransaction &trans, const uint64_t tenant_id, const uint64_t table_id, - const bool is_specify_partition_gather, + const bool is_specify_partition, const ObIArray &partition_ids, const int64_t 
saving_time); @@ -147,8 +147,8 @@ private: static int generate_having_stat_part_col_map(ObExecContext &ctx, const uint64_t tenant_id, const uint64_t table_id, - const bool is_specify_partition_gather, - const bool is_specify_column_gather, + const bool is_specify_partition, + const bool is_specify_column, const ObIArray &partition_ids, const ObIArray &column_ids, hash::ObHashMap &have_stat_part_col_map); @@ -173,8 +173,8 @@ private: static int backup_histogram_stats(ObMySQLTransaction &trans, const uint64_t tenant_id, const uint64_t table_id, - const bool is_specify_partition_gather, - const bool is_specify_column_gather, + const bool is_specify_partition, + const bool is_specify_column, const ObIArray &partition_ids, const ObIArray &column_ids, hash::ObHashMap &having_stat_part_col_map, diff --git a/src/share/stat/ob_dbms_stats_maintenance_window.cpp b/src/share/stat/ob_dbms_stats_maintenance_window.cpp index d9cc26af25..3d9d8b412b 100644 --- a/src/share/stat/ob_dbms_stats_maintenance_window.cpp +++ b/src/share/stat/ob_dbms_stats_maintenance_window.cpp @@ -20,6 +20,7 @@ #include "sql/session/ob_basic_session_info.h" #include "observer/omt/ob_tenant_timezone_mgr.h" #include "lib/timezone/ob_timezone_info.h" +#include "observer/dbms_scheduler/ob_dbms_sched_table_operator.h" #define ALL_TENANT_SCHEDULER_JOB_COLUMN_NAME "tenant_id, " \ "job_name, " \ @@ -64,6 +65,8 @@ const char *windows_name[DAY_OF_WEEK] = {"MONDAY_WINDOW", "SATURDAY_WINDOW", "SUNDAY_WINDOW"}; const char *opt_stats_history_manager = "OPT_STATS_HISTORY_MANAGER"; +const char *async_gather_stats_job_proc = "ASYNC_GATHER_STATS_JOB_PROC"; +const int64_t OPT_STATS_HISTORY_MANAGER_JOB_ID = 8; int ObDbmsStatsMaintenanceWindow::get_stats_maintenance_window_jobs_sql(const ObSysVariableSchema &sys_variable, const uint64_t tenant_id, @@ -156,6 +159,27 @@ int ObDbmsStatsMaintenanceWindow::get_stats_maintenance_window_jobs_sql(const Ob } } + //set async gather stats job + if (OB_FAIL(ret)) { + } else if 
(OB_FAIL(get_async_gather_stats_job_sql(is_oracle_mode, tenant_id, + job_id++, exec_env, tmp_sql))) { + LOG_WARN("failed to get async gather stats job sql", K(ret)); + } else if (OB_FAIL(raw_sql.append_fmt(", (%s)", tmp_sql.ptr()))) { + LOG_WARN("failed to append sql", K(ret)); + } else { + ++ expected_affected_rows; + tmp_sql.reset(); + if (OB_FAIL(get_async_gather_stats_job_sql(is_oracle_mode, tenant_id, + 0, exec_env, tmp_sql))) { + LOG_WARN("failed to get async gather stats job sql", K(ret)); + } else if (OB_FAIL(raw_sql.append_fmt(", (%s)", tmp_sql.ptr()))) { + LOG_WARN("failed to append sql", K(ret)); + } else { + ++ expected_affected_rows; + tmp_sql.reset(); + } + } + //set dummy guard job if (OB_FAIL(ret)) { } else if (OB_FAIL(get_dummy_guard_job_sql(tenant_id, job_id, tmp_sql))) { @@ -264,6 +288,51 @@ int ObDbmsStatsMaintenanceWindow::get_stats_history_manager_job_sql(const bool i return ret; } +int ObDbmsStatsMaintenanceWindow::get_async_gather_stats_job_sql(const bool is_oracle_mode, + const uint64_t tenant_id, + const int64_t job_id, + const ObString &exec_env, + ObSqlString &raw_sql) +{ + int ret = OB_SUCCESS; + int64_t interval_ts = DEFAULT_ASYNC_GATHER_STATS_INTERVAL_USEC; + int64_t end_date = 64060560000000000;//4000-01-01 00:00:00.000000 + int64_t current = ObTimeUtility::current_time() + DEFAULT_ASYNC_GATHER_STATS_INTERVAL_USEC; + share::ObDMLSqlSplicer dml; + OZ (dml.add_pk_column("tenant_id", share::schema::ObSchemaUtils::get_extract_tenant_id(tenant_id, tenant_id))); + OZ (dml.add_column("job_name", ObHexEscapeSqlStr(ObString(async_gather_stats_job_proc)))); + OZ (dml.add_pk_column("job", job_id)); + OZ (dml.add_column("lowner", is_oracle_mode ? ObHexEscapeSqlStr("SYS") : ObHexEscapeSqlStr("root@%"))); + OZ (dml.add_column("powner", is_oracle_mode ? ObHexEscapeSqlStr("SYS") : ObHexEscapeSqlStr("root@%"))); + OZ (dml.add_column("cowner", is_oracle_mode ? 
ObHexEscapeSqlStr("SYS") : ObHexEscapeSqlStr("oceanbase"))); + OZ (dml.add_time_column("next_date", current)); + OZ (dml.add_column("total", 0)); + OZ (dml.add_column("`interval#`", ObHexEscapeSqlStr(ObString("FREQ=MINUTELY; INTERVAL=15")))); + OZ (dml.add_column("flag", 0)); + OZ (dml.add_column("what", ObHexEscapeSqlStr("DBMS_STATS.ASYNC_GATHER_STATS_JOB_PROC(600000000)"))); + OZ (dml.add_column("nlsenv", ObHexEscapeSqlStr(ObString("")))); + OZ (dml.add_column("field1", ObHexEscapeSqlStr(ObString("")))); + OZ (dml.add_column("exec_env", ObHexEscapeSqlStr(exec_env))); + OZ (dml.add_column("job_style", ObHexEscapeSqlStr(ObString("REGULER")))); + OZ (dml.add_column("program_name", ObHexEscapeSqlStr(ObString("")))); + OZ (dml.add_column("job_type", ObHexEscapeSqlStr(ObString("STORED_PROCEDURE")))); + OZ (dml.add_column("job_action", ObHexEscapeSqlStr("DBMS_STATS.ASYNC_GATHER_STATS_JOB_PROC(600000000)"))); + OZ (dml.add_column("number_of_argument", 0)); + OZ (dml.add_raw_time_column("start_date", current)); + OZ (dml.add_column("repeat_interval", ObHexEscapeSqlStr(ObString("FREQ=MINUTELY; INTERVAL=15")))); + OZ (dml.add_raw_time_column("end_date", end_date)); + OZ (dml.add_column("job_class", ObHexEscapeSqlStr(ObString("DEFAULT_JOB_CLASS")))); + OZ (dml.add_column("enabled", true)); + OZ (dml.add_column("auto_drop", false)); + OZ (dml.add_column("comments", ObHexEscapeSqlStr(ObString("used to async gather stats")))); + OZ (dml.add_column("credential_name", ObHexEscapeSqlStr(ObString("")))); + OZ (dml.add_column("destination_name", ObHexEscapeSqlStr(ObString("")))); + OZ (dml.add_column("interval_ts", interval_ts)); + OZ (dml.add_column("max_run_duration", DEFAULT_ASYNC_GATHER_STATS_DURATION_SEC)); + OZ (dml.splice_values(raw_sql)); + return ret; +} + //this dummy guard job is used to make sure the job id is monotonically increaseing int ObDbmsStatsMaintenanceWindow::get_dummy_guard_job_sql(const uint64_t tenant_id, const int64_t job_id, @@ -372,28 +441,30 @@ int 
ObDbmsStatsMaintenanceWindow::is_stats_maintenance_window_attr(const sql::Ob } else if (is_stats_job(job_name)) { //now we just support modify job_action、start_date if (0 == attr_name.case_compare("job_action")) { - if (0 == job_name.case_compare(opt_stats_history_manager)) { - const char *job_action_name = "DBMS_STATS.PURGE_STATS("; - if (!val_name.empty() && 0 == strncasecmp(val_name.ptr(), job_action_name, strlen(job_action_name))) { - if (OB_FAIL(dml.add_column("job_action", ObHexEscapeSqlStr(val_name)))) { - LOG_WARN("failed to add column", K(ret)); - } else if (OB_FAIL(dml.add_column("what", ObHexEscapeSqlStr(val_name)))) { - LOG_WARN("failed to add column", K(ret)); - } else { - is_window_attr = true; - } - } else {/*do nothing*/} + const char *history_stats_job = "DBMS_STATS.PURGE_STATS("; + const char *async_gather_stats_job = "DBMS_STATS.ASYNC_GATHER_STATS_JOB_PROC("; + const char *maintenance_window_job = "DBMS_STATS.GATHER_DATABASE_STATS_JOB_PROC("; + if ((0 == job_name.case_compare(opt_stats_history_manager) && + !val_name.empty() && + 0 == strncasecmp(val_name.ptr(), history_stats_job, strlen(history_stats_job))) || + (0 == job_name.case_compare(async_gather_stats_job_proc) && + !val_name.empty() && + 0 == strncasecmp(val_name.ptr(), async_gather_stats_job, strlen(async_gather_stats_job))) || + (0 != job_name.case_compare(opt_stats_history_manager) && + 0 != job_name.case_compare(async_gather_stats_job_proc) && + !val_name.empty() && + 0 == strncasecmp(val_name.ptr(), maintenance_window_job, strlen(maintenance_window_job)))) { + if (OB_FAIL(dml.add_column("job_action", ObHexEscapeSqlStr(val_name)))) { + LOG_WARN("failed to add column", K(ret)); + } else if (OB_FAIL(dml.add_column("what", ObHexEscapeSqlStr(val_name)))) { + LOG_WARN("failed to add column", K(ret)); + } else { + is_window_attr = true; + } } else { - const char *job_action_name = "DBMS_STATS.GATHER_DATABASE_STATS_JOB_PROC("; - if (!val_name.empty() && 0 == strncasecmp(val_name.ptr(), 
job_action_name, strlen(job_action_name))) { - if (OB_FAIL(dml.add_column("job_action", ObHexEscapeSqlStr(val_name)))) { - LOG_WARN("failed to add column", K(ret)); - } else if (OB_FAIL(dml.add_column("what", ObHexEscapeSqlStr(val_name)))) { - LOG_WARN("failed to add column", K(ret)); - } else { - is_window_attr = true; - } - } else {/*do nothing*/} + ret = OB_ERR_DBMS_STATS_PL; + LOG_WARN("the job_action is not a valid procedure call for this stats job", K(ret)); + LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL, "the job_action is not a valid procedure call for this stats job"); } } else if (0 == attr_name.case_compare("next_date")) { ObObj time_obj; @@ -477,7 +548,8 @@ bool ObDbmsStatsMaintenanceWindow::is_stats_job(const ObString &job_name) } } if (!is_true) { - is_true = (0 == job_name.case_compare(opt_stats_history_manager)); + is_true = (0 == job_name.case_compare(opt_stats_history_manager) || + 0 == job_name.case_compare(async_gather_stats_job_proc)); } return is_true; } @@ -550,7 +622,8 @@ int ObDbmsStatsMaintenanceWindow::check_date_validate(const ObString &job_name, LOG_WARN("get unexpected error", K(ret), K(specify_time)); } else if (current_time > specify_time) { is_valid = false; - } else if (0 == job_name.case_compare(opt_stats_history_manager)) { + } else if (0 == job_name.case_compare(opt_stats_history_manager) || + 0 == job_name.case_compare(async_gather_stats_job_proc)) { is_valid = true; } else if (OB_FAIL(ObTimeConverter::usec_to_ob_time(specify_time, ob_time))) { LOG_WARN("failed to usec to ob time", K(ret), K(specify_time)); @@ -567,6 +640,172 @@ int ObDbmsStatsMaintenanceWindow::check_date_validate(const ObString &job_name, return ret; } +int ObDbmsStatsMaintenanceWindow::get_async_gather_stats_job_for_upgrade(common::ObMySQLProxy *sql_proxy, + const uint64_t tenant_id, + ObSqlString &sql) +{ + int ret = OB_SUCCESS; + lib::Worker::CompatMode compat_mode = lib::Worker::CompatMode::INVALID; + int64_t job_id = 0; + ObString exec_env; + ObSqlString values_list; + sql.reset(); + bool
is_join_exists = false; + //bug: + ObArenaAllocator allocator("AsyncStatsJob"); + if (OB_FAIL(check_async_gather_job_exists(sql_proxy, tenant_id, is_join_exists))) { + LOG_WARN("failed to check async gather job exists", K(ret)); + } else if (is_join_exists) { + //do nothing + } else if (OB_FAIL(get_async_gather_stats_job_id_and_exec_env(sql_proxy, allocator, tenant_id, job_id, exec_env))) { + LOG_WARN("failed to get async gather stats job id and exec env", K(ret)); + } else if (OB_UNLIKELY(job_id > dbms_scheduler::ObDBMSSchedTableOperator::JOB_ID_OFFSET || + exec_env.empty())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected error", K(ret), K(job_id), K(exec_env)); + } else if (OB_FAIL(ObCompatModeGetter::get_tenant_mode(tenant_id, compat_mode))) { + LOG_WARN("failed to get tenant compat mode", KR(ret), K(tenant_id)); + } else if (OB_FAIL(get_async_gather_stats_job_sql(lib::Worker::CompatMode::ORACLE == compat_mode, + tenant_id, job_id, exec_env, values_list))) { + LOG_WARN("failed to get async gather stats job sql", K(ret)); + } else if (OB_FAIL(sql.append_fmt("REPLACE INTO %s( "ALL_TENANT_SCHEDULER_JOB_COLUMN_NAME") VALUES (%s)", + share::OB_ALL_TENANT_SCHEDULER_JOB_TNAME, + values_list.ptr()))) { + LOG_WARN("failed to append fmt", K(ret)); + } else { + values_list.reset(); + if (OB_FAIL(get_async_gather_stats_job_sql(lib::Worker::CompatMode::ORACLE == compat_mode, + tenant_id, 0, exec_env, values_list))) { + LOG_WARN("failed to get async gather stats job sql", K(ret)); + } else if (OB_FAIL(sql.append_fmt(", (%s);", values_list.ptr()))) { + LOG_WARN("failed to append fmt", K(ret)); + } + } + return ret; +} + +int ObDbmsStatsMaintenanceWindow::get_async_gather_stats_job_id_and_exec_env(common::ObMySQLProxy *sql_proxy, + ObIAllocator &allocator, + const uint64_t tenant_id, + int64_t &job_id, + ObString &exec_env) +{ + int ret = OB_SUCCESS; + ObSqlString select_sql; + if (OB_FAIL(select_sql.append_fmt("SELECT tt.job, t.exec_env FROM"\ + " %s t, (SELECT 
max(job) + 1 AS job FROM %s"\ + " WHERE tenant_id = %ld and job <= %ld AND job > 0) tt"\ + " WHERE t.tenant_id = %ld and t.job_name = '%s' AND t.job = %ld;", + share::OB_ALL_TENANT_SCHEDULER_JOB_TNAME, + share::OB_ALL_TENANT_SCHEDULER_JOB_TNAME, + share::schema::ObSchemaUtils::get_extract_tenant_id(tenant_id, tenant_id), + dbms_scheduler::ObDBMSSchedTableOperator::JOB_ID_OFFSET, + share::schema::ObSchemaUtils::get_extract_tenant_id(tenant_id, tenant_id), + opt_stats_history_manager, + OPT_STATS_HISTORY_MANAGER_JOB_ID))) { + LOG_WARN("failed to append fmt", K(ret)); + } else { + SMART_VAR(ObMySQLProxy::MySQLResult, proxy_result) { + sqlclient::ObMySQLResult *client_result = NULL; + ObSQLClientRetryWeak sql_client_retry_weak(sql_proxy); + if (OB_FAIL(sql_client_retry_weak.read(proxy_result, tenant_id, select_sql.ptr()))) { + LOG_WARN("failed to execute sql", K(ret), K(select_sql)); + } else if (OB_ISNULL(client_result = proxy_result.get_result())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to execute sql", K(ret)); + } else { + int64_t get_rows = 0; + //expected only get one row. + while (OB_SUCC(ret) && OB_SUCC(client_result->next())) { + int64_t fisrt_col = 0; + int64_t second_col = 1; + ObObj obj; + ObString tmp_exec_env; + if (get_rows > 0) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected error, expected only one row", K(ret)); + } else if (OB_FAIL(client_result->get_obj(fisrt_col, obj))) { + LOG_WARN("failed to get object", K(ret)); + } else if (OB_FAIL(obj.get_int(job_id))) { + LOG_WARN("failed to get int", K(ret), K(obj)); + } else if (OB_FAIL(client_result->get_obj(second_col, obj))) { + LOG_WARN("failed to get object", K(ret)); + } else if (OB_FAIL(obj.get_varchar(tmp_exec_env))) { + LOG_WARN("failed to get varchar", K(ret), K(obj)); + } else if (OB_FAIL(ob_write_string(allocator, tmp_exec_env, exec_env))) { + LOG_WARN("failed to ob write string", K(ret)); + } else { + ++ get_rows; + } + } + ret = OB_ITER_END == ret ?
OB_SUCCESS : ret; + } + int tmp_ret = OB_SUCCESS; + if (NULL != client_result) { + if (OB_SUCCESS != (tmp_ret = client_result->close())) { + LOG_WARN("close result set failed", K(ret), K(tmp_ret)); + ret = COVER_SUCC(tmp_ret); + } + } + } + LOG_INFO("succeed to get async gather stats job id and exec env", K(ret), K(select_sql), K(job_id), K(exec_env)); + } + return ret; +} + +int ObDbmsStatsMaintenanceWindow::check_async_gather_job_exists(common::ObMySQLProxy *sql_proxy, + const uint64_t tenant_id, + bool &is_join_exists) +{ + int ret = OB_SUCCESS; + is_join_exists = false; + ObSqlString select_sql; + int64_t row_count = 0; + if (OB_FAIL(select_sql.append_fmt("SELECT count(*) FROM %s WHERE tenant_id = %ld and job_name = '%s';", + share::OB_ALL_TENANT_SCHEDULER_JOB_TNAME, + share::schema::ObSchemaUtils::get_extract_tenant_id(tenant_id, tenant_id), + async_gather_stats_job_proc))) { + LOG_WARN("failed to append fmt", K(ret)); + } else { + SMART_VAR(ObMySQLProxy::MySQLResult, proxy_result) { + sqlclient::ObMySQLResult *client_result = NULL; + ObSQLClientRetryWeak sql_client_retry_weak(sql_proxy); + if (OB_FAIL(sql_client_retry_weak.read(proxy_result, tenant_id, select_sql.ptr()))) { + LOG_WARN("failed to execute sql", K(ret), K(select_sql)); + } else if (OB_ISNULL(client_result = proxy_result.get_result())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to execute sql", K(ret)); + } else { + //expected only get one row. + while (OB_SUCC(ret) && OB_SUCC(client_result->next())) { + int64_t idx = 0; + ObObj obj; + if (OB_FAIL(client_result->get_obj(idx, obj))) { + LOG_WARN("failed to get object", K(ret)); + } else if (OB_FAIL(obj.get_int(row_count))) { + LOG_WARN("failed to get int", K(ret), K(obj)); + } else if (OB_UNLIKELY(row_count != 2 && row_count != 0)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected error", K(ret), K(row_count)); + } else { + is_join_exists = row_count > 0; + } + } + ret = OB_ITER_END == ret ? 
OB_SUCCESS : ret; + } + int tmp_ret = OB_SUCCESS; + if (NULL != client_result) { + if (OB_SUCCESS != (tmp_ret = client_result->close())) { + LOG_WARN("close result set failed", K(ret), K(tmp_ret)); + ret = COVER_SUCC(tmp_ret); + } + } + } + LOG_INFO("succeed to check async gather job exists", K(ret), K(select_sql), K(is_join_exists), K(row_count)); + } + return ret; +} + } // namespace common } // namespace oceanbase diff --git a/src/share/stat/ob_dbms_stats_maintenance_window.h b/src/share/stat/ob_dbms_stats_maintenance_window.h index ccca65545a..62f34eaa73 100644 --- a/src/share/stat/ob_dbms_stats_maintenance_window.h +++ b/src/share/stat/ob_dbms_stats_maintenance_window.h @@ -31,8 +31,9 @@ #define DEFAULT_NON_WORKING_DAY_START_HOHR 6 #define DEFAULT_NON_WORKING_DAY_DURATION_SEC (20 * 60 * 60) #define DEFAULT_NON_WORKING_DAY_DURATION_USEC (20 * 60 * 60 * 1000000LL) -#define DEFAULT_DML_STATS_INTERVAL_USEC (15*60*1000000LL) #define DEFAULT_HISTORY_MANAGER_DURATION_SEC (12 * 60 * 60) +#define DEFAULT_ASYNC_GATHER_STATS_DURATION_SEC (10 * 60) +#define DEFAULT_ASYNC_GATHER_STATS_INTERVAL_USEC (15 * 60 * 1000000LL) namespace oceanbase { @@ -57,6 +58,10 @@ public: static bool is_stats_job(const ObString &job_name); + static int get_async_gather_stats_job_for_upgrade(common::ObMySQLProxy *sql_proxy, + const uint64_t tenant_id, + ObSqlString &sql); + private: static int get_window_job_info(const int64_t current_time, const int64_t nth_window, @@ -92,6 +97,19 @@ private: const int64_t specify_time, const int64_t current_time, bool &is_valid); + static int get_async_gather_stats_job_sql(const bool is_oracle_mode, + const uint64_t tenant_id, + const int64_t job_id, + const ObString &exec_env, + ObSqlString &raw_sql); + static int get_async_gather_stats_job_id_and_exec_env(common::ObMySQLProxy *sql_proxy, + ObIAllocator &allocator, + const uint64_t tenant_id, + int64_t &job_id, + ObString &exec_env); + static int check_async_gather_job_exists(common::ObMySQLProxy *sql_proxy, 
+ const uint64_t tenant_id, + bool &is_join_exists); }; diff --git a/src/share/stat/ob_dbms_stats_preferences.cpp b/src/share/stat/ob_dbms_stats_preferences.cpp index fb5c1b3ae6..abbb5cab77 100644 --- a/src/share/stat/ob_dbms_stats_preferences.cpp +++ b/src/share/stat/ob_dbms_stats_preferences.cpp @@ -66,27 +66,28 @@ int ObDbmsStatsPreferences::reset_global_pref_defaults(ObExecContext &ctx) return ret; } -int ObDbmsStatsPreferences::get_prefs(ObExecContext &ctx, - const ObTableStatParam ¶m, +int ObDbmsStatsPreferences::get_prefs(ObMySQLProxy *mysql_proxy, + ObIAllocator &allocator, + const uint64_t tenant_id, + const uint64_t table_id, const ObString &opt_name, ObObj &result) { int ret = OB_SUCCESS; ObSqlString get_user_sql; ObSqlString get_global_sql; - bool is_user_prefs = (param.table_id_ != OB_INVALID_ID); + bool is_user_prefs = (table_id != OB_INVALID_ID); if (OB_FAIL(get_global_sql.append_fmt(FETCH_GLOBAL_PREFS, share::OB_ALL_OPTSTAT_GLOBAL_PREFS_TNAME, opt_name.length(), opt_name.ptr()))) { LOG_WARN("failed to append fmt", K(ret), K(get_global_sql)); } else if (is_user_prefs) { - uint64_t tenant_id = param.tenant_id_; uint64_t exec_tenant_id = share::schema::ObSchemaUtils::get_exec_tenant_id(tenant_id); if (OB_FAIL(get_user_sql.append_fmt(FETCH_USER_PREFS, share::OB_ALL_OPTSTAT_USER_PREFS_TNAME, share::schema::ObSchemaUtils::get_extract_tenant_id(exec_tenant_id, tenant_id), - share::schema::ObSchemaUtils::get_extract_schema_id(exec_tenant_id, param.table_id_), + share::schema::ObSchemaUtils::get_extract_schema_id(exec_tenant_id, table_id), opt_name.length(), opt_name.ptr()))) { LOG_WARN("failed to append fmt", K(ret), K(get_user_sql)); @@ -94,11 +95,11 @@ int ObDbmsStatsPreferences::get_prefs(ObExecContext &ctx, } else {/*do nothing*/} if (OB_SUCC(ret)) { bool got_result = false; - if (is_user_prefs && OB_FAIL(do_get_prefs(ctx, param.allocator_, get_user_sql, got_result, result))) { + if (is_user_prefs && OB_FAIL(do_get_prefs(mysql_proxy, allocator, 
tenant_id, get_user_sql, got_result, result))) { LOG_WARN("failed to do get prefs", K(ret)); } else if (got_result) { /*do nothing*/ - } else if OB_FAIL(do_get_prefs(ctx, param.allocator_, get_global_sql, got_result, result)) { + } else if OB_FAIL(do_get_prefs(mysql_proxy, allocator, tenant_id, get_global_sql, got_result, result)) { LOG_WARN("failed to do get prefs", K(ret)); } else if (got_result) { /*do nothing*/ @@ -216,23 +217,19 @@ int ObDbmsStatsPreferences::delete_user_prefs(ObExecContext &ctx, return ret; } -int ObDbmsStatsPreferences::do_get_prefs(ObExecContext &ctx, - ObIAllocator *allocator, +int ObDbmsStatsPreferences::do_get_prefs(ObMySQLProxy *mysql_proxy, + ObIAllocator &allocator, + const uint64_t tenant_id, const ObSqlString &raw_sql, bool &get_result, ObObj &result) { int ret = OB_SUCCESS; get_result = false; - ObSQLSessionInfo *session = ctx.get_my_session(); - ObMySQLProxy *mysql_proxy = ctx.get_sql_proxy(); - if (OB_ISNULL(mysql_proxy) || OB_ISNULL(session) || - OB_ISNULL(allocator) || OB_UNLIKELY(raw_sql.empty())) { + if (OB_ISNULL(mysql_proxy) || OB_UNLIKELY(raw_sql.empty())) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected error", K(ret), K(mysql_proxy), K(session), - K(allocator), K(raw_sql.empty())); + LOG_WARN("get unexpected error", K(ret), K(mysql_proxy), K(raw_sql.empty())); } else { - uint64_t tenant_id = session->get_effective_tenant_id(); SMART_VAR(ObMySQLProxy::MySQLResult, proxy_result) { sqlclient::ObMySQLResult *client_result = NULL; ObSQLClientRetryWeak sql_client_retry_weak(mysql_proxy); @@ -251,7 +248,7 @@ int ObDbmsStatsPreferences::do_get_prefs(ObExecContext &ctx, LOG_WARN("get unexpected error", K(ret), K(result), K(raw_sql)); } else if (OB_FAIL(client_result->get_obj(idx, tmp))) { LOG_WARN("failed to get object", K(ret)); - } else if (OB_FAIL(ob_write_obj(*allocator, tmp, result))) { + } else if (OB_FAIL(ob_write_obj(allocator, tmp, result))) { LOG_WARN("failed to write object", K(ret)); } else { is_first = false; @@ 
-366,23 +363,26 @@ int ObDbmsStatsPreferences::gen_init_global_prefs_sql(ObSqlString &raw_sql, ++ total_rows; } } - if (OB_SUCC(ret)) {//init cascade - ObCascadePrefs prefs; - if (OB_ISNULL(prefs.get_stat_pref_name()) || OB_ISNULL(prefs.get_stat_pref_default_value())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected error", K(ret), K(prefs.get_stat_pref_name()), - K(prefs.get_stat_pref_default_value())); - } else if (OB_FAIL(value_str.append_fmt("('%s', %s, %s, '%s'), ", - prefs.get_stat_pref_name(), - null_str, - time_str, - prefs.get_stat_pref_default_value()))) { - LOG_WARN("failed to append", K(ret)); - } else { - ++ total_rows; - } +#define init_perfs_value(perfs_type, is_last_value) \ + if (OB_SUCC(ret)) { \ + perfs_type prefs; \ + if (OB_ISNULL(prefs.get_stat_pref_name()) || OB_ISNULL(prefs.get_stat_pref_default_value())) { \ + ret = OB_ERR_UNEXPECTED; \ + LOG_WARN("get unexpected error", K(ret), K(prefs.get_stat_pref_name()), \ + K(prefs.get_stat_pref_default_value())); \ + } else if (OB_FAIL(value_str.append_fmt("('%s', %s, %s, '%s')%s ", \ + prefs.get_stat_pref_name(), \ + null_str, \ + time_str, \ + prefs.get_stat_pref_default_value(), \ + is_last_value ? 
";" : ","))) { \ + LOG_WARN("failed to append", K(ret)); \ + } else { \ + ++ total_rows; \ + } \ } - if (OB_SUCC(ret)) {//init degree + init_perfs_value(ObCascadePrefs, false/*last value*/);//init cascade + if (OB_SUCC(ret)) { ObDegreePrefs prefs; if (OB_ISNULL(prefs.get_stat_pref_name()) || OB_NOT_NULL(prefs.get_stat_pref_default_value())) { ret = OB_ERR_UNEXPECTED; @@ -398,198 +398,24 @@ int ObDbmsStatsPreferences::gen_init_global_prefs_sql(ObSqlString &raw_sql, ++ total_rows; } } - if (OB_SUCC(ret)) {//init esimate_percent - ObEstimatePercentPrefs prefs; - if (OB_ISNULL(prefs.get_stat_pref_name()) || OB_ISNULL(prefs.get_stat_pref_default_value())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected error", K(ret), K(prefs.get_stat_pref_name()), - K(prefs.get_stat_pref_default_value())); - } else if (OB_FAIL(value_str.append_fmt("('%s', %s, %s, '%s'), ", - prefs.get_stat_pref_name(), - null_str, - time_str, - prefs.get_stat_pref_default_value()))) { - LOG_WARN("failed to append", K(ret)); - } else { - ++ total_rows; - } - } - if (OB_SUCC(ret)) {//init incremental - ObIncrementalPrefs prefs; - if (OB_ISNULL(prefs.get_stat_pref_name()) || OB_ISNULL(prefs.get_stat_pref_default_value())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected error", K(ret), K(prefs.get_stat_pref_name()), - K(prefs.get_stat_pref_default_value())); - } else if (OB_FAIL(value_str.append_fmt("('%s', %s, %s, '%s'), ", - prefs.get_stat_pref_name(), - null_str, - time_str, - prefs.get_stat_pref_default_value()))) { - LOG_WARN("failed to append", K(ret)); - } else { - ++ total_rows; - } - } - if (OB_SUCC(ret)) {//init incremental_level - ObIncrementalLevelPrefs prefs; - if (OB_ISNULL(prefs.get_stat_pref_name()) || OB_ISNULL(prefs.get_stat_pref_default_value())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected error", K(ret), K(prefs.get_stat_pref_name()), - K(prefs.get_stat_pref_default_value())); - } else if (OB_FAIL(value_str.append_fmt("('%s', %s, %s, '%s'), ", - 
prefs.get_stat_pref_name(), - null_str, - time_str, - prefs.get_stat_pref_default_value()))) { - LOG_WARN("failed to append", K(ret)); - } else { - ++ total_rows; - } - } - if (OB_SUCC(ret)) {//init granularity - ObGranularityPrefs prefs; - if (OB_ISNULL(prefs.get_stat_pref_name()) || OB_ISNULL(prefs.get_stat_pref_default_value())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected error", K(ret), K(prefs.get_stat_pref_name()), - K(prefs.get_stat_pref_default_value())); - } else if (OB_FAIL(value_str.append_fmt("('%s', %s, %s, '%s'), ", - prefs.get_stat_pref_name(), - null_str, - time_str, - prefs.get_stat_pref_default_value()))) { - LOG_WARN("failed to append", K(ret)); - } else { - ++ total_rows; - } - } - if (OB_SUCC(ret)) {//init method_opt - ObMethodOptPrefs prefs; - if (OB_ISNULL(prefs.get_stat_pref_name()) || OB_ISNULL(prefs.get_stat_pref_default_value())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected error", K(ret), K(prefs.get_stat_pref_name()), - K(prefs.get_stat_pref_default_value())); - } else if (OB_FAIL(value_str.append_fmt("('%s', %s, %s, '%s'), ", - prefs.get_stat_pref_name(), - null_str, - time_str, - prefs.get_stat_pref_default_value()))) { - LOG_WARN("failed to append", K(ret)); - } else { - ++ total_rows; - } - } - if (OB_SUCC(ret)) {//init no_invalidate - ObNoInvalidatePrefs prefs; - if (OB_ISNULL(prefs.get_stat_pref_name()) || OB_ISNULL(prefs.get_stat_pref_default_value())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected error", K(ret), K(prefs.get_stat_pref_name()), - K(prefs.get_stat_pref_default_value())); - } else if (OB_FAIL(value_str.append_fmt("('%s', %s, %s, '%s'), ", - prefs.get_stat_pref_name(), - null_str, - time_str, - prefs.get_stat_pref_default_value()))) { - LOG_WARN("failed to append", K(ret)); - } else { - ++ total_rows; - } - } - if (OB_SUCC(ret)) {//init options - ObOptionsPrefs prefs; - if (OB_ISNULL(prefs.get_stat_pref_name()) || OB_ISNULL(prefs.get_stat_pref_default_value())) { - ret = 
OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected error", K(ret), K(prefs.get_stat_pref_name()), - K(prefs.get_stat_pref_default_value())); - } else if (OB_FAIL(value_str.append_fmt("('%s', %s, %s, '%s'), ", - prefs.get_stat_pref_name(), - null_str, - time_str, - prefs.get_stat_pref_default_value()))) { - LOG_WARN("failed to append", K(ret)); - } else { - ++ total_rows; - } - } - if (OB_SUCC(ret)) {//init stale_percent - ObStalePercentPrefs prefs; - if (OB_ISNULL(prefs.get_stat_pref_name()) || OB_ISNULL(prefs.get_stat_pref_default_value())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected error", K(ret), K(prefs.get_stat_pref_name()), - K(prefs.get_stat_pref_default_value())); - } else if (OB_FAIL(value_str.append_fmt("('%s', %s, %s, '%s'), ", - prefs.get_stat_pref_name(), - null_str, - time_str, - prefs.get_stat_pref_default_value()))) { - LOG_WARN("failed to append", K(ret)); - } else { - ++ total_rows; - } - } - if (OB_SUCC(ret)) {//init approximate_ndv - ObApproximateNdvPrefs prefs; - if (OB_ISNULL(prefs.get_stat_pref_name()) || OB_ISNULL(prefs.get_stat_pref_default_value())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected error", K(ret), K(prefs.get_stat_pref_name()), - K(prefs.get_stat_pref_default_value())); - } else if (OB_FAIL(value_str.append_fmt("('%s', %s, %s, '%s'),", - prefs.get_stat_pref_name(), - null_str, - time_str, - prefs.get_stat_pref_default_value()))) { - LOG_WARN("failed to append", K(ret)); - } else { - ++ total_rows; - } - } - if (OB_SUCC(ret)) {//init estimate_block - ObEstimateBlockPrefs prefs; - if (OB_ISNULL(prefs.get_stat_pref_name()) || OB_ISNULL(prefs.get_stat_pref_default_value())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected error", K(ret), K(prefs.get_stat_pref_name()), - K(prefs.get_stat_pref_default_value())); - } else if (OB_FAIL(value_str.append_fmt("('%s', %s, %s, '%s'),", - prefs.get_stat_pref_name(), - null_str, - time_str, - prefs.get_stat_pref_default_value()))) { - LOG_WARN("failed to append", 
K(ret)); - } else { - ++ total_rows; - } - } - if (OB_SUCC(ret)) {//init block_sample - ObBlockSamplePrefs prefs; - if (OB_ISNULL(prefs.get_stat_pref_name()) || OB_ISNULL(prefs.get_stat_pref_default_value())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected error", K(ret), K(prefs.get_stat_pref_name()), - K(prefs.get_stat_pref_default_value())); - } else if (OB_FAIL(value_str.append_fmt("('%s', %s, %s, '%s'),", - prefs.get_stat_pref_name(), - null_str, - time_str, - prefs.get_stat_pref_default_value()))) { - LOG_WARN("failed to append", K(ret)); - } else { - ++ total_rows; - } - } - if (OB_SUCC(ret)) { - ObOnlineEstimatePercentPrefs prefs; - if (OB_ISNULL(prefs.get_stat_pref_name()) || OB_ISNULL(prefs.get_stat_pref_default_value())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected error", K(ret), K(prefs.get_stat_pref_name()), - K(prefs.get_stat_pref_default_value())); - } else if (OB_FAIL(value_str.append_fmt("('%s', %s, %s, '%s');", - prefs.get_stat_pref_name(), - null_str, - time_str, - prefs.get_stat_pref_default_value()))) { - LOG_WARN("failed to append", K(ret)); - } else { - ++ total_rows; - } - } + init_perfs_value(ObEstimatePercentPrefs, false/*last value*/);//init esimate_percent + init_perfs_value(ObIncrementalPrefs, false/*last value*/);//init incremental + init_perfs_value(ObIncrementalLevelPrefs, false/*last value*/);//init incremental_level + init_perfs_value(ObGranularityPrefs, false/*last value*/);//init granularity + init_perfs_value(ObMethodOptPrefs, false/*last value*/);//init method_opt + init_perfs_value(ObNoInvalidatePrefs, false/*last value*/);//init no_invalidate + init_perfs_value(ObOptionsPrefs, false/*last value*/);//init options + init_perfs_value(ObStalePercentPrefs, false/*last value*/);//init stale_percent + init_perfs_value(ObApproximateNdvPrefs, false/*last value*/);//init approximate_ndv + init_perfs_value(ObEstimateBlockPrefs, false/*last value*/);//init estimate_block + init_perfs_value(ObBlockSamplePrefs, 
false/*last value*/);//init block_sample + init_perfs_value(ObOnlineEstimatePercentPrefs, false/*last value*/); + init_perfs_value(ObAsyncGatherStaleRatioPrefs, false/*last value*/);//init async gather stale ratio + init_perfs_value(ObAsyncGatherSampleSizePrefs, false/*last value*/);//init async gather sample size + init_perfs_value(ObAsyncGatherFullTableSizePrefs, false/*last value*/);//init async gather full table size + init_perfs_value(ObAsyncStaleMaxTableSizePrefs, false/*last value*/);//init async stale max table size + init_perfs_value(ObHistEstPercentPrefs, false/*last value*/);//init hist_est_percent + init_perfs_value(ObHistBlockSamplePrefs, true/*last value*/);//init hist_block_sample if (OB_SUCC(ret)) { if (OB_FAIL(raw_sql.append_fmt(INIT_GLOBAL_PREFS, share::OB_ALL_OPTSTAT_GLOBAL_PREFS_TNAME, @@ -1122,6 +948,171 @@ int ObBlockSamplePrefs::check_pref_value_validity(ObTableStatParam *param/*defau return ret; } +int ObAsyncGatherStaleRatioPrefs::check_pref_value_validity(ObTableStatParam *param/*default null*/) +{ + int ret = OB_SUCCESS; + if (!pvalue_.empty()) { + ObObj src_obj; + ObObj dest_obj; + src_obj.set_string(ObVarcharType, pvalue_); + ObArenaAllocator calc_buf("StaleRatio"); + ObCastCtx cast_ctx(&calc_buf, NULL, CM_NONE, ObCharset::get_system_collation()); + double dst_val = 0.0; + if (OB_FAIL(ObObjCaster::to_type(ObNumberType, cast_ctx, src_obj, dest_obj))) { + LOG_WARN("failed to type", K(ret), K(src_obj)); + } else if (OB_FAIL(ObDbmsStatsUtils::cast_number_to_double(dest_obj.get_number(), dst_val))) { + LOG_WARN("failed to cast number to double", K(ret), K(src_obj)); + } else if (dst_val < MINIMUM_OF_ASYNC_GATHER_STALE_RATIO) { + ret = OB_ERR_DBMS_STATS_PL; + LOG_WARN("Illegal async gather stale ratio", K(ret), K(dst_val)); + } else if (param != NULL) { + //not implement + } else {/*do nothing*/} + if (OB_FAIL(ret)) { + ret = OB_ERR_DBMS_STATS_PL; + LOG_WARN("Illegal async gather stale ratio", K(ret), K(pvalue_)); + 
LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL, "Illegal async gather stale ratio, the minimum of stale ratio is not less than 2"); + } + } + return ret; +} + +int ObAsyncGatherSampleSizePrefs::check_pref_value_validity(ObTableStatParam *param/*default null*/) +{ + int ret = OB_SUCCESS; + if (!pvalue_.empty()) { + ObObj src_obj; + ObObj dest_obj; + src_obj.set_string(ObVarcharType, pvalue_); + ObArenaAllocator calc_buf("SampleSize"); + ObCastCtx cast_ctx(&calc_buf, NULL, CM_NONE, ObCharset::get_system_collation()); + int64_t sample_size = 0; + if (OB_FAIL(ObObjCaster::to_type(ObNumberType, cast_ctx, src_obj, dest_obj))) { + LOG_WARN("failed to type", K(ret), K(src_obj)); + } else if (OB_FAIL(dest_obj.get_number().extract_valid_int64_with_trunc(sample_size))) { + LOG_WARN("failed to extract valid int64 with trunc", K(ret), K(src_obj)); + } else if (sample_size < MAGIC_SAMPLE_SIZE) { + ret = OB_ERR_DBMS_STATS_PL; + LOG_WARN("Illegal async gather sample size", K(ret), K(sample_size)); + } else if (param != NULL) { + param->async_gather_sample_size_ = sample_size; + } else {/*do nothing*/} + if (OB_FAIL(ret)) { + ret = OB_ERR_DBMS_STATS_PL; + LOG_WARN("Illegal async gather sample size", K(ret), K(pvalue_)); + LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL, "Illegal async gather sample size, the minimum number of rows is not less than 5500."); + } + } + return ret; +} + +int ObAsyncGatherFullTableSizePrefs::check_pref_value_validity(ObTableStatParam *param/*default null*/) +{ + int ret = OB_SUCCESS; + if (!pvalue_.empty()) { + ObObj src_obj; + ObObj dest_obj; + src_obj.set_string(ObVarcharType, pvalue_); + ObArenaAllocator calc_buf("FullTableSize"); + ObCastCtx cast_ctx(&calc_buf, NULL, CM_NONE, ObCharset::get_system_collation()); + int64_t table_size = 0; + if (OB_FAIL(ObObjCaster::to_type(ObNumberType, cast_ctx, src_obj, dest_obj))) { + LOG_WARN("failed to type", K(ret), K(src_obj)); + } else if (OB_FAIL(dest_obj.get_number().extract_valid_int64_with_trunc(table_size))) { +
LOG_WARN("failed to extract valid int64 with trunc", K(ret), K(src_obj)); + } else if (table_size < DEFAULT_ASYNC_MIN_TABLE_SIZE && table_size != 0) { + ret = OB_ERR_DBMS_STATS_PL; + LOG_WARN("Illegal async gather full table size", K(ret), K(table_size)); + } else if (param != NULL) { + param->async_full_table_size_ = table_size; + } else {/*do nothing*/} + if (OB_FAIL(ret)) { + ret = OB_ERR_DBMS_STATS_PL; + LOG_WARN("Illegal async gather full table size", K(ret), K(pvalue_)); + LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL, "Illegal async gather full table size, the minimum number of rows is not less than 10000."); + } + } + return ret; +} + +int ObAsyncStaleMaxTableSizePrefs::check_pref_value_validity(ObTableStatParam *param/*default null*/) +{ + int ret = OB_SUCCESS; + if (!pvalue_.empty()) { + ObObj src_obj; + ObObj dest_obj; + src_obj.set_string(ObVarcharType, pvalue_); + ObArenaAllocator calc_buf("StaleMaxTabSize"); + ObCastCtx cast_ctx(&calc_buf, NULL, CM_NONE, ObCharset::get_system_collation()); + int64_t table_size = 0; + if (OB_FAIL(ObObjCaster::to_type(ObNumberType, cast_ctx, src_obj, dest_obj))) { + LOG_WARN("failed to type", K(ret), K(src_obj)); + } else if (OB_FAIL(dest_obj.get_number().extract_valid_int64_with_trunc(table_size))) { + LOG_WARN("failed to extract valid int64 with trunc", K(ret), K(src_obj)); + } else if (table_size < DEFAULT_ASYNC_MIN_TABLE_SIZE && table_size != 0) { + ret = OB_ERR_DBMS_STATS_PL; + LOG_WARN("Illegal async stale max table size", K(ret), K(table_size)); + } else {/*do nothing*/} + if (OB_FAIL(ret)) { + ret = OB_ERR_DBMS_STATS_PL; + LOG_WARN("Illegal async stale max table size", K(ret), K(pvalue_)); + LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL, "Illegal async stale max table size, the minimum number of rows is not less than 10000."); + } + } + return ret; +} + +int ObHistEstPercentPrefs::check_pref_value_validity(ObTableStatParam *param/*default null*/) +{ + int ret = OB_SUCCESS; + if (!pvalue_.empty()) { + if (0 ==
pvalue_.case_compare("DBMS_STATS.AUTO_SAMPLE_SIZE")) { + /*do nothing*/ + } else { + ObObj src_obj; + ObObj dest_obj; + src_obj.set_string(ObVarcharType, pvalue_); + ObArenaAllocator calc_buf("HistEstPercent"); + ObCastCtx cast_ctx(&calc_buf, NULL, CM_NONE, ObCharset::get_system_collation()); + double dst_val = 0.0; + if (OB_FAIL(ObObjCaster::to_type(ObNumberType, cast_ctx, src_obj, dest_obj))) { + LOG_WARN("failed to type", K(ret), K(src_obj)); + } else if (OB_FAIL(ObDbmsStatsUtils::cast_number_to_double(dest_obj.get_number(), dst_val))) { + LOG_WARN("failed to cast number to double", K(ret), K(src_obj)); + } else if (dst_val < 0.000001 || dst_val > 100.0) { + ret = OB_ERR_DBMS_STATS_PL; + LOG_WARN("Illegal value for hist est percent", K(ret), K(dst_val)); + } else if (param != NULL) { + param->hist_sample_info_.set_percent(dst_val); + } else {/*do nothing*/} + if (OB_FAIL(ret)) { + ret = OB_ERR_DBMS_STATS_PL; + LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL, "Illegal sample percent: must be in the range [0.000001,100]"); + } + } + } + return ret; +} + +int ObHistBlockSamplePrefs::check_pref_value_validity(ObTableStatParam *param/*default null*/) +{ + int ret = OB_SUCCESS; + if (pvalue_.empty() || 0 == pvalue_.case_compare("FALSE")) { + if (param != NULL) { + param->hist_sample_info_.set_is_block_sample(false); + } + } else if (0 == pvalue_.case_compare("TRUE")) { + if (param != NULL) { + param->hist_sample_info_.set_is_block_sample(true); + } + } else { + ret = OB_ERR_DBMS_STATS_PL; + LOG_WARN("Illegal value for BLOCK_SAMPLE", K(ret), K(pvalue_)); + LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL,"Illegal value for BLOCK_SAMPLE: must be {TRUE, FALSE}"); + } + return ret; +} + #define ISSPACE(c) ((c) == ' ' || (c) == '\n' || (c) == '\r' || (c) == '\t' || (c) == '\f' || (c) == '\v') //compatible oracle, global prefs/schema prefs just only can set "for all columns...." 
@@ -1186,6 +1177,78 @@ int ObOnlineEstimatePercentPrefs::check_pref_value_validity(ObTableStatParam *pa return ret; } +int ObDbmsStatsPreferences::get_extra_stats_perfs_for_upgrade(ObSqlString &raw_sql) +{ + int ret = OB_SUCCESS; + const char *null_str = "NULL"; + const char *time_str = "CURRENT_TIMESTAMP"; + ObSqlString value_str; + if (OB_SUCC(ret)) {//init async gather stale ratio + ObAsyncGatherStaleRatioPrefs prefs; + if (OB_ISNULL(prefs.get_stat_pref_name()) || OB_ISNULL(prefs.get_stat_pref_default_value())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected error", K(ret), K(prefs.get_stat_pref_name()), + K(prefs.get_stat_pref_default_value())); + } else if (OB_FAIL(value_str.append_fmt("('%s', %s, %s, '%s'),", + prefs.get_stat_pref_name(), + null_str, + time_str, + prefs.get_stat_pref_default_value()))) { + LOG_WARN("failed to append", K(ret)); + } + } + if (OB_SUCC(ret)) {//init async gather sample size + ObAsyncGatherSampleSizePrefs prefs; + if (OB_ISNULL(prefs.get_stat_pref_name()) || OB_ISNULL(prefs.get_stat_pref_default_value())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected error", K(ret), K(prefs.get_stat_pref_name()), + K(prefs.get_stat_pref_default_value())); + } else if (OB_FAIL(value_str.append_fmt("('%s', %s, %s, '%s'),", + prefs.get_stat_pref_name(), + null_str, + time_str, + prefs.get_stat_pref_default_value()))) { + LOG_WARN("failed to append", K(ret)); + } + } + if (OB_SUCC(ret)) {//init async gather full table size + ObAsyncGatherFullTableSizePrefs prefs; + if (OB_ISNULL(prefs.get_stat_pref_name()) || OB_ISNULL(prefs.get_stat_pref_default_value())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected error", K(ret), K(prefs.get_stat_pref_name()), + K(prefs.get_stat_pref_default_value())); + } else if (OB_FAIL(value_str.append_fmt("('%s', %s, %s, '%s'), ", + prefs.get_stat_pref_name(), + null_str, + time_str, + prefs.get_stat_pref_default_value()))) { + LOG_WARN("failed to append", K(ret)); + } + } + if (OB_SUCC(ret)) 
{//init async stale max table size + ObAsyncStaleMaxTableSizePrefs prefs; + if (OB_ISNULL(prefs.get_stat_pref_name()) || OB_ISNULL(prefs.get_stat_pref_default_value())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected error", K(ret), K(prefs.get_stat_pref_name()), + K(prefs.get_stat_pref_default_value())); + } else if (OB_FAIL(value_str.append_fmt("('%s', %s, %s, '%s');", + prefs.get_stat_pref_name(), + null_str, + time_str, + prefs.get_stat_pref_default_value()))) { + LOG_WARN("failed to append", K(ret)); + } + } + if (OB_SUCC(ret)) { + if (OB_FAIL(raw_sql.append_fmt(UPGRADE_GLOBAL_PREFS, + share::OB_ALL_OPTSTAT_GLOBAL_PREFS_TNAME, + value_str.ptr()))) { + LOG_WARN("failed to append fmt", K(ret)); + } + } + return ret; +} + int ObDbmsStatsPreferences::get_online_estimate_percent_for_upgrade(ObSqlString &raw_sql) { int ret = OB_SUCCESS; diff --git a/src/share/stat/ob_dbms_stats_preferences.h b/src/share/stat/ob_dbms_stats_preferences.h index 140fef096b..4dd94d3383 100644 --- a/src/share/stat/ob_dbms_stats_preferences.h +++ b/src/share/stat/ob_dbms_stats_preferences.h @@ -241,6 +241,84 @@ class ObOnlineEstimatePercentPrefs : public ObStatPrefs const char* get_stat_pref_for_update() const { return "100"; } }; +class ObAsyncGatherStaleRatioPrefs : public ObStatPrefs +{ + public: + ObAsyncGatherStaleRatioPrefs() : ObStatPrefs() {} + ObAsyncGatherStaleRatioPrefs(ObIAllocator *alloc, + ObSQLSessionInfo *session_info, + const ObString &pvalue) : + ObStatPrefs(alloc, session_info, pvalue) {} + virtual int check_pref_value_validity(ObTableStatParam *param = NULL) override; + virtual const char* get_stat_pref_name() const { return "ASYNC_GATHER_STALE_RATIO"; } + virtual const char* get_stat_pref_default_value() const { return "10"; } +}; + +class ObAsyncGatherSampleSizePrefs : public ObStatPrefs +{ + public: + ObAsyncGatherSampleSizePrefs() : ObStatPrefs() {} + ObAsyncGatherSampleSizePrefs(ObIAllocator *alloc, + ObSQLSessionInfo *session_info, + const ObString 
&pvalue) : + ObStatPrefs(alloc, session_info, pvalue) {} + virtual int check_pref_value_validity(ObTableStatParam *param = NULL) override; + virtual const char* get_stat_pref_name() const { return "ASYNC_GATHER_SAMPLE_SIZE"; } + virtual const char* get_stat_pref_default_value() const { return "1000000"; } +}; + +class ObAsyncGatherFullTableSizePrefs : public ObStatPrefs +{ + public: + ObAsyncGatherFullTableSizePrefs() : ObStatPrefs() {} + ObAsyncGatherFullTableSizePrefs(ObIAllocator *alloc, + ObSQLSessionInfo *session_info, + const ObString &pvalue) : + ObStatPrefs(alloc, session_info, pvalue) {} + virtual int check_pref_value_validity(ObTableStatParam *param = NULL) override; + virtual const char* get_stat_pref_name() const { return "ASYNC_GATHER_FULL_TABLE_SIZE"; } + virtual const char* get_stat_pref_default_value() const { return "10000000"; } +}; + +class ObAsyncStaleMaxTableSizePrefs : public ObStatPrefs +{ + public: + ObAsyncStaleMaxTableSizePrefs() : ObStatPrefs() {} + ObAsyncStaleMaxTableSizePrefs(ObIAllocator *alloc, + ObSQLSessionInfo *session_info, + const ObString &pvalue) : + ObStatPrefs(alloc, session_info, pvalue) {} + virtual int check_pref_value_validity(ObTableStatParam *param = NULL) override; + virtual const char* get_stat_pref_name() const { return "ASYNC_STALE_MAX_TABLE_SIZE"; } + virtual const char* get_stat_pref_default_value() const { return "100000000"; } +}; + +class ObHistEstPercentPrefs : public ObStatPrefs +{ + public: + ObHistEstPercentPrefs() : ObStatPrefs() {} + ObHistEstPercentPrefs(ObIAllocator *alloc, + ObSQLSessionInfo *session_info, + const ObString &pvalue) : + ObStatPrefs(alloc, session_info, pvalue) {} + virtual int check_pref_value_validity(ObTableStatParam *param = NULL) override; + virtual const char* get_stat_pref_name() const { return "HIST_EST_PERCENT"; } + virtual const char* get_stat_pref_default_value() const { return "DBMS_STATS.AUTO_SAMPLE_SIZE";} +}; + +class ObHistBlockSamplePrefs : public ObStatPrefs +{ + 
public: + ObHistBlockSamplePrefs() : ObStatPrefs() {} + ObHistBlockSamplePrefs(ObIAllocator *alloc, + ObSQLSessionInfo *session_info, + const ObString &pvalue) : + ObStatPrefs(alloc, session_info, pvalue) {} + virtual int check_pref_value_validity(ObTableStatParam *param = NULL) override; + virtual const char* get_stat_pref_name() const { return "HIST_BLOCK_SAMPLE"; } + virtual const char* get_stat_pref_default_value() const { return "FALSE"; } +}; + template static int new_stat_prefs(ObIAllocator &allocator, ObSQLSessionInfo *session_info, const ObString &opt_value, T *&src) @@ -263,8 +341,10 @@ public: static int reset_global_pref_defaults(ObExecContext &ctx); - static int get_prefs(ObExecContext &ctx, - const ObTableStatParam ¶m, + static int get_prefs(ObMySQLProxy *mysql_proxy, + ObIAllocator &allocator, + const uint64_t tenant_id, + const uint64_t table_id, const ObString &opt_name, ObObj &result); @@ -287,9 +367,12 @@ public: static int get_online_estimate_percent_for_upgrade(ObSqlString &sql); + static int get_extra_stats_perfs_for_upgrade(ObSqlString &sql); + private: - static int do_get_prefs(ObExecContext &ctx, - ObIAllocator *allocator, + static int do_get_prefs(ObMySQLProxy *mysql_proxy, + ObIAllocator &allocator, + const uint64_t tenant_id, const ObSqlString &raw_sql, bool &get_result, ObObj &result); diff --git a/src/share/stat/ob_dbms_stats_utils.cpp b/src/share/stat/ob_dbms_stats_utils.cpp index 068b6ca617..38003395fb 100644 --- a/src/share/stat/ob_dbms_stats_utils.cpp +++ b/src/share/stat/ob_dbms_stats_utils.cpp @@ -26,6 +26,8 @@ #include "sql/ob_result_set.h" #include "sql/optimizer/ob_opt_selectivity.h" #include "share/stat/ob_dbms_stats_preferences.h" +#include "observer/ob_sql_client_decorator.h" +#include "share/stat/ob_dbms_stats_executor.h" #ifdef OB_BUILD_ORACLE_PL #include "pl/sys_package/ob_json_pl_utils.h" @@ -171,7 +173,7 @@ int ObDbmsStatsUtils::check_is_stat_table(share::schema::ObSchemaGetterGuard &sc const int64_t table_id, bool 
&is_valid) { - bool ret = OB_SUCCESS; + int ret = OB_SUCCESS; is_valid = false; const ObTableSchema *table_schema = NULL; if (is_sys_table(table_id)) {//check sys table @@ -1103,6 +1105,13 @@ int ObDbmsStatsUtils::prepare_gather_stat_param(const ObTableStatParam ¶m, gather_param.global_part_id_ = param.global_part_id_; gather_param.gather_vectorize_ = gather_vectorize; gather_param.use_column_store_ = use_column_store; + gather_param.is_async_gather_ = param.is_async_gather_; + gather_param.async_gather_sample_size_ = param.async_gather_sample_size_; + gather_param.async_full_table_size_ = param.async_full_table_size_; + gather_param.hist_sample_info_.is_sample_ = param.hist_sample_info_.is_sample_; + gather_param.hist_sample_info_.is_block_sample_ = param.hist_sample_info_.is_block_sample_; + gather_param.hist_sample_info_.sample_type_ = param.hist_sample_info_.sample_type_; + gather_param.hist_sample_info_.sample_value_ = param.hist_sample_info_.sample_value_; return gather_param.column_group_params_.assign(param.column_group_params_); } @@ -1293,7 +1302,7 @@ int ObDbmsStatsUtils::implicit_commit_before_gather_stats(sql::ObExecContext &ct LOG_WARN("failed to get_optimizer_features_enable_version", K(ret)); } else if (optimizer_features_enable_version < COMPAT_VERSION_4_2_4 || (optimizer_features_enable_version >= COMPAT_VERSION_4_3_0 && - optimizer_features_enable_version < COMPAT_VERSION_4_3_2)) { + optimizer_features_enable_version < COMPAT_VERSION_4_3_3)) { //do nothing } else if (OB_FAIL(ObResultSet::implicit_commit_before_cmd_execute(*ctx.get_my_session(), ctx, stmt::T_ANALYZE))) { LOG_WARN("failed to implicit commit before cmd execute", K(ret)); @@ -1405,5 +1414,131 @@ int ObDbmsStatsUtils::get_sys_online_estimate_percent(sql::ObExecContext &ctx, return ret; } +int ObDbmsStatsUtils::check_can_async_gather_stats(sql::ObExecContext &ctx) +{ + int ret = OB_SUCCESS; + ObSqlString raw_sql; + if (OB_ISNULL(ctx.get_my_session())) { + ret = OB_ERR_UNEXPECTED; + 
LOG_WARN("get unexpected error", K(ret), K(ctx.get_my_session())); + } else if (OB_FAIL(raw_sql.append_fmt("SELECT 1 FROM dual WHERE EXISTS(SELECT 1 FROM %s WHERE tenant_id = %lu);", + share::OB_ALL_VIRTUAL_OPT_STAT_GATHER_MONITOR_TNAME, + ctx.get_my_session()->get_effective_tenant_id()))) { + LOG_WARN("failed to append fmt", K(ret)); + } else { + uint64_t tenant_id = ctx.get_my_session()->get_effective_tenant_id(); + SMART_VAR(ObMySQLProxy::MySQLResult, proxy_result) { + sqlclient::ObMySQLResult *client_result = NULL; + ObSQLClientRetryWeak sql_client_retry_weak(ctx.get_sql_proxy()); + if (OB_FAIL(sql_client_retry_weak.read(proxy_result, tenant_id, raw_sql.ptr()))) { + LOG_WARN("failed to execute sql", K(ret), K(raw_sql)); + } else if (OB_ISNULL(client_result = proxy_result.get_result())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to execute sql", K(ret)); + } else if (OB_FAIL(client_result->next())) { + if (OB_ITER_END != ret) { + LOG_WARN("failed to get next", K(ret)); + } else { + ret = OB_SUCCESS; + } + } else { + ret = OB_ERR_DBMS_STATS_PL; + LOG_WARN("async stats gathering needs to wait for other stats gathering tasks to finish", K(ret)); + LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL,"async stats gathering needs to wait for other stats gathering tasks to finish"); + } + int tmp_ret = OB_SUCCESS; + if (NULL != client_result) { + if (OB_SUCCESS != (tmp_ret = client_result->close())) { + LOG_WARN("close result set failed", K(ret), K(tmp_ret)); + ret = COVER_SUCC(tmp_ret); + } + } + } + } + return ret; +} + +int ObDbmsStatsUtils::cancel_async_gather_stats(sql::ObExecContext &ctx) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(ctx.get_my_session())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected error", K(ret), K(ctx.get_my_session())); + } else { + ObSEArray task_ids; + uint64_t tenant_id = ctx.get_my_session()->get_effective_tenant_id(); + ObArenaAllocator allocator("CancelAsyGather", OB_MALLOC_NORMAL_BLOCK_SIZE, tenant_id); + if 
(OB_FAIL(fetch_need_cancel_async_gather_stats_task(allocator, ctx, task_ids))) { + LOG_WARN("failed to fetch need cancel async gather stats task", K(ret)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < task_ids.count(); ++i) { + if (OB_FAIL(ObDbmsStatsExecutor::cancel_gather_stats(ctx, task_ids.at(i)))) { + if (ret != OB_ERR_DBMS_STATS_PL) { + LOG_WARN("failed to cancel gather stats", K(ret)); + } else { + ret = OB_SUCCESS; + } + } + } + } + } + return ret; +} + +int ObDbmsStatsUtils::fetch_need_cancel_async_gather_stats_task(ObIAllocator &allocator, + sql::ObExecContext &ctx, + ObIArray &task_ids) +{ + int ret = OB_SUCCESS; + ObSqlString raw_sql; + if (OB_ISNULL(ctx.get_my_session())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected error", K(ret), K(ctx.get_my_session())); + } else if (OB_FAIL(raw_sql.append_fmt("SELECT task_id FROM %s WHERE tenant_id = %lu and type = %d;", + share::OB_ALL_VIRTUAL_OPT_STAT_GATHER_MONITOR_TNAME, + ctx.get_my_session()->get_effective_tenant_id(), + ObOptStatGatherType::AYSNC_GATHER))) { + LOG_WARN("failed to append fmt", K(ret)); + } else { + uint64_t tenant_id = ctx.get_my_session()->get_effective_tenant_id(); + SMART_VAR(ObMySQLProxy::MySQLResult, proxy_result) { + sqlclient::ObMySQLResult *client_result = NULL; + ObSQLClientRetryWeak sql_client_retry_weak(ctx.get_sql_proxy()); + if (OB_FAIL(sql_client_retry_weak.read(proxy_result, tenant_id, raw_sql.ptr()))) { + LOG_WARN("failed to execute sql", K(ret), K(raw_sql)); + } else if (OB_ISNULL(client_result = proxy_result.get_result())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to execute sql", K(ret)); + } else { + while (OB_SUCC(ret) && OB_SUCC(client_result->next())) { + int64_t idx = 0; + ObObj obj; + ObString str; + ObString tmp_str; + if (OB_FAIL(client_result->get_obj(idx, obj))) { + LOG_WARN("failed to get object", K(ret)); + } else if (OB_FAIL(obj.get_string(str))) { + LOG_WARN("failed to get string", K(ret)); + } else if 
(OB_FAIL(ob_write_string(allocator, str, tmp_str))) { + LOG_WARN("failed to get int", K(ret), K(obj)); + } else if (OB_FAIL(task_ids.push_back(tmp_str))) { + LOG_WARN("failed to push back", K(ret)); + } + } + ret = OB_ITER_END == ret ? OB_SUCCESS : ret; + } + int tmp_ret = OB_SUCCESS; + if (NULL != client_result) { + if (OB_SUCCESS != (tmp_ret = client_result->close())) { + LOG_WARN("close result set failed", K(ret), K(tmp_ret)); + ret = COVER_SUCC(tmp_ret); + } + } + } + LOG_TRACE("failed to fetch need cancel async gather stats task", K(task_ids)); + } + return ret; +} + } } diff --git a/src/share/stat/ob_dbms_stats_utils.h b/src/share/stat/ob_dbms_stats_utils.h index 7f966536ef..8ab24a738e 100644 --- a/src/share/stat/ob_dbms_stats_utils.h +++ b/src/share/stat/ob_dbms_stats_utils.h @@ -195,6 +195,10 @@ public: const uint64_t tenant_id, const uint64_t table_id, double &percent); + static int check_can_async_gather_stats(sql::ObExecContext &ctx); + + static int cancel_async_gather_stats(sql::ObExecContext &ctx); + private: static int batch_write(share::schema::ObSchemaGetterGuard *schema_guard, const uint64_t tenant_id, @@ -206,6 +210,10 @@ private: const bool is_online_stat = false, const ObObjPrintParams &print_params = ObObjPrintParams()); + static int fetch_need_cancel_async_gather_stats_task(ObIAllocator &allocator, + sql::ObExecContext &ctx, + ObIArray &task_ids); + }; } diff --git a/src/share/stat/ob_hybrid_hist_estimator.cpp b/src/share/stat/ob_hybrid_hist_estimator.cpp index e2e81ee9d8..8d17a91db8 100644 --- a/src/share/stat/ob_hybrid_hist_estimator.cpp +++ b/src/share/stat/ob_hybrid_hist_estimator.cpp @@ -53,8 +53,10 @@ int ObHybridHistEstimator::estimate(const ObOptStatGatherParam ¶m, ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected error", K(ret), K(hybrid_col_params.count()), K(hybrid_col_stats.count())); } else if (OB_FAIL(compute_estimate_percent(opt_stat.table_stat_->get_row_count(), + opt_stat.table_stat_->get_micro_block_num(), + 
opt_stat.table_stat_->get_sstable_row_count() >= opt_stat.table_stat_->get_memtable_row_count(), max_num_buckets, - param.sample_info_, + param.hist_sample_info_, need_sample, est_percent, is_block_sample))) { @@ -328,6 +330,8 @@ int ObHybridHistEstimator::try_build_hybrid_hist(const ObColumnStatParam ¶m, * ii: if total_row_count > MAGIC_SAMPLE_SIZE then choosing MAGIC_SAMPLE_SIZE to sample; */ int ObHybridHistEstimator::compute_estimate_percent(int64_t total_row_count, + int64_t micro_block_num, + bool sstable_rows_more, int64_t max_num_bkts, const ObAnalyzeSampleInfo &sample_info, bool &need_sample, @@ -335,11 +339,11 @@ int ObHybridHistEstimator::compute_estimate_percent(int64_t total_row_count, bool &is_block_sample) { int ret = OB_SUCCESS; + is_block_sample = sample_info.is_block_sample_; if (0 == total_row_count) { need_sample = false; } else if (sample_info.is_sample_) { need_sample = true; - is_block_sample = sample_info.is_block_sample_; if (sample_info.sample_type_ == SampleType::RowSample) { if (sample_info.sample_value_ < total_row_count) { est_percent = (sample_info.sample_value_ * 100.0) / total_row_count; @@ -354,32 +358,22 @@ int ObHybridHistEstimator::compute_estimate_percent(int64_t total_row_count, } if (OB_SUCC(ret) && need_sample) { if (total_row_count * est_percent / 100 >= MAGIC_MIN_SAMPLE_SIZE) { - const int64_t MAGIC_MAX_SPECIFY_SAMPLE_SIZE = 1000000; - is_block_sample = !is_block_sample ? total_row_count >= MAX_AUTO_GATHER_FULL_TABLE_ROWS : is_block_sample; - int64_t max_allowed_multiple = max_num_bkts <= ObColumnStatParam::DEFAULT_HISTOGRAM_BUCKET_NUM ? 
1 : - max_num_bkts / ObColumnStatParam::DEFAULT_HISTOGRAM_BUCKET_NUM; - int64_t max_specify_sample_size = MAGIC_MAX_SPECIFY_SAMPLE_SIZE * max_allowed_multiple; - if (total_row_count * est_percent / 100 >= max_specify_sample_size) { - est_percent = max_specify_sample_size * 100.0 / total_row_count; - } + //do nothing } else if (total_row_count <= MAGIC_SAMPLE_SIZE) { need_sample = false; est_percent = 0.0; is_block_sample = false; } else { - is_block_sample = total_row_count >= MAX_AUTO_GATHER_FULL_TABLE_ROWS; est_percent = (MAGIC_SAMPLE_SIZE * 100.0) / total_row_count; } } - } else if (total_row_count >= MAX_AUTO_GATHER_FULL_TABLE_ROWS) { - need_sample = true; - is_block_sample = true; - const int64_t MAGIC_MAX_SAMPLE_SIZE = 100000; - est_percent = MAGIC_MAX_SAMPLE_SIZE * 100.0 / total_row_count; } else if (total_row_count >= MAGIC_MAX_AUTO_SAMPLE_SIZE) { + if (micro_block_num > MAXIMUM_BLOCK_CNT_OF_ROW_SAMPLE_GATHER_HYBRID_HIST || + total_row_count > MAXIMUM_ROWS_OF_ROW_SAMPLE_GATHER_HYBRID_HIST) { + is_block_sample = true; + } if (max_num_bkts <= ObColumnStatParam::DEFAULT_HISTOGRAM_BUCKET_NUM) { need_sample = true; - is_block_sample = false; est_percent = (MAGIC_SAMPLE_SIZE * 100.0) / total_row_count; } else { int64_t num_bound_bkts = static_cast(std::round(total_row_count * MAGIC_SAMPLE_CUT_RATIO)); @@ -389,7 +383,6 @@ int ObHybridHistEstimator::compute_estimate_percent(int64_t total_row_count, int64_t sample_size = MAGIC_SAMPLE_SIZE + MAGIC_BASE_SAMPLE_SIZE + (max_num_bkts - ObColumnStatParam::DEFAULT_HISTOGRAM_BUCKET_NUM) * MAGIC_MIN_SAMPLE_SIZE * 0.01; need_sample = true; - is_block_sample = false; est_percent = (sample_size * 100.0) / total_row_count; } } @@ -397,6 +390,11 @@ int ObHybridHistEstimator::compute_estimate_percent(int64_t total_row_count, need_sample = false; } if (OB_SUCC(ret)) { + //refine est_percent avoid sampling block cnt is too small. 
+ if (is_block_sample && sstable_rows_more && + (est_percent / 100.0 * micro_block_num) < MINIMUM_BLOCK_CNT_OF_BLOCK_SAMPLE_HYBRID_HIST) { + est_percent = MINIMUM_BLOCK_CNT_OF_BLOCK_SAMPLE_HYBRID_HIST * 100.0 / micro_block_num; + } // refine est_percent est_percent = std::max(0.000001, est_percent); if (est_percent >= 100) { @@ -404,7 +402,7 @@ int ObHybridHistEstimator::compute_estimate_percent(int64_t total_row_count, } } - LOG_TRACE("Succeed to compute estimate percent", K(ret), K(total_row_count), K(max_num_bkts), + LOG_TRACE("Succeed to compute estimate percent", K(ret), K(total_row_count), K(max_num_bkts), K(micro_block_num), K(need_sample), K(est_percent), K(is_block_sample)); return ret; } diff --git a/src/share/stat/ob_hybrid_hist_estimator.h b/src/share/stat/ob_hybrid_hist_estimator.h index 3c2fd7008d..e67ac64c01 100644 --- a/src/share/stat/ob_hybrid_hist_estimator.h +++ b/src/share/stat/ob_hybrid_hist_estimator.h @@ -109,6 +109,8 @@ private: bool &is_done); int compute_estimate_percent(int64_t total_row_count, + int64_t micro_block_num, + bool sstable_rows_more, int64_t max_num_buckets, const ObAnalyzeSampleInfo &sample_info, bool &need_sample, diff --git a/src/share/stat/ob_incremental_stat_estimator.cpp b/src/share/stat/ob_incremental_stat_estimator.cpp index 54939a9a13..a7b25baa1f 100644 --- a/src/share/stat/ob_incremental_stat_estimator.cpp +++ b/src/share/stat/ob_incremental_stat_estimator.cpp @@ -603,7 +603,6 @@ int ObIncrementalStatEstimator::derive_global_col_stat(ObExecContext &ctx, ObGlobalNdvEval ndv_eval; ObGlobalAvglenEval avglen_eval; ObSEArray all_part_histograms; - bool can_try_derive_hist = need_derive_hist; int64_t max_bucket_num = param.column_params_.at(i).bucket_num_; for (int64_t j = 0; OB_SUCC(ret) && j < part_cnt; ++j) { ObOptColumnStat *opt_col_stat = NULL; @@ -630,7 +629,6 @@ int ObIncrementalStatEstimator::derive_global_col_stat(ObExecContext &ctx, OB_FAIL(all_part_histograms.push_back(opt_col_stat->get_histogram()))) { 
LOG_WARN("failed to push back histogram", K(ret)); } else { - can_try_derive_hist &= (opt_col_stat->get_num_distinct() == 0 || opt_col_stat->get_histogram().is_valid()); null_eval.add(opt_col_stat->get_num_null()); if (opt_col_stat->get_num_distinct() != 0) { min_eval.add(opt_col_stat->get_min_value()); @@ -667,7 +665,7 @@ int ObIncrementalStatEstimator::derive_global_col_stat(ObExecContext &ctx, } else if (OB_FAIL(col_stats.push_back(col_stat))) { LOG_WARN("failed to push back", K(ret)); } else if (need_derive_hist) { - if (can_try_derive_hist && col_stat->get_num_distinct() > 0) { + if (col_stat->get_num_distinct() > 0 && !all_part_histograms.empty()) { if (OB_FAIL(derive_global_histogram(all_part_histograms, alloc, max_bucket_num, @@ -678,15 +676,16 @@ int ObIncrementalStatEstimator::derive_global_col_stat(ObExecContext &ctx, need_gather_hist))) { LOG_WARN("failed to derive global histogram from part histogram", K(ret)); } - } else if (max_bucket_num > 1 && + } else if (approx_level == PARTITION_LEVEL && + max_bucket_num > 1 && param.column_params_.at(i).need_basic_stat() && col_stat->get_num_distinct() > 0) { need_gather_hist = true; - int64_t max_disuse_cnt = std::ceil(col_stat->get_num_not_null() * 1.0 / max_bucket_num); //After testing, the error of using hyperloglog to estimate ndv is within %5. + //In order to improve the gathering efficiency, it is required that ndv cannot be less than the number of buckets const double MAX_LLC_NDV_ERR_RATE = !param.need_approx_ndv_ ? 
0.0 : 0.05; const int64_t fault_tolerance_cnt = std::ceil(col_stat->get_num_distinct() * MAX_LLC_NDV_ERR_RATE); - if (col_stat->get_num_distinct() >= max_bucket_num + max_disuse_cnt + fault_tolerance_cnt) { + if (col_stat->get_num_distinct() >= max_bucket_num + fault_tolerance_cnt) { //directly gather hybrid histogram col_stat->get_histogram().set_type(ObHistType::HYBIRD); } else { @@ -694,7 +693,7 @@ int ObIncrementalStatEstimator::derive_global_col_stat(ObExecContext &ctx, col_stat->get_histogram().set_type(ObHistType::TOP_FREQUENCY); } } - LOG_TRACE("succeed to derive global col stat", K(*col_stat)); + LOG_TRACE("succeed to derive global col stat", K(*col_stat), K(need_gather_hist)); } } } @@ -754,8 +753,8 @@ int ObIncrementalStatEstimator::derive_global_histogram(ObIArray &a LOG_WARN("failed to allocate memory", K(ret), K(ptr)); } else { ObTopKFrequencyHistograms *top_k_fre_hist = new (ptr) ObTopKFrequencyHistograms(); - top_k_fre_hist->set_window_size(1000); - top_k_fre_hist->set_item_size(256); + top_k_fre_hist->set_window_size(ObStatTopKHist::get_window_size(max_bucket_num)); + top_k_fre_hist->set_item_size(max_bucket_num); top_k_fre_hist->set_is_topk_hist_need_des_row(true); top_k_fre_hist->set_max_disuse_cnt(std::ceil(not_null_count * 1.0 / max_bucket_num)); for (int64_t i = 0; OB_SUCC(ret) && i < all_part_histograms.count(); ++i) { diff --git a/src/share/stat/ob_opt_stat_gather_stat.cpp b/src/share/stat/ob_opt_stat_gather_stat.cpp index 641031aa43..800bd5cf35 100644 --- a/src/share/stat/ob_opt_stat_gather_stat.cpp +++ b/src/share/stat/ob_opt_stat_gather_stat.cpp @@ -192,7 +192,7 @@ int ObOptStatGatherStat::deep_copy(common::ObIAllocator &allocator, ObOptStatGat return ret; } //----------------------------------------------------- -int ObOptStatRunningMonitor::add_table_info(common::ObTableStatParam &table_param, +int ObOptStatRunningMonitor::add_table_info(const common::ObTableStatParam &table_param, double stale_percent) { int ret = OB_SUCCESS; @@ -211,7 
+211,7 @@ int ObOptStatRunningMonitor::add_table_info(common::ObTableStatParam &table_para opt_stat_gather_stat_.set_table_id(table_param.table_id_); ObSqlString properties_sql_str; char *buf = NULL; - if (OB_FAIL(properties_sql_str.append_fmt("GRANULARITY:%.*s;METHOD_OPT:%.*s;DEGREE:%ld;ESTIMATE_PERCENT:%lf;BLOCK_SAMPLE:%d;STALE_PERCENT:%lf;", + if (OB_FAIL(properties_sql_str.append_fmt("GRANULARITY:%.*s;METHOD_OPT:%.*s;DEGREE:%ld;ESTIMATE_PERCENT:%lf;BLOCK_SAMPLE:%d;STALE_PERCENT:%lf;HIST_EST_PERCENT:%lf", table_param.granularity_.length(), table_param.granularity_.ptr(), table_param.method_opt_.length(), @@ -219,7 +219,8 @@ int ObOptStatRunningMonitor::add_table_info(common::ObTableStatParam &table_para table_param.degree_, table_param.sample_info_.is_sample_ ? table_param.sample_info_.sample_value_ : 100.0, table_param.sample_info_.is_block_sample_, - stale_percent))) { + stale_percent, + table_param.hist_sample_info_.is_sample_ ? table_param.hist_sample_info_.sample_value_ : 100.0))) { LOG_WARN("failed to append fmt", K(ret)); } else if (OB_ISNULL(buf = static_cast(allocator_.alloc(properties_sql_str.length())))) { ret = OB_ALLOCATE_MEMORY_FAILED; diff --git a/src/share/stat/ob_opt_stat_gather_stat.h b/src/share/stat/ob_opt_stat_gather_stat.h index 3bfe8ef697..3a47b44732 100644 --- a/src/share/stat/ob_opt_stat_gather_stat.h +++ b/src/share/stat/ob_opt_stat_gather_stat.h @@ -26,7 +26,8 @@ namespace common enum ObOptStatGatherType { INVALID_GATHER_TYPE = -1, MANUAL_GATHER, - AUTO_GATHER + AUTO_GATHER, + AYSNC_GATHER }; enum ObOptStatRunningPhase { @@ -213,7 +214,7 @@ struct ObOptStatRunningMonitor void init(int64_t current_time, int64_t current_memory_used, ObOptStatGatherStat &opt_stat_gather_stat); - int add_table_info(common::ObTableStatParam &table_param, + int add_table_info(const common::ObTableStatParam &table_param, double stale_percent = -1.0); int add_monitor_info(ObOptStatRunningPhase current_phase, double extra_progress_ratio = 0); double 
get_monitor_extra_progress_ratio(ObOptStatRunningPhase current_phase, diff --git a/src/share/stat/ob_opt_stat_manager.cpp b/src/share/stat/ob_opt_stat_manager.cpp index 2e42a468d4..8afbe48eb9 100644 --- a/src/share/stat/ob_opt_stat_manager.cpp +++ b/src/share/stat/ob_opt_stat_manager.cpp @@ -637,6 +637,7 @@ int ObOptStatManager::get_table_stat(const uint64_t tenant_id, opt_stat.get_micro_block_num() * scale_ratio); stat.set_last_analyzed(opt_stat.get_last_analyzed()); stat.set_stat_locked(opt_stat.is_locked()); + stat.set_stale_stats(opt_stat.is_stat_expired()); } return ret; } diff --git a/src/share/stat/ob_opt_stat_monitor_manager.cpp b/src/share/stat/ob_opt_stat_monitor_manager.cpp index e2648ddc29..466aafd356 100644 --- a/src/share/stat/ob_opt_stat_monitor_manager.cpp +++ b/src/share/stat/ob_opt_stat_monitor_manager.cpp @@ -26,6 +26,7 @@ #include "storage/ob_locality_manager.h" #include "lib/rc/ob_rc.h" #include "observer/ob_server.h" +#include "pl/sys_package/ob_dbms_stats.h" namespace oceanbase { @@ -73,6 +74,30 @@ namespace common "updates = updates + values(updates)," \ "deletes = deletes + values(deletes);" +#define INSERT_STALE_TABLE_STAT_SQL "INSERT /*+QUERY_TIMEOUT(60000000)*/INTO %s(tenant_id," \ + "table_id," \ + "partition_id," \ + "index_type," \ + "object_type," \ + "last_analyzed," \ + "sstable_row_cnt," \ + "sstable_avg_row_len," \ + "macro_blk_cnt," \ + "micro_blk_cnt," \ + "memtable_row_cnt," \ + "memtable_avg_row_len," \ + "row_cnt," \ + "avg_row_len," \ + "global_stats," \ + "user_stats," \ + "stattype_locked," \ + "stale_stats) VALUES %s" \ + "ON DUPLICATE KEY UPDATE " \ + "stale_stats = if(last_analyzed > 0, stale_stats, values(stale_stats))" + +#define STALE_TABLE_STAT_MOCK_VALUE_PATTERN "(%lu, %lu, %ld, 0, 0, 0, -1, -1, 0, 0, -1, -1, 0, 0, 0, 0, 0, 1)" + + void ObOptStatMonitorFlushAllTask::runTimerTask() { int ret = OB_SUCCESS; @@ -81,6 +106,7 @@ void ObOptStatMonitorFlushAllTask::runTimerTask() share::schema::ObMultiVersionSchemaService 
&schema_service = share::schema::ObMultiVersionSchemaService::get_instance(); share::schema::ObSchemaGetterGuard schema_guard; bool in_restore = false; + THIS_WORKER.set_timeout_ts(FLUSH_INTERVAL / 2 + ObTimeUtility::current_time()); if (OB_FAIL(schema_service.get_tenant_schema_guard(optstat_monitor_mgr_->tenant_id_, schema_guard))) { LOG_WARN("failed to get schema guard", K(ret)); } else if (OB_FAIL(schema_guard.check_tenant_is_restore(optstat_monitor_mgr_->tenant_id_, in_restore))) { @@ -103,6 +129,7 @@ void ObOptStatMonitorCheckTask::runTimerTask() share::schema::ObMultiVersionSchemaService &schema_service = share::schema::ObMultiVersionSchemaService::get_instance(); share::schema::ObSchemaGetterGuard schema_guard; bool in_restore = false; + THIS_WORKER.set_timeout_ts(CHECK_INTERVAL + ObTimeUtility::current_time()); if (OB_FAIL(schema_service.get_tenant_schema_guard(optstat_monitor_mgr_->tenant_id_, schema_guard))) { LOG_WARN("failed to get schema guard", K(ret)); } else if (OB_FAIL(schema_guard.check_tenant_is_restore(optstat_monitor_mgr_->tenant_id_, in_restore))) { @@ -349,6 +376,7 @@ int ObOptStatMonitorManager::update_dml_stat_info() } else if (!dml_stats.empty()) { ObSqlString value_sql; int count = 0; + uint64_t tenant_id = dml_stats.at(0).tenant_id_; for (int64_t i = 0; OB_SUCC(ret) && i < dml_stats.count(); ++i) { if (OB_FAIL(get_dml_stat_sql(dml_stats.at(i), 0 != count, value_sql))) { LOG_WARN("failed to get dml stat sql", K(ret)); @@ -367,8 +395,18 @@ int ObOptStatMonitorManager::update_dml_stat_info() } } if (OB_SUCC(ret)) { + bool no_check = (OB_E(EventTable::EN_LEADER_STORAGE_ESTIMATION) OB_SUCCESS) != OB_SUCCESS; + uint64_t data_version = 0; if (OB_FAIL(clean_useless_dml_stat_info())) { LOG_WARN("failed to clean useless dml stat info", K(ret)); + } else if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, data_version))) { + LOG_WARN("fail to get tenant data version", KR(ret), K(tenant_id), K(data_version)); + } else if (data_version < 
MOCK_DATA_VERSION_4_2_4_0 || + (data_version >= DATA_VERSION_4_3_0_0 && data_version < DATA_VERSION_4_3_3_0) || + no_check) { + //do nothing + } else if (OB_FAIL(check_opt_stats_expired(dml_stats))) { + LOG_WARN("failed to check opt stats expired", K(ret)); } else {/*do nohting*/} } } @@ -699,12 +737,15 @@ int ObOptStatMonitorManager::update_dml_stat_info(const ObIArray { int ret = OB_SUCCESS; ObSqlString value_sql; - int count = 0; + int64_t count = 0; LOG_TRACE("begin to update dml stat info from direct load", K(dml_stats)); + ObSEArray tmp_dml_stats; for (int64_t i = 0; OB_SUCC(ret) && i < dml_stats.count(); ++i) { if (OB_ISNULL(dml_stats.at(i))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpcted error", K(ret), K(dml_stats.at(i))); + } else if (OB_FAIL(tmp_dml_stats.push_back(*dml_stats.at(i)))) { + LOG_WARN("failed to push back", K(ret)); } else { if (OB_FAIL(get_dml_stat_sql(*dml_stats.at(i), 0 != count, value_sql))) { LOG_WARN("failed to get dml stat sql", K(ret)); @@ -723,6 +764,19 @@ int ObOptStatMonitorManager::update_dml_stat_info(const ObIArray LOG_WARN("failed to exec insert sql", K(ret)); } } + if (OB_SUCC(ret) && !tmp_dml_stats.empty()) { + bool no_check = (OB_E(EventTable::EN_LEADER_STORAGE_ESTIMATION) OB_SUCCESS) != OB_SUCCESS; + uint64_t data_version = 0; + if (OB_FAIL(GET_MIN_DATA_VERSION(tmp_dml_stats.at(0).tenant_id_, data_version))) { + LOG_WARN("fail to get tenant data version", KR(ret), K(tmp_dml_stats.at(0).tenant_id_), K(data_version)); + } else if (data_version < MOCK_DATA_VERSION_4_2_4_0 || + (data_version >= DATA_VERSION_4_3_0_0 && data_version < DATA_VERSION_4_3_3_0) || + no_check) { + //do nothing + } else if (OB_FAIL(check_opt_stats_expired(tmp_dml_stats, true/*is_from_direct_load*/))) { + LOG_WARN("failed to check opt stats expired", K(ret)); + } else {/*do nohting*/} + } return ret; } @@ -816,5 +870,759 @@ int ObOptStatMonitorManager::get_dml_stats(ObIArray &dml_stats) return ret; } +int 
ObOptStatMonitorManager::check_opt_stats_expired(ObIArray &dml_stats, + bool is_from_direct_load/*default false*/) +{ + int ret = OB_SUCCESS; + if (!dml_stats.empty()) { + ObSEArray stale_infos; + int64_t begin_ts = ObTimeUtility::current_time(); + int64_t global_async_stale_max_table_size = DEFAULT_ASYNC_STALE_MAX_TABLE_SIZE; + if (OB_FAIL(get_async_stale_max_table_size(dml_stats.at(0).tenant_id_, + OB_INVALID_ID, + global_async_stale_max_table_size))) { + LOG_WARN("failed to get async stale max table size", K(ret)); + } else if (OB_UNLIKELY(global_async_stale_max_table_size <= 0)) { + LOG_INFO("skip to check opt stats expired", K(global_async_stale_max_table_size)); + } else if (OB_FAIL(get_opt_stats_expired_table_info(dml_stats, stale_infos, is_from_direct_load))) { + LOG_WARN("failed to get opt stats expired table info", K(ret)); + } else { + const int64_t MIN_ASYNC_GATHER_TABLE_ROW_CNT = 500; + for (int64_t i = 0; OB_SUCC(ret) && i < stale_infos.count(); ++i) { + if (stale_infos.at(i).inserts_ <= MIN_ASYNC_GATHER_TABLE_ROW_CNT) { + //do nothing + } else if (OB_FAIL(mark_the_opt_stat_expired(stale_infos.at(i)))) { + LOG_WARN("failed to mark the opt stat expired", K(ret)); + } + } + } + if (ObTimeUtility::current_time() - begin_ts > ObOptStatMonitorCheckTask::CHECK_INTERVAL) { + LOG_INFO("check opt stats expired cost too much time", K(begin_ts), K(ObTimeUtility::current_time() - begin_ts), K(dml_stats)); + } + } + return ret; } + +int ObOptStatMonitorManager::get_opt_stats_expired_table_info(ObIArray &dml_stats, + ObIArray &stale_infos, + bool is_from_direct_load) +{ + int ret = OB_SUCCESS; + int64_t begin_idx = 0; + while (OB_SUCC(ret) && begin_idx < dml_stats.count()) { + ObSqlString where_list; + int64_t end_idx = std::min(begin_idx + MAX_PROCESS_BATCH_TABLET_CNT, dml_stats.count()); + uint64_t tenant_id = dml_stats.at(begin_idx).tenant_id_; + if (OB_FAIL(gen_tablet_list(dml_stats, begin_idx, end_idx, is_from_direct_load, where_list))) { + LOG_WARN("failed to 
gen tablet list", K(ret)); + } else if (where_list.empty()) { + //do nothing + } else if (OB_FAIL(do_get_opt_stats_expired_table_info(tenant_id, where_list, stale_infos))) { + LOG_WARN("failed to do get opt stats expired table info", K(ret)); + } + begin_idx = end_idx; + } + return ret; } + +int ObOptStatMonitorManager::gen_tablet_list(const ObIArray &dml_stats, + const int64_t begin_idx, + const int64_t end_idx, + const bool is_from_direct_load, + ObSqlString &tablet_list) +{ + int ret = OB_SUCCESS; + tablet_list.reset(); + int64_t begin_ts = ObTimeUtility::current_time(); + ObSchemaGetterGuard schema_guard; + if (OB_UNLIKELY(begin_idx < 0 || end_idx < 0 || + begin_idx >= end_idx || end_idx > dml_stats.count())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected error", K(ret), K(begin_idx), K(end_idx), K(dml_stats)); + } else if (OB_ISNULL(GCTX.schema_service_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected error", K(ret), K(GCTX.schema_service_)); + } else if (OB_FAIL(GCTX.schema_service_->get_tenant_schema_guard(dml_stats.at(begin_idx).tenant_id_, schema_guard))) { + LOG_WARN("get tenant schema guard failed", K(ret)); + } else { + bool is_first = true; + for (int64_t i = begin_idx; OB_SUCC(ret) && i < end_idx; ++i) { + bool is_valid = is_from_direct_load && !is_inner_table(dml_stats.at(i).table_id_); + if (!is_valid && OB_FAIL(ObDbmsStatsUtils::check_is_stat_table(schema_guard, + dml_stats.at(i).tenant_id_, + dml_stats.at(i).table_id_, + is_valid))) { + LOG_WARN("failed to check is stat table", K(ret)); + } else if (is_valid) { + uint64_t ext_tenant_id = share::schema::ObSchemaUtils::get_extract_tenant_id(dml_stats.at(i).tenant_id_, dml_stats.at(i).tenant_id_); + uint64_t pure_table_id = share::schema::ObSchemaUtils::get_extract_schema_id(dml_stats.at(i).tenant_id_, dml_stats.at(i).table_id_); + if (OB_FAIL(tablet_list.append_fmt("%s(%lu, %lu, %ld)", is_first ? 
"(" : " ,", + ext_tenant_id, pure_table_id, + dml_stats.at(i).tablet_id_))) { + LOG_WARN("failed to append sql", K(ret)); + } else { + is_first = false; + } + } + } + if (OB_SUCC(ret) && !is_first) { + if (OB_FAIL(tablet_list.append(")"))) { + LOG_WARN("failed to append", K(ret)); + } + } + } + return ret; +} + +int ObOptStatMonitorManager::do_get_opt_stats_expired_table_info(const int64_t tenant_id, + const ObSqlString &where_str, + ObIArray &stale_infos) +{ + int ret = OB_SUCCESS; + ObSqlString select_sql; + if (OB_FAIL(select_sql.append_fmt("SELECT m.table_id, m.tablet_id, m.inserts "\ + "FROM %s m " \ + "LEFT JOIN %s up " \ + "ON m.table_id = up.table_id "\ + "AND up.pname = 'ASYNC_GATHER_STALE_RATIO' "\ + "JOIN %s gp "\ + "ON gp.sname = 'ASYNC_GATHER_STALE_RATIO' "\ + "WHERE (CASE WHEN m.last_inserts = 0 THEN 1 + cast(coalesce(up.valchar, gp.spare4) as signed) "\ + "ELSE m.inserts * 1.0 / m.last_inserts END) > cast(coalesce(up.valchar, gp.spare4) as signed) "\ + "AND (m.tenant_id, m.table_id, m.tablet_id) in %s", + share::OB_ALL_MONITOR_MODIFIED_TNAME, + share::OB_ALL_OPTSTAT_USER_PREFS_TNAME, + share::OB_ALL_OPTSTAT_GLOBAL_PREFS_TNAME, + where_str.ptr()))) { + LOG_WARN("failed to append fmt", K(ret)); + } else { + SMART_VAR(ObMySQLProxy::MySQLResult, proxy_result) { + sqlclient::ObMySQLResult *client_result = NULL; + ObSQLClientRetryWeak sql_client_retry_weak(mysql_proxy_); + if (OB_FAIL(sql_client_retry_weak.read(proxy_result, tenant_id, select_sql.ptr()))) { + LOG_WARN("failed to execute sql", K(ret), K(select_sql)); + } else if (OB_ISNULL(client_result = proxy_result.get_result())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to execute sql", K(ret)); + } else { + while (OB_SUCC(ret) && OB_SUCC(client_result->next())) { + int64_t idx1 = 0; + int64_t idx2 = 1; + int64_t idx3 = 2; + ObObj obj1; + ObObj obj2; + ObObj obj3; + int64_t table_id = -1; + int64_t tablet_id = 0; + int64_t inserts = 0; + if (OB_FAIL(client_result->get_obj(idx1, obj1)) || + 
OB_FAIL(client_result->get_obj(idx2, obj2)) || + OB_FAIL(client_result->get_obj(idx3, obj3))) { + LOG_WARN("failed to get object", K(ret)); + } else if (OB_FAIL(obj1.get_int(table_id)) || + OB_FAIL(obj2.get_int(tablet_id)) || + OB_FAIL(obj3.get_int(inserts))) { + LOG_WARN("failed to get int", K(ret), K(obj1), K(obj2), K(inserts)); + } else { + bool is_found = false; + for (int64_t i = 0; !is_found && OB_SUCC(ret) && i < stale_infos.count(); ++i) { + if (table_id == stale_infos.at(i).table_id_) { + is_found = true; + if (OB_FAIL(stale_infos.at(i).tablet_ids_.push_back(tablet_id))) { + LOG_WARN("failed to push back", K(ret)); + } else { + stale_infos.at(i).inserts_ += inserts; + } + } + } + if (OB_SUCC(ret) && !is_found) { + OptStatExpiredTableInfo stale_info; + stale_info.tenant_id_ = tenant_id; + stale_info.table_id_ = table_id; + stale_info.inserts_ = inserts; + if (OB_FAIL(stale_info.tablet_ids_.push_back(tablet_id))) { + LOG_WARN("failed to push back", K(ret)); + } else if (OB_FAIL(stale_infos.push_back(stale_info))) { + LOG_WARN("failed to push back", K(ret)); + } + } + } + } + ret = OB_ITER_END == ret ? 
OB_SUCCESS : ret; + } + int tmp_ret = OB_SUCCESS; + if (NULL != client_result) { + if (OB_SUCCESS != (tmp_ret = client_result->close())) { + LOG_WARN("close result set failed", K(ret), K(tmp_ret)); + ret = COVER_SUCC(tmp_ret); + } + } + } + LOG_TRACE("do get opt stats expired table info", K(select_sql), K(stale_infos)); + } + return ret; +} + +int ObOptStatMonitorManager::mark_the_opt_stat_expired(const OptStatExpiredTableInfo &expired_table_info) +{ + int ret = OB_SUCCESS; + ObSEArray part_infos; + ObSEArray subpart_infos; + ObSEArray partition_ids; + ObSEArray expired_partition_ids; + share::schema::ObPartitionLevel part_level = share::schema::ObPartitionLevel::PARTITION_LEVEL_MAX; + ObSEArray table_stats; + ObSEArray expired_table_stats; + ObSEArray no_table_stats; + ObArenaAllocator allocator("OptStatMonitor", OB_MALLOC_NORMAL_BLOCK_SIZE, expired_table_info.tenant_id_); + int64_t begin_ts = ObTimeUtility::current_time(); + int64_t async_stale_max_table_size = DEFAULT_ASYNC_STALE_MAX_TABLE_SIZE; + if (OB_FAIL(get_expired_table_part_info(allocator, expired_table_info, part_level, part_infos, subpart_infos))) { + LOG_WARN("failed to get expired table part info", K(ret)); + } else if (part_level == share::schema::ObPartitionLevel::PARTITION_LEVEL_MAX) { + //do nothing + } else if (OB_FAIL(get_need_check_opt_stat_partition_ids(expired_table_info, + part_infos, + subpart_infos, + partition_ids))) { + LOG_WARN("failed to get need check opt stat partition ids", K(ret)); + } else if (OB_FAIL(ObOptStatManager::get_instance().get_table_stat(expired_table_info.tenant_id_, + expired_table_info.table_id_, + partition_ids, + table_stats))) { + LOG_WARN("failed to get table stat", K(ret)); + } else if (OB_FAIL(get_async_stale_max_table_size(expired_table_info.tenant_id_, + expired_table_info.table_id_, + async_stale_max_table_size))) { + LOG_WARN("failed to get async stale max table size", K(ret)); + } else if (OB_UNLIKELY(async_stale_max_table_size <= 0)) { + LOG_INFO("skip 
to mark the opt stat expired", K(async_stale_max_table_size)); + } else if (OB_FAIL(get_need_mark_opt_stats_expired(table_stats, + expired_table_info, + async_stale_max_table_size, + begin_ts, + part_level, + part_infos, + subpart_infos, + expired_table_stats, + no_table_stats))) { + LOG_WARN("failed to get need mark opt stats expired", K(ret)); + } else if (OB_FAIL(do_mark_the_opt_stat_expired(expired_table_info.tenant_id_, + expired_table_stats, + expired_partition_ids))) { + LOG_WARN("failed to do mark the opt stat expired", K(ret)); + } else if (OB_FAIL(do_mark_the_opt_stat_missing(expired_table_info.tenant_id_, + no_table_stats))) { + LOG_WARN("failed to do mark the opt stat missing", K(ret)); + } else { + obrpc::ObUpdateStatCacheArg stat_arg; + stat_arg.tenant_id_ = expired_table_info.tenant_id_; + stat_arg.table_id_ = expired_table_info.table_id_; + if (OB_FAIL(append(stat_arg.partition_ids_, expired_partition_ids))) { + LOG_WARN("failed to append", K(ret)); + } else if (OB_FAIL(pl::ObDbmsStats::update_stat_cache(expired_table_info.tenant_id_, stat_arg))) { + LOG_WARN("failed to update stat cache", K(ret)); + } + } + return ret; +} + +int ObOptStatMonitorManager::get_expired_table_part_info(ObIAllocator &allocator, + const OptStatExpiredTableInfo &expired_table_info, + share::schema::ObPartitionLevel &part_level, + ObIArray &part_infos, + ObIArray &subpart_infos) +{ + int ret = OB_SUCCESS; + ObSchemaGetterGuard schema_guard; + const ObTableSchema *table_schema = NULL; + part_level = share::schema::ObPartitionLevel::PARTITION_LEVEL_MAX; + part_infos.reset(); + subpart_infos.reset(); + if (OB_ISNULL(GCTX.schema_service_) || OB_UNLIKELY(!expired_table_info.is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected error", K(ret), K(expired_table_info), K(GCTX.schema_service_)); + } else if (OB_FAIL(GCTX.schema_service_->get_tenant_schema_guard(expired_table_info.tenant_id_, schema_guard))) { + LOG_WARN("get tenant schema guard failed", K(ret)); + } 
else if (OB_FAIL(schema_guard.get_table_schema(expired_table_info.tenant_id_, + expired_table_info.table_id_, + table_schema))) { + LOG_WARN("get table schema failed", K(ret), K(expired_table_info.tenant_id_), K(expired_table_info.table_id_)); + } else if (OB_ISNULL(table_schema)) {//maybe table isn't exists. + //do nothing + } else if (OB_FAIL(pl::ObDbmsStats::get_table_part_infos(table_schema, allocator, part_infos, subpart_infos))) { + LOG_WARN("failed to get table part infos", K(ret)); + } else { + part_level = table_schema->get_part_level(); + } + return ret; +} + +int ObOptStatMonitorManager::get_need_check_opt_stat_partition_ids(const OptStatExpiredTableInfo &expired_table_info, + ObIArray &part_infos, + ObIArray &subpart_infos, + ObIArray &partition_ids) +{ + int ret = OB_SUCCESS; + //non partition table + if (part_infos.empty() && subpart_infos.empty()) { + if (OB_FAIL(partition_ids.push_back(expired_table_info.table_id_))) { + LOG_WARN("failed to push back", K(ret)); + } + //partition table, global stat partition id is -1 + } else if (OB_FAIL(partition_ids.push_back(-1))) { + LOG_WARN("failed to push back", K(ret)); + } else if (!part_infos.empty() && subpart_infos.empty()) {//part table + if (expired_table_info.tablet_ids_.count() == part_infos.count()) { + for (int64_t i = 0; OB_SUCC(ret) && i < part_infos.count(); ++i) { + if (OB_FAIL(partition_ids.push_back(part_infos.at(i).part_id_))) { + LOG_WARN("failed to push back", K(ret)); + } + } + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < expired_table_info.tablet_ids_.count(); ++i) { + bool found_it = false; + for (int64_t j = 0; OB_SUCC(ret) && !found_it && j < part_infos.count(); ++j) { + if (expired_table_info.tablet_ids_.at(i) == static_cast(part_infos.at(j).tablet_id_.id())) { + if (OB_FAIL(partition_ids.push_back(part_infos.at(j).part_id_))) { + LOG_WARN("failed to push back", K(ret)); + } else { + found_it = true; + } + } + } + } + } + } else if (!part_infos.empty() && !subpart_infos.empty()) 
{//subpart table + hash::ObHashMap partition_ids_map; + if (expired_table_info.tablet_ids_.count() == subpart_infos.count()) { + for (int64_t i = 0; OB_SUCC(ret) && i < part_infos.count(); ++i) { + if (OB_FAIL(partition_ids.push_back(part_infos.at(i).part_id_))) { + LOG_WARN("failed to push back", K(ret)); + } + } + for (int64_t i = 0; OB_SUCC(ret) && i < subpart_infos.count(); ++i) { + if (OB_FAIL(partition_ids.push_back(subpart_infos.at(i).part_id_))) { + LOG_WARN("failed to push back", K(ret)); + } + } + } else if (OB_FAIL(partition_ids_map.create(part_infos.count(), "PartIdsMap", "PartIdsMapNode", expired_table_info.tenant_id_))) { + LOG_WARN("fail to create hash map", K(ret), K(expired_table_info.tablet_ids_.count())); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < expired_table_info.tablet_ids_.count(); ++i) { + bool found_it = false; + for (int64_t j = 0; OB_SUCC(ret) && !found_it && j < subpart_infos.count(); ++j) { + if (expired_table_info.tablet_ids_.at(i) == static_cast(subpart_infos.at(j).tablet_id_.id())) { + bool tmp_var = false; + if (OB_FAIL(partition_ids.push_back(subpart_infos.at(j).part_id_))) { + LOG_WARN("failed to push back", K(ret)); + } else { + found_it = true; + if (OB_FAIL(partition_ids_map.get_refactored(subpart_infos.at(j).first_part_id_, tmp_var))) { + if (OB_HASH_NOT_EXIST == ret) { + ret = OB_SUCCESS; + if (OB_FAIL(partition_ids.push_back(subpart_infos.at(j).first_part_id_))) { + LOG_WARN("failed to push back", K(ret)); + } else if (OB_FAIL(partition_ids_map.set_refactored(subpart_infos.at(j).first_part_id_, true))) { + LOG_WARN("failed to set refactored", K(ret)); + } else {/*do nothing*/} + } else { + LOG_WARN("failed to get refactored", K(ret)); + } + } + } + } + } + } + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected error", K(ret)); + } + LOG_INFO("get need check opt stat partition ids", K(expired_table_info), K(part_infos), + K(subpart_infos), K(partition_ids)); + return ret; +} + +int 
ObOptStatMonitorManager::get_need_mark_opt_stats_expired(const ObIArray &table_stats,
+                                                             const OptStatExpiredTableInfo &expired_table_info,
+                                                             const int64_t async_stale_max_table_size,
+                                                             const int64_t begin_ts,
+                                                             const share::schema::ObPartitionLevel &part_level,
+                                                             const ObIArray &part_infos,
+                                                             const ObIArray &subpart_infos,
+                                                             ObIArray &expired_table_stats,
+                                                             ObIArray &no_table_stats)
+{
+  // Splits table_stats into two outputs:
+  //   - expired_table_stats: entries analyzed before begin_ts that are not yet flagged
+  //     stale, do not exceed async_stale_max_table_size rows, and are judged expired
+  //     for their stat level under the given part_level;
+  //   - no_table_stats: entries that were never analyzed (last_analyzed <= 0) and are
+  //     not flagged stale. This output is emptied once any analyzed entry was seen.
+  // Returns OB_SUCCESS, the first error reported by a callee, or OB_ERR_UNEXPECTED
+  // for an unexpected part level / stat level combination.
+  int ret = OB_SUCCESS;
+  bool have_table_stats = false;
+  for (int64_t i = 0; OB_SUCC(ret) && i < table_stats.count(); ++i) {
+    bool is_stat_expired = false;
+    have_table_stats |= table_stats.at(i).get_last_analyzed() > 0;
+    if (table_stats.at(i).get_last_analyzed() <= 0) {
+      if (!table_stats.at(i).is_stat_expired()) {
+        if (OB_FAIL(no_table_stats.push_back(table_stats.at(i)))) {
+          LOG_WARN("failed to push back", K(ret));
+        }
+      }
+    } else if (table_stats.at(i).is_stat_expired() ||
+               table_stats.at(i).get_last_analyzed() >= begin_ts ||
+               table_stats.at(i).get_row_count() > async_stale_max_table_size) {
+      //do nothing
+    } else if (part_level == share::schema::ObPartitionLevel::PARTITION_LEVEL_ZERO) {
+      is_stat_expired = true;
+    } else if (part_level == share::schema::ObPartitionLevel::PARTITION_LEVEL_ONE) {
+      if (table_stats.count() == part_infos.count() + 1 ||
+          table_stats.at(i).get_object_type() == StatLevel::PARTITION_LEVEL) {
+        is_stat_expired = true;
+      } else if (table_stats.at(i).get_object_type() == StatLevel::TABLE_LEVEL) {
+        // An empty tablet list makes the DML-info check cover the whole table.
+        ObSEArray tablet_ids;
+        if (OB_FAIL(check_table_stat_expired_by_dml_info(expired_table_info.tenant_id_,
+                                                         expired_table_info.table_id_,
+                                                         tablet_ids,
+                                                         is_stat_expired))) {
+          // Keep the callee's error code instead of masking it as OB_ERR_UNEXPECTED,
+          // matching the other two call sites in this function.
+          LOG_WARN("failed to check table stat expired by dml info", K(ret));
+        }
+      } else {
+        ret = OB_ERR_UNEXPECTED;
+        LOG_WARN("get unexpected error", K(ret), K(table_stats.at(i)));
+      }
+    } else if (part_level == share::schema::ObPartitionLevel::PARTITION_LEVEL_TWO) {
+      if (table_stats.count() == part_infos.count() + subpart_infos.count() + 1 ||
+          table_stats.at(i).get_object_type() == StatLevel::SUBPARTITION_LEVEL) {
+        is_stat_expired = true;
+      } else if (table_stats.at(i).get_object_type() == StatLevel::PARTITION_LEVEL) {
+        // Collect the tablets of every sub-partition under partition i, and check
+        // whether each of those sub-partitions has an analyzed entry in table_stats.
+        ObSEArray tablet_ids;
+        bool is_all_subpart_expired = true;
+        for (int64_t j = 0; OB_SUCC(ret) && j < subpart_infos.count(); ++j) {
+          if (table_stats.at(i).get_partition_id() == subpart_infos.at(j).first_part_id_) {
+            if (OB_FAIL(tablet_ids.push_back(subpart_infos.at(j).tablet_id_.id()))) {
+              LOG_WARN("failed to push back", K(ret));
+            } else if (is_all_subpart_expired) {
+              bool found_it = false;
+              for (int64_t k = 0; !found_it && k < table_stats.count(); ++k) {
+                // Look up the sub-partition's own entry (index k). Using index i here
+                // compared the partition entry against a sub-partition id, which can
+                // never match.
+                if (table_stats.at(k).get_partition_id() == subpart_infos.at(j).part_id_) {
+                  found_it = true;
+                  is_all_subpart_expired &= table_stats.at(k).get_last_analyzed() > 0;
+                }
+              }
+              is_all_subpart_expired &= found_it;
+            }
+          }
+        }
+        if (OB_SUCC(ret)) {
+          if (is_all_subpart_expired) {
+            is_stat_expired = true;
+          } else if (OB_FAIL(check_table_stat_expired_by_dml_info(expired_table_info.tenant_id_,
+                                                                  expired_table_info.table_id_,
+                                                                  tablet_ids,
+                                                                  is_stat_expired))) {
+            LOG_WARN("failed to check table stat expired by dml info", K(ret));
+          }
+        }
+      } else if (table_stats.at(i).get_object_type() == StatLevel::TABLE_LEVEL) {
+        ObSEArray tablet_ids;
+        if (OB_FAIL(check_table_stat_expired_by_dml_info(expired_table_info.tenant_id_,
+                                                         expired_table_info.table_id_,
+                                                         tablet_ids,
+                                                         is_stat_expired))) {
+          LOG_WARN("failed to check table stat expired by dml info", K(ret));
+        }
+      } else {
+        ret = OB_ERR_UNEXPECTED;
+        LOG_WARN("get unexpected error", K(ret), K(table_stats.at(i)));
+      }
+    } else {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("get unexpected error", K(ret), K(table_stats.at(i)));
+    }
+    if (OB_SUCC(ret) && is_stat_expired) {
+      if (OB_FAIL(expired_table_stats.push_back(table_stats.at(i)))) {
+        LOG_WARN("failed to push back", K(ret));
+      }
+    }
+    // "Missing" placeholders are only wanted when nothing has been analyzed at all.
+    if (OB_SUCC(ret) && have_table_stats) {
+      no_table_stats.reset();
+    }
+  }
+  LOG_INFO("get need mark opt stats expired", K(expired_table_stats),
K(no_table_stats)); + return ret; +} + +int ObOptStatMonitorManager::check_table_stat_expired_by_dml_info(const uint64_t tenant_id, + const uint64_t table_id, + const ObIArray &tablet_ids, + bool &is_stat_expired) +{ + int ret = OB_SUCCESS; + ObSqlString tablet_list; + is_stat_expired = false; + if (OB_FAIL(gen_tablet_list(tablet_ids, tablet_list))) { + LOG_WARN("failed to gen tablet list", K(ret)); + } else { + uint64_t ext_tenant_id = share::schema::ObSchemaUtils::get_extract_tenant_id(tenant_id, tenant_id); + uint64_t pure_table_id = share::schema::ObSchemaUtils::get_extract_schema_id(tenant_id, table_id); + ObSqlString select_sql; + if (OB_FAIL(select_sql.append_fmt("SELECT 1 "\ + "FROM (SELECT table_id,"\ + "sum(inserts-deletes) AS row_cnt,"\ + "sum(inserts+updates+deletes) AS total_modified_cnt,"\ + "sum(last_inserts+last_updates+last_deletes) AS last_modified_cnt "\ + "from %s "\ + "WHERE tenant_id = %lu "\ + "AND table_id = %lu %s%s "\ + "GROUP BY table_id) m "\ + "LEFT JOIN %s up "\ + "ON m.table_id = up.table_id "\ + "AND up.pname = 'ASYNC_GATHER_STALE_RATIO' "\ + "JOIN %s gp "\ + "ON gp.sname = 'ASYNC_GATHER_STALE_RATIO' "\ + "WHERE (CASE WHEN last_modified_cnt = 0 THEN 1 + cast(coalesce(up.valchar, gp.spare4) as signed) "\ + "ELSE total_modified_cnt * 1.0 / last_modified_cnt END) > cast(COALESCE(up.valchar, gp.spare4) AS signed) "\ + "AND row_cnt > 0;", + share::OB_ALL_MONITOR_MODIFIED_TNAME, + ext_tenant_id, + pure_table_id, + tablet_list.empty() ? " " : " AND tablet_id in ", + tablet_list.empty() ? 
" " : tablet_list.ptr(), + share::OB_ALL_OPTSTAT_USER_PREFS_TNAME, + share::OB_ALL_OPTSTAT_GLOBAL_PREFS_TNAME + ))) { + LOG_WARN("failed to append fmt", K(ret)); + } else { + LOG_TRACE("check table stat expired by dml info", K(select_sql)); + SMART_VAR(ObMySQLProxy::MySQLResult, proxy_result) { + sqlclient::ObMySQLResult *client_result = NULL; + ObSQLClientRetryWeak sql_client_retry_weak(mysql_proxy_); + if (OB_FAIL(sql_client_retry_weak.read(proxy_result, tenant_id, select_sql.ptr()))) { + LOG_WARN("failed to execute sql", K(ret), K(select_sql)); + } else if (OB_ISNULL(client_result = proxy_result.get_result())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to execute sql", K(ret)); + } else { + while (OB_SUCC(ret) && !is_stat_expired && OB_SUCC(client_result->next())) { + is_stat_expired = true; + } + ret = OB_ITER_END == ret ? OB_SUCCESS : ret; + } + int tmp_ret = OB_SUCCESS; + if (NULL != client_result) { + if (OB_SUCCESS != (tmp_ret = client_result->close())) { + LOG_WARN("close result set failed", K(ret), K(tmp_ret)); + ret = COVER_SUCC(tmp_ret); + } + } + } + } + LOG_TRACE("check_table_stat_expired_by_dml_info end", K(is_stat_expired)); + } + return ret; +} + +int ObOptStatMonitorManager::gen_tablet_list(const ObIArray &tablet_ids, + ObSqlString &tablet_list) +{ + int ret = OB_SUCCESS; + for (int64_t i = 0; OB_SUCC(ret) && i < tablet_ids.count(); ++i) { + char prefix = (i == 0 ? '(' : ' '); + char suffix = (i == tablet_ids.count() - 1 ? 
')' : ',');
+    if (OB_FAIL(tablet_list.append_fmt("%c%lu%c", prefix, tablet_ids.at(i), suffix))) {
+      LOG_WARN("failed to append sql", K(ret));
+    } else {/*do nothing*/}
+  }
+  return ret;
+}
+
+// Inserts one row per entry of no_table_stats into the table-stat inner table
+// (share::OB_ALL_TABLE_STAT_TNAME) using the INSERT_STALE_TABLE_STAT_SQL template,
+// which is defined outside this view. Rows are written in batches of at most
+// MAX_PROCESS_BATCH_TABLET_CNT inside a single transaction: committed when every
+// batch succeeds, rolled back otherwise. Does nothing when no_table_stats is empty.
+// Returns OB_SUCCESS or the first error encountered.
+int ObOptStatMonitorManager::do_mark_the_opt_stat_missing(const uint64_t tenant_id,
+                                                          const ObIArray &no_table_stats)
+{
+  int ret = OB_SUCCESS;
+  if (!no_table_stats.empty()) {
+    int64_t begin_idx = 0;
+    ObMySQLTransaction trans;
+    if (OB_ISNULL(mysql_proxy_)) {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("get unexpected null", K(ret), K(mysql_proxy_));
+    } else if (OB_FAIL(trans.start(mysql_proxy_, tenant_id))) {
+      LOG_WARN("fail to start transaction", K(ret));
+    } else {
+      while (OB_SUCC(ret) && begin_idx < no_table_stats.count()) {
+        ObSqlString insert_sql;
+        ObSqlString values_list;
+        int64_t affected_rows = 0;
+        int64_t end_idx = std::min(begin_idx + MAX_PROCESS_BATCH_TABLET_CNT, no_table_stats.count());
+        if (OB_FAIL(gen_values_list(tenant_id, no_table_stats, begin_idx, end_idx, values_list))) {
+          LOG_WARN("failed to gen values list", K(ret));
+        } else if (OB_UNLIKELY(values_list.empty())) {
+          ret = OB_ERR_UNEXPECTED;
+          LOG_WARN("get unexpected null", K(ret), K(values_list));
+        } else if (OB_FAIL(insert_sql.append_fmt(INSERT_STALE_TABLE_STAT_SQL,
+                                                 share::OB_ALL_TABLE_STAT_TNAME,
+                                                 values_list.ptr()))) {
+          // Previously an empty branch: the failure ended the loop without any trace.
+          LOG_WARN("failed to append fmt", K(ret));
+        } else if (OB_FAIL(trans.write(tenant_id, insert_sql.ptr(), affected_rows))) {
+          LOG_WARN("fail to exec sql", K(insert_sql), K(ret));
+        } else {
+          begin_idx = end_idx;
+          LOG_INFO("Succeed to do mark the opt stat missing", K(insert_sql), K(no_table_stats), K(affected_rows));
+        }
+      }
+      //end gather trans
+      if (OB_SUCC(ret)) {
+        if (OB_FAIL(trans.end(true))) {
+          LOG_WARN("fail to commit transaction", K(ret));
+        }
+      } else {
+        // Roll back, but report the original failure rather than the rollback result.
+        int tmp_ret = OB_SUCCESS;
+        if (OB_SUCCESS != (tmp_ret = trans.end(false))) {
+          LOG_WARN("fail to roll back transaction", K(tmp_ret));
+        }
+      }
+    }
+  }
+  return ret;
+}
+
+int ObOptStatMonitorManager::do_mark_the_opt_stat_expired(const uint64_t tenant_id,
+                                                          const ObIArray &expired_table_stats,
+                                                          ObIArray &expired_partition_ids)
+{
+  // Sets stale_stats = 1 on the table-stat rows matching expired_table_stats, in
+  // batches of at most MAX_PROCESS_BATCH_TABLET_CNT entries. The table id of each
+  // batch is taken from its first entry. The partition ids of processed entries are
+  // appended to expired_partition_ids by gen_part_analyzed_list.
+  // Returns OB_SUCCESS or the first error encountered.
+  int ret = OB_SUCCESS;
+  int64_t begin_idx = 0;
+  if (OB_ISNULL(mysql_proxy_)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("get unexpected null", K(ret), K(mysql_proxy_));
+  }
+  while (OB_SUCC(ret) && begin_idx < expired_table_stats.count()) {
+    ObSqlString update_sql;
+    ObSqlString same_part_analyzed_list;
+    ObSqlString diff_part_analyzed_list;
+    int64_t affected_rows = 0;
+    int64_t end_idx = std::min(begin_idx + MAX_PROCESS_BATCH_TABLET_CNT, expired_table_stats.count());
+    uint64_t ext_tenant_id = share::schema::ObSchemaUtils::get_extract_tenant_id(tenant_id, tenant_id);
+    uint64_t pure_table_id = share::schema::ObSchemaUtils::get_extract_schema_id(tenant_id, expired_table_stats.at(begin_idx).get_table_id());
+    if (OB_FAIL(gen_part_analyzed_list(expired_table_stats, begin_idx, end_idx,
+                                       same_part_analyzed_list,
+                                       diff_part_analyzed_list,
+                                       expired_partition_ids))) {
+      LOG_WARN("failed to gen part analyzed list", K(ret));
+    } else if (OB_FAIL(update_sql.append_fmt("update /*+QUERY_TIMEOUT(60000000)*/%s set stale_stats = 1 where tenant_id = %lu and table_id = %lu and %s",
+                                             share::OB_ALL_TABLE_STAT_TNAME,
+                                             ext_tenant_id,
+                                             pure_table_id,
+                                             !same_part_analyzed_list.empty() ? same_part_analyzed_list.ptr() : diff_part_analyzed_list.ptr()))) {
+      // Previously an empty branch: the failure ended the loop without any trace.
+      LOG_WARN("failed to append fmt", K(ret));
+    } else if (OB_FAIL(mysql_proxy_->write(tenant_id, update_sql.ptr(), affected_rows))) {
+      LOG_WARN("fail to exec sql", K(update_sql), K(ret));
+    } else {
+      begin_idx = end_idx;
+      LOG_INFO("Succeed to do mark the opt stat expired", K(update_sql), K(expired_table_stats), K(affected_rows));
+    }
+  }
+  return ret;
+}
+
+// Builds two alternative SQL predicates for the stats in [begin_idx, end_idx) and
+// appends each stat's partition id to expired_partition_ids.
+//   - diff_part_analyzed_list: "(partition_id, last_analyzed) in ((p,usec_to_time(t)), ...)".
+//   - same_part_analyzed_list: "partition_id in (...) AND last_analyzed = usec_to_time(t)".
+//     It is completed only when every stat in the range shares one last_analyzed
+//     value; in that case diff_part_analyzed_list is reset. Otherwise it is reset
+//     and left empty.
+// Returns OB_ERR_UNEXPECTED for an invalid index range.
+int ObOptStatMonitorManager::gen_part_analyzed_list(const ObIArray &expired_table_stats,
+                                                    const int64_t begin_idx,
+                                                    const int64_t end_idx,
+                                                    ObSqlString &same_part_analyzed_list,
+                                                    ObSqlString &diff_part_analyzed_list,
+                                                    ObIArray &expired_partition_ids)
+{
+  int ret = OB_SUCCESS;
+  if (OB_UNLIKELY(begin_idx < 0 || end_idx < 0 ||
+                  begin_idx >= end_idx || end_idx > expired_table_stats.count())) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("get unexpected error", K(ret), K(begin_idx), K(end_idx), K(expired_table_stats));
+  } else {
+    // -1 means "no common last_analyzed value": set after the first mismatch so that
+    // no later entry can re-enter the same-value branch.
+    int64_t last_analyzed = -1;
+    for (int64_t i = begin_idx; OB_SUCC(ret) && i < end_idx; ++i) {
+      char suffix = (i == end_idx - 1 ? ')' : ',');
+      if (OB_FAIL(expired_partition_ids.push_back(expired_table_stats.at(i).get_partition_id()))) {
+        LOG_WARN("failed to push back", K(ret));
+      } else if (OB_FAIL(diff_part_analyzed_list.append_fmt("%s(%ld,usec_to_time(%ld))%c", i == begin_idx ? "(partition_id, last_analyzed) in (" : " ",
+                                                            expired_table_stats.at(i).get_partition_id(),
+                                                            expired_table_stats.at(i).get_last_analyzed(),
+                                                            suffix))) {
+        LOG_WARN("failed to append sql", K(ret));
+      } else if (i == begin_idx || last_analyzed == expired_table_stats.at(i).get_last_analyzed()) {
+        last_analyzed = expired_table_stats.at(i).get_last_analyzed();
+        if (OB_FAIL(same_part_analyzed_list.append_fmt("%s%ld%c", i == begin_idx ? "partition_id in (" : " ",
+                                                       expired_table_stats.at(i).get_partition_id(),
+                                                       suffix))) {
+          LOG_WARN("failed to append sql", K(ret));
+        } else if (i == end_idx - 1) {
+          if (OB_FAIL(same_part_analyzed_list.append_fmt(" AND last_analyzed = usec_to_time(%ld)", last_analyzed))) {
+            LOG_WARN("failed to append sql", K(ret));
+          } else {
+            diff_part_analyzed_list.reset();
+          }
+        }
+      } else {
+        last_analyzed = -1;
+        same_part_analyzed_list.reset();
+      }
+    }
+  }
+  return ret;
+}
+
+// Appends one value tuple per stat in [begin_idx, end_idx) to values_list, separated
+// by ", ". Each tuple is rendered from STALE_TABLE_STAT_MOCK_VALUE_PATTERN (defined
+// outside this view) with the extracted tenant id, the extracted table id and the
+// stat's partition id. Returns OB_ERR_UNEXPECTED for an invalid index range.
+int ObOptStatMonitorManager::gen_values_list(const uint64_t tenant_id,
+                                             const ObIArray &no_table_stats,
+                                             const int64_t begin_idx,
+                                             const int64_t end_idx,
+                                             ObSqlString &values_list)
+{
+  int ret = OB_SUCCESS;
+  if (OB_UNLIKELY(begin_idx < 0 || end_idx < 0 ||
+                  begin_idx >= end_idx || end_idx > no_table_stats.count())) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("get unexpected error", K(ret), K(begin_idx), K(end_idx), K(no_table_stats));
+  } else {
+    for (int64_t i = begin_idx; OB_SUCC(ret) && i < end_idx; ++i) {
+      ObSqlString value;
+      uint64_t ext_tenant_id = share::schema::ObSchemaUtils::get_extract_tenant_id(tenant_id, tenant_id);
+      uint64_t pure_table_id = share::schema::ObSchemaUtils::get_extract_schema_id(tenant_id, no_table_stats.at(i).get_table_id());
+      if (OB_FAIL(value.append_fmt(STALE_TABLE_STAT_MOCK_VALUE_PATTERN,
+                                   ext_tenant_id,
+                                   pure_table_id,
+                                   no_table_stats.at(i).get_partition_id()))) {
+        LOG_WARN("failed to append fmt", K(ret));
+      } else if (OB_FAIL(values_list.append_fmt("%s%s", i == begin_idx ? " " : ", ", value.ptr()))) {
+        // This is a string append, not an array push_back.
+        LOG_WARN("failed to append fmt", K(ret));
+      }
+    }
+  }
+  return ret;
+}
+
+// Reads the preference named by ObAsyncStaleMaxTableSizePrefs for (tenant_id,
+// table_id) and converts it to int64 in async_stale_max_table_size. When the stored
+// value is NULL the default assigned below is kept. A negative value yields
+// OB_ERR_DBMS_STATS_PL.
+int ObOptStatMonitorManager::get_async_stale_max_table_size(const uint64_t tenant_id,
+                                                            const uint64_t table_id,
+                                                            int64_t &async_stale_max_table_size)
+{
+  int ret = OB_SUCCESS;
+  ObArenaAllocator tmp_alloc("OptStatPrefs", OB_MALLOC_NORMAL_BLOCK_SIZE, tenant_id);
+  ObAsyncStaleMaxTableSizePrefs prefs;
+  ObString opt_name(prefs.get_stat_pref_name());
+  ObObj result;
+  ObObj dest_obj;
+  ObCastCtx cast_ctx(&tmp_alloc, NULL, CM_NONE, ObCharset::get_system_collation());
+  // NOTE(review): both callers pre-initialise their variable with
+  // DEFAULT_ASYNC_STALE_MAX_TABLE_SIZE, but this unconditional assignment replaces it
+  // with DEFAULT_ASYNC_MAX_SCAN_ROWCOUNT. This looks like a copy-paste from a sibling
+  // getter; confirm which default is intended for a NULL preference.
+  async_stale_max_table_size = DEFAULT_ASYNC_MAX_SCAN_ROWCOUNT;
+  if (OB_FAIL(ObDbmsStatsPreferences::get_prefs(mysql_proxy_, tmp_alloc, tenant_id,
+                                                table_id, opt_name, result))) {
+    LOG_WARN("failed to get prefs", K(ret));
+  } else if (result.is_null()) {
+    //do nothing
+  } else if (OB_FAIL(ObObjCaster::to_type(ObNumberType, cast_ctx, result, dest_obj))) {
+    LOG_WARN("failed to type", K(ret), K(result));
+  } else if (OB_FAIL(dest_obj.get_number().extract_valid_int64_with_trunc(async_stale_max_table_size))) {
+    LOG_WARN("failed to extract valid int64 with trunc", K(ret), K(result));
+  } else if (async_stale_max_table_size < 0) {
+    ret = OB_ERR_DBMS_STATS_PL;
+    LOG_WARN("Illegal async stale max table size", K(ret), K(async_stale_max_table_size));
+  }
+  LOG_TRACE("get_async_stale_max_table_size", K(async_stale_max_table_size), K(result));
+  return ret;
+}
+
+}
+}
\ No newline at end of file
diff --git a/src/share/stat/ob_opt_stat_monitor_manager.h b/src/share/stat/ob_opt_stat_monitor_manager.h
index df1e48ac04..7a21c1ef7f 100644
--- a/src/share/stat/ob_opt_stat_monitor_manager.h
+++ b/src/share/stat/ob_opt_stat_monitor_manager.h
@@ -65,6 +65,17 @@ public:
   ObOptStatMonitorManager *optstat_monitor_mgr_;
 };
+struct OptStatExpiredTableInfo
+{
+  OptStatExpiredTableInfo() : tenant_id_(0), table_id_(0), tablet_ids_(), inserts_(0) {}
+  bool is_valid() const { return tenant_id_ > 0 && table_id_ > 0 &&
!tablet_ids_.empty(); } + uint64_t tenant_id_; + uint64_t table_id_; + ObSEArray tablet_ids_; + uint64_t inserts_; + TO_STRING_KV(K(tenant_id_), K(table_id_), K(tablet_ids_), K(inserts_)); +}; + class ObOptStatMonitorManager { friend class ObOptStatMonitorFlushAllTask; @@ -142,11 +153,67 @@ public: ObOptStatMonitorFlushAllTask &get_flush_all_task() { return flush_all_task_; } ObOptStatMonitorCheckTask &get_check_task() { return check_task_; } int init(uint64_t tenant_id); + int check_opt_stats_expired(ObIArray &dml_stats, bool is_from_direct_load = false); + int get_opt_stats_expired_table_info(ObIArray &dml_stats, + ObIArray &stale_infos, + bool is_from_direct_load); + int gen_tablet_list(const ObIArray &dml_stats, + const int64_t begin_idx, + const int64_t end_idx, + const bool is_from_direct_load, + ObSqlString &tablet_list); + int do_get_opt_stats_expired_table_info(const int64_t tenant_id, + const ObSqlString &where_str, + ObIArray &stale_infos); + int mark_the_opt_stat_expired(const OptStatExpiredTableInfo &expired_table_info); + int get_expired_table_part_info(ObIAllocator &allocator, + const OptStatExpiredTableInfo &expired_table_info, + share::schema::ObPartitionLevel &part_level, + ObIArray &part_infos, + ObIArray &subpart_infos); + int get_need_check_opt_stat_partition_ids(const OptStatExpiredTableInfo &expired_table_info, + ObIArray &part_infos, + ObIArray &subpart_infos, + ObIArray &partition_ids); + int check_table_stat_expired_by_dml_info(const uint64_t tenant_id, + const uint64_t table_id, + const ObIArray &tablet_ids, + bool &is_stat_expired); + int get_need_mark_opt_stats_expired(const ObIArray &table_stats, + const OptStatExpiredTableInfo &expired_table_info, + const int64_t async_stale_max_table_size, + const int64_t begin_ts, + const share::schema::ObPartitionLevel &part_level, + const ObIArray &part_infos, + const ObIArray &subpart_infos, + ObIArray &expired_table_stats, + ObIArray &no_table_stats); + int gen_tablet_list(const ObIArray 
&tablet_ids, ObSqlString &tablet_list); + int do_mark_the_opt_stat_expired(const uint64_t tenant_id, + const ObIArray &expired_table_stats, + ObIArray &expired_partition_ids); + int do_mark_the_opt_stat_missing(const uint64_t tenant_id, + const ObIArray &no_table_stats); + int gen_part_analyzed_list(const ObIArray &expired_table_stats, + const int64_t begin_idx, + const int64_t end_idx, + ObSqlString &same_part_analyzed_list, + ObSqlString &diff_part_analyzed_list, + ObIArray &expired_partition_ids); + int gen_values_list(const uint64_t tenant_id, + const ObIArray &no_table_stats, + const int64_t begin_idx, + const int64_t end_idx, + ObSqlString &values_list); + int get_async_stale_max_table_size(const uint64_t tenant_id, + const uint64_t table_id, + int64_t &async_stale_max_table_size); private: DISALLOW_COPY_AND_ASSIGN(ObOptStatMonitorManager); const static int64_t UPDATE_OPT_STAT_BATCH_CNT = 200; const static int64_t info_count = 8; + const static int64_t MAX_PROCESS_BATCH_TABLET_CNT = 1000; bool inited_; uint64_t tenant_id_; int tg_id_; diff --git a/src/share/stat/ob_opt_stat_sql_service.cpp b/src/share/stat/ob_opt_stat_sql_service.cpp index cb649d7543..5ae9e3f8f9 100644 --- a/src/share/stat/ob_opt_stat_sql_service.cpp +++ b/src/share/stat/ob_opt_stat_sql_service.cpp @@ -350,6 +350,7 @@ int ObOptStatSqlService::fetch_table_stat(const uint64_t tenant_id, "macro_blk_cnt as macro_block_num, " "micro_blk_cnt as micro_block_num, " "stattype_locked as stattype_locked," + "stale_stats as stale_stats," "last_analyzed," "spare1 as sample_size FROM %s ", share::OB_ALL_TABLE_STAT_TNAME))) { LOG_WARN("fail to append SQL stmt string.", K(sql), K(ret)); @@ -411,6 +412,7 @@ int ObOptStatSqlService::batch_fetch_table_stats(sqlclient::ObISQLConnection *co "macro_blk_cnt as macro_block_num, " "micro_blk_cnt as micro_block_num, " "stattype_locked as stattype_locked," + "stale_stats as stale_stats," "last_analyzed," "spare1 as sample_size FROM %s", 
share::OB_ALL_TABLE_STAT_TNAME))) { LOG_WARN("fail to append SQL stmt string.", K(sql), K(ret)); @@ -1183,6 +1185,7 @@ int ObOptStatSqlService::fill_table_stat(common::sqlclient::ObMySQLResult &resul EXTRACT_INT_FIELD_TO_CLASS_MYSQL(result, macro_block_num, stat, int64_t); EXTRACT_INT_FIELD_TO_CLASS_MYSQL(result, micro_block_num, stat, int64_t); EXTRACT_INT_FIELD_TO_CLASS_MYSQL(result, stattype_locked, stat, int64_t); + EXTRACT_INT_FIELD_TO_CLASS_MYSQL(result, stale_stats, stat, int64_t); if (OB_SUCCESS != (ret = result.get_timestamp("last_analyzed", NULL, int_value))) { LOG_WARN("fail to get column in row. ", "column_name", "last_analyzed", K(ret)); } else { diff --git a/src/share/stat/ob_opt_table_stat.h b/src/share/stat/ob_opt_table_stat.h index 2210e21b20..a3e3fec7cc 100644 --- a/src/share/stat/ob_opt_table_stat.h +++ b/src/share/stat/ob_opt_table_stat.h @@ -137,7 +137,8 @@ public: modified_count_(0), sample_size_(0), tablet_id_(ObTabletID::INVALID_TABLET_ID), - stat_expired_time_(-1) {} + stat_expired_time_(-1), + stale_stats_(0) {} ObOptTableStat(uint64_t table_id, int64_t partition_id, int64_t object_type, @@ -169,7 +170,8 @@ public: modified_count_(0), sample_size_(0), tablet_id_(ObTabletID::INVALID_TABLET_ID), - stat_expired_time_(-1) {} + stat_expired_time_(-1), + stale_stats_(false) {} virtual ~ObOptTableStat() {} @@ -237,6 +239,9 @@ public: int64_t get_stat_expired_time() const { return stat_expired_time_; } void set_stat_expired_time(int64_t expired_time) { stat_expired_time_ = expired_time; } + bool is_stat_expired() const { return stale_stats_; } + void set_stale_stats(int64_t stale_stats) { stale_stats_ = stale_stats > 0; } + bool is_locked() const { return stattype_locked_ > 0; } void add_row_count(int64_t rc) { row_count_ += rc; } @@ -318,6 +323,7 @@ public: sample_size_ = 0; tablet_id_ = ObTabletID::INVALID_TABLET_ID; stat_expired_time_ = -1; + stale_stats_ = false; } TO_STRING_KV(K(table_id_), @@ -338,7 +344,8 @@ public: K(modified_count_), 
K(sample_size_), K(tablet_id_), - K(stat_expired_time_)); + K(stat_expired_time_), + K(stale_stats_)); private: uint64_t table_id_; @@ -362,6 +369,7 @@ private: int64_t sample_size_; uint64_t tablet_id_;//now only use estimate table rowcnt by meta table. int64_t stat_expired_time_;//mark the stat in cache is arrived expired time, if arrived at expired time need reload, -1 meanings no expire forever. + bool stale_stats_;//mark the stat is expired or not. }; } diff --git a/src/share/stat/ob_stat_define.cpp b/src/share/stat/ob_stat_define.cpp index 02b77bc509..342fb79728 100644 --- a/src/share/stat/ob_stat_define.cpp +++ b/src/share/stat/ob_stat_define.cpp @@ -181,6 +181,14 @@ int ObTableStatParam::assign(const ObTableStatParam &other) is_temp_table_ = other.is_temp_table_; allocator_ = other.allocator_; ref_table_type_ = other.ref_table_type_; + is_async_gather_ = other.is_async_gather_; + async_gather_sample_size_ = other.async_gather_sample_size_; + async_full_table_size_ = other.async_full_table_size_; + async_partition_ids_ = other.async_partition_ids_; + hist_sample_info_.is_sample_ = other.hist_sample_info_.is_sample_; + hist_sample_info_.is_block_sample_ = other.hist_sample_info_.is_block_sample_; + hist_sample_info_.sample_type_ = other.hist_sample_info_.sample_type_; + hist_sample_info_.sample_value_ = other.hist_sample_info_.sample_value_; if (OB_FAIL(part_infos_.assign(other.part_infos_))) { LOG_WARN("failed to assign", K(ret)); } else if (OB_FAIL(subpart_infos_.assign(other.subpart_infos_))) { @@ -226,6 +234,10 @@ int ObTableStatParam::assign_common_property(const ObTableStatParam &other) duration_time_ = other.duration_time_; allocator_ = other.allocator_; online_sample_percent_ = other.online_sample_percent_; + hist_sample_info_.is_sample_ = other.hist_sample_info_.is_sample_; + hist_sample_info_.is_block_sample_ = other.hist_sample_info_.is_block_sample_; + hist_sample_info_.sample_type_ = other.hist_sample_info_.sample_type_; + 
hist_sample_info_.sample_value_ = other.hist_sample_info_.sample_value_; return ret; } @@ -254,6 +266,10 @@ int ObOptStatGatherParam::assign(const ObOptStatGatherParam &other) global_part_id_ = other.global_part_id_; gather_vectorize_ = other.gather_vectorize_; sepcify_scn_ = other.sepcify_scn_; + hist_sample_info_.is_sample_ = other.hist_sample_info_.is_sample_; + hist_sample_info_.is_block_sample_ = other.hist_sample_info_.is_block_sample_; + hist_sample_info_.sample_type_ = other.hist_sample_info_.sample_type_; + hist_sample_info_.sample_value_ = other.hist_sample_info_.sample_value_; if (OB_FAIL(partition_infos_.assign(other.partition_infos_))) { LOG_WARN("failed to assign", K(ret)); } else if (OB_FAIL(column_params_.assign(other.column_params_))) { @@ -262,7 +278,7 @@ int ObOptStatGatherParam::assign(const ObOptStatGatherParam &other) return ret; } -bool ObTableStatParam::is_specify_partition_gather() const +bool ObTableStatParam::is_specify_partition() const { bool is_specify = false; if (part_level_ == share::schema::PARTITION_LEVEL_ZERO) { @@ -276,7 +292,7 @@ bool ObTableStatParam::is_specify_partition_gather() const return is_specify; } -bool ObTableStatParam::is_specify_column_gather() const +bool ObTableStatParam::is_specify_column() const { bool is_specify = false; for (int64_t i = 0; !is_specify && i < column_params_.count(); ++i) { @@ -307,6 +323,13 @@ int64_t ObOptStatGatherParam::get_need_gather_column() const return valid_column; } +int AsyncStatTable::assign(const AsyncStatTable &other) +{ + int ret = OB_SUCCESS; + table_id_ = other.table_id_; + return partition_ids_.assign(other.partition_ids_); +} + OB_SERIALIZE_MEMBER(ObOptDmlStat, tenant_id_, table_id_, diff --git a/src/share/stat/ob_stat_define.h b/src/share/stat/ob_stat_define.h index 23ef64d040..dab90f1435 100644 --- a/src/share/stat/ob_stat_define.h +++ b/src/share/stat/ob_stat_define.h @@ -51,7 +51,12 @@ enum StatOptionFlags OPT_FORCE = 1 << 11, OPT_APPROXIMATE_NDV = 1 << 12, 
OPT_ESTIMATE_BLOCK = 1 << 13, - OPT_STAT_OPTION_ALL = (1 << 14) -1 + OPT_ASYNC_GATHER_STALE_RATION = 1 << 14, + OPT_ASYNC_GATHER_SAMPLE_SIZE = 1 << 15, + OPT_ASYNC_GATHER_FULL_TABLE_SIZE = 1 << 16, + OPT_HIST_EST_PERCENT = 1 << 17, + OPT_HIST_BLOCK_SAMPLE = 1 << 18, + OPT_STAT_OPTION_ALL = (1 << 19) -1 }; const static double OPT_DEFAULT_STALE_PERCENT = 0.1; const static int64_t OPT_DEFAULT_STATS_RETENTION = 31; @@ -66,6 +71,15 @@ const static int64_t MAX_NUM_OF_WRITE_STATS = 2000; const static int64_t DEFAULT_STAT_GATHER_VECTOR_BATCH_SIZE = 256; const static int64_t MIN_GATHER_WORK_ARANA_SIZE = 10 * 1024L * 1024L; //10M const int64_t MAX_OPT_STATS_PROCESS_RPC_TIMEOUT = 300000000;//one optimizer stats processing rpc time should not exceed 300 seconds +const static int64_t DEFAULT_ASYNC_SAMPLE_SIZE = 1000000; +const static int64_t DEFAULT_ASYNC_FULL_TABLE_SIZE = 10000000; +const static int64_t DEFAULT_ASYNC_MIN_TABLE_SIZE = 10000; +const static int64_t DEFAULT_ASYNC_STALE_MAX_TABLE_SIZE = 100000000; +const static int64_t DEFAULT_ASYNC_MAX_SCAN_ROWCOUNT = 100000000; +const static int64_t MINIMUM_OF_ASYNC_GATHER_STALE_RATIO = 2; +const int64_t MAXIMUM_BLOCK_CNT_OF_ROW_SAMPLE_GATHER_HYBRID_HIST = 100000; +const int64_t MAXIMUM_ROWS_OF_ROW_SAMPLE_GATHER_HYBRID_HIST = 10000000; +const int64_t MINIMUM_BLOCK_CNT_OF_BLOCK_SAMPLE_HYBRID_HIST = 16; enum StatLevel { @@ -156,7 +170,7 @@ struct BlockNumStat K(cg_macro_cnt_arr_), K(cg_micro_cnt_arr_), K(sstable_row_cnt_), - K(memtable_row_cnt_)) + K(memtable_row_cnt_)); }; //TODO@jiangxiu.wt: improve the expression of PartInfo, use the map is better. 
@@ -193,28 +207,55 @@ struct StatTable database_id_(OB_INVALID_ID), table_id_(OB_INVALID_ID), stale_percent_(0.0), - partition_stat_infos_() + partition_stat_infos_(), + is_async_gather_(false), + async_partition_ids_() { partition_stat_infos_.set_attr(lib::ObMemAttr(MTL_ID(), "StatTable")); + async_partition_ids_.set_attr(lib::ObMemAttr(MTL_ID(), "StatTable")); } - StatTable(uint64_t database_id, uint64_t table_id) : + StatTable(uint64_t database_id, uint64_t table_id, bool is_async_gather = false) : database_id_(database_id), table_id_(table_id), stale_percent_(0.0), - partition_stat_infos_() + partition_stat_infos_(), + is_async_gather_(is_async_gather), + async_partition_ids_() { partition_stat_infos_.set_attr(lib::ObMemAttr(MTL_ID(), "StatTable")); + async_partition_ids_.set_attr(lib::ObMemAttr(MTL_ID(), "StatTable")); } bool operator<(const StatTable &other) const; int assign(const StatTable &other); TO_STRING_KV(K_(database_id), K_(table_id), K_(stale_percent), - K_(partition_stat_infos)); + K_(partition_stat_infos), + K_(is_async_gather), + K_(async_partition_ids)); uint64_t database_id_; uint64_t table_id_; double stale_percent_; ObArray partition_stat_infos_; + bool is_async_gather_; + ObArray async_partition_ids_; +}; + +struct AsyncStatTable +{ + AsyncStatTable() : + table_id_(OB_INVALID_ID), + partition_ids_() + {} + AsyncStatTable(int64_t table_id) : + table_id_(table_id), + partition_ids_() + {} + int assign(const AsyncStatTable &other); + TO_STRING_KV(K_(table_id), + K_(partition_ids)); + uint64_t table_id_; + ObArray partition_ids_; }; enum ObStatTableType { @@ -346,6 +387,7 @@ struct ObAnalyzeSampleInfo void set_percent(double percent); void set_rows(double row_num); void set_is_block_sample(bool is_block_sample) { is_block_sample_ = is_block_sample; } + bool is_specify_sample() const { return is_sample_ && sample_value_ >= 0.000001 && sample_value_ < 100.0; } TO_STRING_KV(K_(is_sample), K_(is_block_sample), @@ -462,7 +504,12 @@ struct 
ObTableStatParam { allocator_(NULL), ref_table_type_(share::schema::ObTableType::MAX_TABLE_TYPE), column_group_params_(), - online_sample_percent_(1.) + online_sample_percent_(1.), + is_async_gather_(false), + async_gather_sample_size_(DEFAULT_ASYNC_SAMPLE_SIZE), + async_full_table_size_(DEFAULT_ASYNC_FULL_TABLE_SIZE), + async_partition_ids_(NULL), + hist_sample_info_() {} int assign(const ObTableStatParam &other); @@ -478,9 +525,9 @@ struct ObTableStatParam { return global_data_part_id_ != INVALID_GLOBAL_PART_ID; } - bool is_specify_partition_gather() const; + bool is_specify_partition() const; - bool is_specify_column_gather() const; + bool is_specify_column() const; int64_t get_need_gather_column() const; @@ -488,6 +535,8 @@ struct ObTableStatParam { part_stat_param_.need_modify_ || subpart_stat_param_.need_modify_; } + bool is_specify_sample() const { return sample_info_.is_specify_sample(); } + uint64_t tenant_id_; ObString db_name_; @@ -543,6 +592,11 @@ struct ObTableStatParam { share::schema::ObTableType ref_table_type_; ObArray column_group_params_; double online_sample_percent_; + bool is_async_gather_; + int64_t async_gather_sample_size_; + int64_t async_full_table_size_; + const ObIArray *async_partition_ids_; + ObAnalyzeSampleInfo hist_sample_info_; TO_STRING_KV(K(tenant_id_), K(db_name_), @@ -587,7 +641,12 @@ struct ObTableStatParam { K(is_temp_table_), K(ref_table_type_), K(column_group_params_), - K(online_sample_percent_)); + K(online_sample_percent_), + K(is_async_gather_), + K(async_gather_sample_size_), + K(async_full_table_size_), + KPC(async_partition_ids_), + K(hist_sample_info_)); }; struct ObOptStatGatherParam { @@ -615,7 +674,11 @@ struct ObOptStatGatherParam { gather_vectorize_(DEFAULT_STAT_GATHER_VECTOR_BATCH_SIZE), sepcify_scn_(0), use_column_store_(false), - is_specify_partition_(false) + is_specify_partition_(false), + is_async_gather_(false), + async_gather_sample_size_(DEFAULT_ASYNC_SAMPLE_SIZE), + 
async_full_table_size_(DEFAULT_ASYNC_FULL_TABLE_SIZE), + hist_sample_info_() {} int assign(const ObOptStatGatherParam &other); int64_t get_need_gather_column() const; @@ -643,6 +706,10 @@ struct ObOptStatGatherParam { uint64_t sepcify_scn_; bool use_column_store_; bool is_specify_partition_; + int64_t is_async_gather_; + int64_t async_gather_sample_size_; + int64_t async_full_table_size_; + ObAnalyzeSampleInfo hist_sample_info_; TO_STRING_KV(K(tenant_id_), K(db_name_), @@ -665,7 +732,11 @@ struct ObOptStatGatherParam { K(gather_vectorize_), K(sepcify_scn_), K(use_column_store_), - K(is_specify_partition_)); + K(is_specify_partition_), + K(is_async_gather_), + K(async_gather_sample_size_), + K(async_full_table_size_), + K(hist_sample_info_)); }; struct ObOptStat diff --git a/src/share/stat/ob_stat_item.cpp b/src/share/stat/ob_stat_item.cpp index f58e58e8ae..94d95caf42 100644 --- a/src/share/stat/ob_stat_item.cpp +++ b/src/share/stat/ob_stat_item.cpp @@ -234,8 +234,6 @@ bool ObStatTopKHist::is_needed() const int ObStatTopKHist::gen_expr(char *buf, const int64_t buf_len, int64_t &pos) { int ret = OB_SUCCESS; - const int64_t MIN_BUCKET_SIZE = 256; - const int64_t MAX_BUCKET_SIZE = 2048; if (OB_ISNULL(col_param_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("column param is null", K(ret), K(col_param_)); @@ -247,7 +245,7 @@ int ObStatTopKHist::gen_expr(char *buf, const int64_t buf_len, int64_t &pos) ret = OB_ERR_INVALID_SIZE_SPECIFIED; LOG_WARN("get invalid argument, expected value in the range[1, 2048]", K(ret), K(bkt_num)); } - double err_rate = 1.0 / (1000 * (bkt_num / MIN_BUCKET_SIZE)); + double err_rate = 1.0 / get_window_size(bkt_num); if (OB_SUCC(ret)) { if (OB_FAIL(databuff_printf(buf, buf_len, pos, lib::is_oracle_mode() ? 
" TOP_K_FRE_HIST(%lf, \"%.*s\", %ld, %ld)" : diff --git a/src/share/stat/ob_stat_item.h b/src/share/stat/ob_stat_item.h index 17398afe18..b6179ccd96 100644 --- a/src/share/stat/ob_stat_item.h +++ b/src/share/stat/ob_stat_item.h @@ -223,6 +223,8 @@ public: class ObStatTopKHist : public ObStatColItem { + const static int64_t MIN_BUCKET_SIZE = 256; + const static int64_t MAX_BUCKET_SIZE = 2048; public: ObStatTopKHist() : ObStatColItem(), tab_stat_(NULL), max_disuse_cnt_(0) {} ObStatTopKHist(const ObColumnStatParam *param, @@ -259,6 +261,8 @@ public: virtual bool is_needed() const override; virtual int gen_expr(char *buf, const int64_t buf_len, int64_t &pos) override; virtual int decode(ObObj &obj, ObIAllocator &allocator) override; + static int64_t get_window_size(int64_t bucket_num) { + return 1000 * (bucket_num < MIN_BUCKET_SIZE ? 1 : bucket_num / MIN_BUCKET_SIZE); } protected: ObOptTableStat *tab_stat_; int64_t max_disuse_cnt_; @@ -301,7 +305,7 @@ public: ObGlobalTableStat() : row_count_(0), row_size_(0), data_size_(0), macro_block_count_(0), micro_block_count_(0), part_cnt_(0), last_analyzed_(0), - cg_macro_cnt_arr_(), cg_micro_cnt_arr_(), stat_locked_(false), + cg_macro_cnt_arr_(), cg_micro_cnt_arr_(), stat_locked_(false), stale_stats_(false), sstable_row_cnt_(0), memtable_row_cnt_(0) {} @@ -321,6 +325,8 @@ public: void set_last_analyzed(int64_t last_analyzed) { last_analyzed_ = last_analyzed; } void set_stat_locked(bool locked) { stat_locked_ = locked; } bool get_stat_locked() const { return stat_locked_; } + void set_stale_stats(bool stale_stats) { stale_stats_ = stale_stats; } + bool get_stale_stats() const { return stale_stats_; } int64_t get_sstable_row_cnt() const { return sstable_row_cnt_; } int64_t get_memtable_row_cnt() const { return memtable_row_cnt_; } @@ -335,6 +341,7 @@ public: K(cg_macro_cnt_arr_), K(cg_micro_cnt_arr_), K(stat_locked_), + K(stale_stats_), K(sstable_row_cnt_), K(memtable_row_cnt_)); @@ -349,6 +356,7 @@ private: ObArray 
cg_macro_cnt_arr_; ObArray cg_micro_cnt_arr_; bool stat_locked_; + bool stale_stats_; int64_t sstable_row_cnt_; int64_t memtable_row_cnt_; }; diff --git a/src/sql/engine/cmd/ob_analyze_executor.cpp b/src/sql/engine/cmd/ob_analyze_executor.cpp index 842abf31ce..3786086e06 100644 --- a/src/sql/engine/cmd/ob_analyze_executor.cpp +++ b/src/sql/engine/cmd/ob_analyze_executor.cpp @@ -60,6 +60,8 @@ int ObAnalyzeExecutor::execute(ObExecContext &ctx, ObAnalyzeStmt &stmt) LOG_USER_ERROR(OB_NOT_SUPPORTED, "analyze table during restore or standby cluster"); } else if (OB_FAIL(ObDbmsStatsUtils::implicit_commit_before_gather_stats(ctx))) { LOG_WARN("failed to implicit commit before gather stats", K(ret)); + } else if (OB_FAIL(ObDbmsStatsUtils::cancel_async_gather_stats(ctx))) { + LOG_WARN("failed to cancel async gather stats", K(ret)); } else if (OB_FAIL(stmt.fill_table_stat_params(ctx, params))) { LOG_WARN("failed to fill table stat param", K(ret)); } else { diff --git a/src/sql/ob_optimizer_trace_impl.h b/src/sql/ob_optimizer_trace_impl.h index 8599392b02..ebdce9213d 100644 --- a/src/sql/ob_optimizer_trace_impl.h +++ b/src/sql/ob_optimizer_trace_impl.h @@ -363,6 +363,11 @@ public: typename std::enable_if, T>::value, int>::type append(const T& value); + //for ObIArrayWrap + template + typename std::enable_if, T>::value, int>::type + append(const T& value); + //for ObIArray template typename std::enable_if, T>::value, int>::type @@ -505,6 +510,23 @@ ObOptimizerTraceImpl::append(const T& value) return ret; } +//for ObIArray +template +typename std::enable_if, T>::value, int>::type +ObOptimizerTraceImpl::append(const T& value) +{ + int ret = OB_SUCCESS; + append("["); + for (int i = 0; OB_SUCC(ret) && i < value.count(); ++i) { + if (i > 0) { + append(", "); + } + ret = append(value.at(i)); + } + append("]"); + return ret; +} + //for ObIArray template typename std::enable_if, T>::value, int>::type diff --git a/src/sql/optimizer/ob_access_path_estimation.cpp 
b/src/sql/optimizer/ob_access_path_estimation.cpp index 1c713ac911..59362114ca 100644 --- a/src/sql/optimizer/ob_access_path_estimation.cpp +++ b/src/sql/optimizer/ob_access_path_estimation.cpp @@ -76,7 +76,7 @@ int ObAccessPathEstimation::inner_estimate_rowcount(ObOptimizerContext &ctx, valid_methods & hint_specify_methods ? valid_methods & hint_specify_methods : valid_methods, method))) { LOG_WARN("failed to choose one est method", K(ret), K(valid_methods)); - } else if (OB_FAIL(do_estimate_rowcount(ctx, paths, is_inner_path, filter_exprs, valid_methods, method))) { + } else if (OB_FAIL(do_estimate_rowcount(ctx, paths, filter_exprs, valid_methods, method))) { LOG_WARN("failed to do estimate rowcount", K(ret), K(method), K(valid_methods)); } for (int64_t i = 0; OB_SUCC(ret) && i < paths.count(); i ++) { @@ -92,14 +92,13 @@ int ObAccessPathEstimation::inner_estimate_rowcount(ObOptimizerContext &ctx, int ObAccessPathEstimation::do_estimate_rowcount(ObOptimizerContext &ctx, common::ObIArray &paths, - const bool is_inner_path, const ObIArray &filter_exprs, ObBaseTableEstMethod &valid_methods, ObBaseTableEstMethod &method) { int ret = OB_SUCCESS; bool is_success = true; - LOG_TRACE("Try to do estimate rowcount", K(method), K(is_inner_path)); + LOG_TRACE("Try to do estimate rowcount", K(method)); if (OB_UNLIKELY(EST_INVALID == method) || OB_UNLIKELY((method & EST_DS_FULL) && (method & EST_DS_BASIC)) || @@ -111,7 +110,7 @@ int ObAccessPathEstimation::do_estimate_rowcount(ObOptimizerContext &ctx, if (OB_SUCC(ret) && (method & (EST_DS_BASIC | EST_DS_FULL))) { bool only_ds_basic_stat = (method & EST_DS_BASIC); if (OB_FAIL(process_dynamic_sampling_estimation( - ctx, paths, is_inner_path, filter_exprs, only_ds_basic_stat, is_success))) { + ctx, paths, filter_exprs, only_ds_basic_stat, is_success))) { LOG_WARN("failed to process statistics estimation", K(ret)); } else if (!is_success) { valid_methods &= ~EST_DS_BASIC; @@ -311,6 +310,11 @@ int 
ObAccessPathEstimation::choose_best_est_method(ObOptimizerContext &ctx, bool can_use_ds = valid_methods & (EST_DS_FULL | EST_DS_BASIC); bool can_use_storage = valid_methods & EST_STORAGE; + if (OB_ISNULL(ctx.get_session_info())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null param", K(ret)); + } + // check is simple scene bool is_table_get = false; for (int64_t i = 0; OB_SUCC(ret) && !is_table_get && i < paths.count(); ++i) { @@ -342,8 +346,7 @@ int ObAccessPathEstimation::choose_best_est_method(ObOptimizerContext &ctx, // check is complex scene if (OB_SUCC(ret) && !is_simple_scene && !is_complex_scene && (valid_methods | EST_DS_FULL)) { - ObArenaAllocator tmp_alloc("ObOptSel"); - ObSelEstimatorFactory factory(tmp_alloc); + ObSelEstimatorFactory factory(ctx.get_session_info()->get_effective_tenant_id()); const OptSelectivityCtx* sel_ctx = NULL; if (OB_UNLIKELY(paths.empty()) || OB_ISNULL(paths.at(0)) || @@ -364,6 +367,20 @@ int ObAccessPathEstimation::choose_best_est_method(ObOptimizerContext &ctx, } } + //check opt stats is expired + if (OB_SUCC(ret) && !is_simple_scene && !is_complex_scene && can_use_ds) { + const ObLogPlan* log_plan = NULL; + const OptTableMeta *table_meta = NULL; + if (!paths.empty() && paths.at(0)->parent_ != NULL && + (log_plan = paths.at(0)->parent_->get_plan()) != NULL && + (table_meta = log_plan->get_basic_table_metas().get_table_meta_by_table_id(paths.at(0)->table_id_)) != NULL && + table_meta->is_opt_stat_expired() && + !table_meta->is_stat_locked()) { + is_simple_scene = false; + is_complex_scene = true; + } + } + if (OB_FAIL(ret)) { } else if (is_simple_scene) { method = choose_one_est_method(valid_methods, simple_est_priority, priority_cnt); @@ -1676,13 +1693,12 @@ int ObAccessPathEstimation::estimate_full_table_rowcount_by_meta_table(ObOptimiz int ObAccessPathEstimation::process_dynamic_sampling_estimation(ObOptimizerContext &ctx, ObIArray &paths, - const bool is_inner_path, const ObIArray &filter_exprs, bool 
only_ds_basic_stat, bool &is_success) { int ret = OB_SUCCESS; - LOG_TRACE("begin process dynamic sampling estimation", K(paths), K(is_inner_path)); + LOG_TRACE("begin process dynamic sampling estimation", K(paths)); ObDSTableParam ds_table_param; ObSEArray ds_result_items; is_success = true; @@ -1704,47 +1720,52 @@ int ObAccessPathEstimation::process_dynamic_sampling_estimation(ObOptimizerConte LOG_WARN("failed to get ds table param", K(ret), K(ds_table_param)); } else if (!ds_table_param.is_valid()) { is_success = false; - } else if (OB_FAIL(add_ds_result_items(paths, filter_exprs, specify_ds, - ds_result_items, only_ds_basic_stat))) { - LOG_WARN("failed to init ds result items", K(ret)); } else { - OPT_TRACE_TITLE("BEGIN DYNAMIC SAMPLE ESTIMATION"); - ObArenaAllocator allocator("ObOpTableDS", OB_MALLOC_NORMAL_BLOCK_SIZE, ctx.get_session_info()->get_effective_tenant_id()); - ObDynamicSampling dynamic_sampling(ctx, allocator); - int64_t start_time = ObTimeUtility::current_time(); - bool throw_ds_error = false; - if (OB_FAIL(dynamic_sampling.estimate_table_rowcount(ds_table_param, ds_result_items, throw_ds_error))) { - if (!throw_ds_error && !is_retry_ret(ret)) { - LOG_WARN("failed to estimate table rowcount caused by some reason, please check!!!", K(ret), - K(start_time), K(ObTimeUtility::current_time() - start_time), K(ds_table_param), - K(ctx.get_session_info()->get_current_query_string())); - if (OB_FAIL(ObDynamicSamplingUtils::add_failed_ds_table_list(table_meta->get_ref_table_id(), - table_meta->get_all_used_parts(), - ctx.get_failed_ds_tab_list()))) { - LOG_WARN("failed to add failed ds table list", K(ret)); + bool only_ds_filter = (table_meta->use_opt_stat() && !table_meta->is_opt_stat_expired()) || table_meta->is_stat_locked(); + if (OB_FAIL(add_ds_result_items(paths, filter_exprs, specify_ds, + ds_result_items, + only_ds_basic_stat, + only_ds_filter))) { + LOG_WARN("failed to init ds result items", K(ret)); + } else if (!ds_result_items.empty()) { + 
OPT_TRACE_TITLE("BEGIN DYNAMIC SAMPLE ESTIMATION"); + ObArenaAllocator allocator("ObOpTableDS", OB_MALLOC_NORMAL_BLOCK_SIZE, ctx.get_session_info()->get_effective_tenant_id()); + ObDynamicSampling dynamic_sampling(ctx, allocator); + int64_t start_time = ObTimeUtility::current_time(); + bool throw_ds_error = false; + if (OB_FAIL(dynamic_sampling.estimate_table_rowcount(ds_table_param, ds_result_items, throw_ds_error))) { + if (!throw_ds_error && !is_retry_ret(ret)) { + LOG_WARN("failed to estimate table rowcount caused by some reason, please check!!!", K(ret), + K(start_time), K(ObTimeUtility::current_time() - start_time), K(ds_table_param), + K(ctx.get_session_info()->get_current_query_string())); + if (OB_FAIL(ObDynamicSamplingUtils::add_failed_ds_table_list(table_meta->get_ref_table_id(), + table_meta->get_all_used_parts(), + ctx.get_failed_ds_tab_list()))) { + LOG_WARN("failed to add failed ds table list", K(ret)); + } else { + is_success = false; + } } else { - is_success = false; + LOG_WARN("failed to dynamic sampling", K(ret), K(start_time), K(ds_table_param)); } - } else { - LOG_WARN("failed to dynamic sampling", K(ret), K(start_time), K(ds_table_param)); + } else if (OB_FAIL(update_table_stat_info_by_dynamic_sampling(paths.at(0), + ds_table_param.ds_level_, + ds_result_items, + only_ds_filter, + no_ds_data))) { + LOG_WARN("failed to update table stat info by dynamic sampling", K(ret)); + } else if (only_ds_basic_stat || no_ds_data) { + if (OB_FAIL(process_statistics_estimation(paths))) { + LOG_WARN("failed to process statistics estimation", K(ret)); + } + } else if (OB_FAIL(estimate_path_rowcount_by_dynamic_sampling(ds_table_param.table_id_, paths, ds_result_items))) { + LOG_WARN("failed to estimate path rowcount by dynamic sampling", K(ret)); } - } else if (OB_FAIL(update_table_stat_info_by_dynamic_sampling(paths.at(0), - ds_table_param.ds_level_, - ds_result_items, - no_ds_data))) { - LOG_WARN("failed to update table stat info by dynamic sampling", 
K(ret)); - } else if (only_ds_basic_stat || no_ds_data) { - if (OB_FAIL(process_statistics_estimation(paths))) { - LOG_WARN("failed to process statistics estimation", K(ret)); - } - } else if (OB_FAIL(estimate_path_rowcount_by_dynamic_sampling(ds_table_param.table_id_, paths, - is_inner_path, ds_result_items))) { - LOG_WARN("failed to estimate path rowcount by dynamic sampling", K(ret)); - LOG_TRACE("finish dynamic sampling", K(only_ds_basic_stat), K(no_ds_data), K(is_success)); + LOG_TRACE("finish dynamic sampling", K(only_ds_basic_stat), K(only_ds_filter), K(no_ds_data), K(is_success)); + OPT_TRACE("end to process table dynamic sampling estimation"); + OPT_TRACE("dynamic sampling estimation result:"); + OPT_TRACE(ds_result_items); } - OPT_TRACE("end to process table dynamic sampling estimation"); - OPT_TRACE("dynamic sampling estimation result:"); - OPT_TRACE(ds_result_items); } return ret; } @@ -1754,7 +1775,8 @@ int ObAccessPathEstimation::add_ds_result_items(ObIArray &paths, const ObIArray &filter_exprs, const bool specify_ds, ObIArray &ds_result_items, - bool only_ds_basic_stat) + bool only_ds_basic_stat, + bool only_ds_filter) { int ret = OB_SUCCESS; bool all_path_is_get = false; @@ -1776,7 +1798,8 @@ int ObAccessPathEstimation::add_ds_result_items(ObIArray &paths, } else { //1.init ds basic stat item ObDSResultItem basic_item(ObDSResultItemType::OB_DS_BASIC_STAT, paths.at(0)->ref_table_id_); - if (OB_FAIL(get_need_dynamic_sampling_columns(paths.at(0)->parent_->get_plan(), + if (!only_ds_filter && + OB_FAIL(get_need_dynamic_sampling_columns(paths.at(0)->parent_->get_plan(), paths.at(0)->table_id_, filter_exprs, true, false, basic_item.exprs_))) { @@ -1820,7 +1843,7 @@ int ObAccessPathEstimation::add_ds_result_items(ObIArray &paths, } } LOG_TRACE("succeed to add_ds result items", K(paths), K(all_path_is_get), K(filter_exprs), - K(ds_result_items), K(only_ds_basic_stat)); + K(ds_result_items), K(only_ds_basic_stat), K(only_ds_filter)); return ret; } @@ -1887,6 
+1910,7 @@ int ObAccessPathEstimation::get_need_dynamic_sampling_columns(const ObLogPlan* l int ObAccessPathEstimation::update_table_stat_info_by_dynamic_sampling(AccessPath *path, int64_t ds_level, ObIArray &ds_result_items, + bool only_ds_filter, bool &no_ds_data) { int ret = OB_SUCCESS; @@ -1909,21 +1933,25 @@ int ObAccessPathEstimation::update_table_stat_info_by_dynamic_sampling(AccessPat ObCostTableScanInfo &est_cost_info = path->est_cost_info_; OptTableMetas &table_metas = path->parent_->get_plan()->get_basic_table_metas(); OptTableMeta *table_meta = table_metas.get_table_meta_by_table_id(path->table_id_); - bool no_add_micro_block = (OB_E(EventTable::EN_LEADER_STORAGE_ESTIMATION) OB_SUCCESS) != OB_SUCCESS; - if (!no_add_micro_block) { - est_cost_info.table_meta_info_->micro_block_count_ = item->stat_handle_.stat_->get_micro_block_num(); - } - est_cost_info.table_meta_info_->table_row_count_ = row_count; if (OB_ISNULL(table_meta) || OB_UNLIKELY(OB_INVALID_ID == table_meta->get_ref_table_id())) { //do nothing - } else if (OB_FAIL(update_column_metas_by_ds_col_stat(row_count, - item->stat_handle_.stat_->get_ds_col_stats(), - table_meta->get_column_metas()))) { - LOG_WARN("failed to fill ds col stat", K(ret)); } else { - table_meta->set_rows(row_count); - table_meta->set_use_ds_stat(); table_meta->set_ds_level(ds_level); + if (!only_ds_filter) { + bool no_add_micro_block = (OB_E(EventTable::EN_LEADER_STORAGE_ESTIMATION) OB_SUCCESS) != OB_SUCCESS; + if (!no_add_micro_block) { + est_cost_info.table_meta_info_->micro_block_count_ = item->stat_handle_.stat_->get_micro_block_num(); + } + est_cost_info.table_meta_info_->table_row_count_ = row_count; + if (OB_FAIL(update_column_metas_by_ds_col_stat(row_count, + item->stat_handle_.stat_->get_ds_col_stats(), + table_meta->get_column_metas()))) { + LOG_WARN("failed to fill ds col stat", K(ret)); + } else { + table_meta->set_rows(row_count); + table_meta->set_use_ds_stat(); + } + } } } return ret; @@ -1950,7 +1978,6 @@ int 
ObAccessPathEstimation::update_table_stat_info_by_default(AccessPath *path) int ObAccessPathEstimation::estimate_path_rowcount_by_dynamic_sampling(const uint64_t table_id, ObIArray &paths, - const bool is_inner_path, ObIArray &ds_result_items) { int ret = OB_SUCCESS; @@ -2010,20 +2037,6 @@ int ObAccessPathEstimation::estimate_path_rowcount_by_dynamic_sampling(const uin index_back_row_count = index_back_row_count != 0 ? index_back_row_count : logical_row_count; physical_row_count = logical_row_count; } - if (is_inner_path) { - if (OB_FAIL(ObOptSelectivity::calculate_selectivity(*est_cost_info.table_metas_, - *est_cost_info.sel_ctx_, - est_cost_info.pushdown_prefix_filters_, - est_cost_info.pushdown_prefix_filter_sel_, - paths.at(i)->parent_->get_plan()->get_predicate_selectivities()))) { - LOG_WARN("failed to calculate selectivity", K(est_cost_info.pushdown_prefix_filters_), K(ret)); - } else { - logical_row_count = logical_row_count * est_cost_info.pushdown_prefix_filter_sel_; - index_back_row_count = index_back_row_count * est_cost_info.pushdown_prefix_filter_sel_; - physical_row_count = logical_row_count; - output_rowcnt = output_rowcnt * est_cost_info.pushdown_prefix_filter_sel_; - } - } if (OB_SUCC(ret)) { // block sampling double block_sample_ratio = est_cost_info.sample_info_.is_block_sample() ? 
diff --git a/src/sql/optimizer/ob_access_path_estimation.h b/src/sql/optimizer/ob_access_path_estimation.h index 6fe3c9ecce..2489c42313 100644 --- a/src/sql/optimizer/ob_access_path_estimation.h +++ b/src/sql/optimizer/ob_access_path_estimation.h @@ -98,7 +98,6 @@ private: static int do_estimate_rowcount(ObOptimizerContext &ctx, common::ObIArray &paths, - const bool is_inner_path, const ObIArray &filter_exprs, ObBaseTableEstMethod &valid_methods, ObBaseTableEstMethod &method); @@ -137,7 +136,6 @@ private: static int process_dynamic_sampling_estimation(ObOptimizerContext &ctx, ObIArray &paths, - const bool is_inner_path, const ObIArray &filter_exprs, bool only_ds_basic_stat, bool &is_success); @@ -237,17 +235,18 @@ private: const ObIArray &filter_exprs, const bool specify_ds, ObIArray &ds_result_items, - bool only_ds_basic_stat); + bool only_ds_basic_stat, + bool only_ds_filter); static int update_table_stat_info_by_dynamic_sampling(AccessPath *path, int64_t ds_level, ObIArray &ds_result_items, + bool only_ds_filter, bool &no_ds_data); static int update_table_stat_info_by_default(AccessPath *path); static int estimate_path_rowcount_by_dynamic_sampling(const uint64_t table_id, ObIArray &paths, - const bool is_inner_path, ObIArray &ds_result_items); static int classify_paths(common::ObIArray &paths, common::ObIArray &normal_paths, diff --git a/src/sql/optimizer/ob_dynamic_sampling.cpp b/src/sql/optimizer/ob_dynamic_sampling.cpp index 2b155691aa..6bc28f57a6 100644 --- a/src/sql/optimizer/ob_dynamic_sampling.cpp +++ b/src/sql/optimizer/ob_dynamic_sampling.cpp @@ -403,7 +403,7 @@ int ObDynamicSampling::get_table_dml_info(const uint64_t tenant_id, cur_modified_dml_cnt, false))) { LOG_WARN("failed to estimate modified count", K(ret)); - } else if (OB_FAIL(pl::ObDbmsStats::get_table_stale_percent_threshold(*ctx_->get_exec_ctx(), + } else if (OB_FAIL(pl::ObDbmsStats::get_table_stale_percent_threshold(ctx_->get_exec_ctx()->get_sql_proxy(), tenant_id, table_id, 
stale_percent_threshold))) { @@ -772,7 +772,6 @@ int ObDynamicSampling::calc_table_sample_block_ratio(const ObDSTableParam ¶m { int ret = OB_SUCCESS; int64_t sample_micro_cnt = param.sample_block_cnt_; - const int64_t MAX_FULL_SCAN_ROW_COUNT = 100000; int64_t macro_threshold = 200; if (param.is_virtual_table_) { sample_block_ratio_ = 100.0; @@ -783,6 +782,8 @@ int ObDynamicSampling::calc_table_sample_block_ratio(const ObDSTableParam ¶m LOG_WARN("get unexpected error", K(ret), K(param)); } else if (OB_FAIL(estimate_table_block_count_and_row_count(param))) { LOG_WARN("failed to estimate table block count and row count", K(ret)); + } else if (sstable_row_count_ + memtable_row_count_ <= MAGIC_MAX_AUTO_SAMPLE_SIZE) { + sample_block_ratio_ = 100.0; } else { int64_t max_allowed_multiple = sample_micro_cnt > OB_DS_BASIC_SAMPLE_MICRO_CNT ? sample_micro_cnt / OB_DS_BASIC_SAMPLE_MICRO_CNT : 1; if (micro_block_num_ > OB_DS_MAX_BASIC_SAMPLE_MICRO_CNT * max_allowed_multiple && diff --git a/src/sql/optimizer/ob_join_order.cpp b/src/sql/optimizer/ob_join_order.cpp index 898b226f78..f06e5ab72c 100644 --- a/src/sql/optimizer/ob_join_order.cpp +++ b/src/sql/optimizer/ob_join_order.cpp @@ -2964,7 +2964,6 @@ int ObJoinOrder::estimate_rowcount_for_access_path(ObIArray &all_pa int ret = OB_SUCCESS; bool is_use_ds = false; method = EST_INVALID; - get_plan()->get_selectivity_ctx().set_dependency_type(FilterDependencyType::INDEPENDENT); if (OB_FAIL(ObAccessPathEstimation::estimate_rowcount(OPT_CTX, all_paths, is_inner_path, filter_exprs, @@ -4378,6 +4377,7 @@ int ObJoinOrder::estimate_size_and_width_for_subquery(uint64_t table_id, table_id_, output_row_size_))) { LOG_WARN("estimate width of row failed", K(table_id_), K(ret)); + } else if (FALSE_IT(get_plan()->get_selectivity_ctx().init_op_ctx(NULL, root->get_card()))) { } else if (OB_FAIL(ObOptSelectivity::calculate_selectivity(get_plan()->get_basic_table_metas(), get_plan()->get_selectivity_ctx(), get_restrict_infos(), @@ -7073,7 +7073,7 
@@ int JoinPath::do_re_estimate_cost(EstimateCostInfo &info, double &card, double & LOG_WARN("failed to re estimate cost", K(ret)); } else if (OB_FAIL(try_set_batch_nlj_for_right_access_path(false))) { LOG_WARN("failed to try set batch nlj for right access path", K(ret)); - } else if (OB_FAIL(re_estimate_rows(left_output_rows, right_output_rows, card))) { + } else if (OB_FAIL(re_estimate_rows(info.join_filter_infos_, left_output_rows, right_output_rows, card))) { LOG_WARN("failed to re estimate rows", K(ret)); } else if (NESTED_LOOP_JOIN == join_algo_) { if (OB_FAIL(cost_nest_loop_join(info.need_parallel_, @@ -7194,21 +7194,29 @@ int JoinPath::get_re_estimate_param(EstimateCostInfo ¶m, return ret; } -int JoinPath::re_estimate_rows(double left_output_rows, double right_output_rows, double &row_count) +int JoinPath::re_estimate_rows(ObIArray &pushdown_join_filter_infos, + double left_output_rows, + double right_output_rows, + double &row_count) { int ret = OB_SUCCESS; double selectivity = 1.0; ObLogPlan *plan = NULL; ObJoinOrder *left_tree = NULL; ObJoinOrder *right_tree = NULL; + const ObDMLStmt *stmt = NULL; if (OB_ISNULL(left_path_) || OB_ISNULL(right_path_) || OB_ISNULL(parent_) || OB_ISNULL(plan = parent_->get_plan()) || OB_ISNULL(left_tree = left_path_->parent_) || - OB_ISNULL(right_tree = right_path_->parent_)) { + OB_ISNULL(right_tree = right_path_->parent_) || + OB_ISNULL(stmt = plan->get_stmt()) || + OB_ISNULL(plan->get_optimizer_context().get_query_ctx())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(left_path_), K(right_path_), K(plan), K(ret)); - } else if (HASH_JOIN == join_algo_ && - !join_filter_infos_.empty()) { + } else if (!plan->get_optimizer_context().get_query_ctx()-> + check_opt_compat_version(COMPAT_VERSION_4_2_4, COMPAT_VERSION_4_3_0, COMPAT_VERSION_4_3_3) ? 
+ HASH_JOIN == join_algo_ && !join_filter_infos_.empty() : + HASH_JOIN == join_algo_ && pushdown_join_filter_infos.empty()) { row_count = get_path_output_rows(); } else if (right_path_->is_inner_path()) { if (left_tree->get_output_rows() > 0) { @@ -7257,6 +7265,12 @@ int JoinPath::re_estimate_rows(double left_output_rows, double right_output_rows selectivity, equal_sets))) { LOG_WARN("failed to calc join output rows", K(ret)); + } else { + for (int64_t i = 0; i < join_filter_infos_.count(); i ++) { + if (join_filter_infos_.at(i).join_filter_selectivity_ > OB_DOUBLE_EPSINON) { + row_count /= join_filter_infos_.at(i).join_filter_selectivity_; + } + } } } return ret; @@ -7846,9 +7860,32 @@ int ObJoinOrder::generate_base_paths() ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected base path type", K(get_type()), K(ret)); } + if (FAILEDx(init_ambient_card())) { + LOG_WARN("failed to init ambient cardinality", K(ret)); + } return ret; } +int ObJoinOrder::init_ambient_card() +{ + int ret = OB_SUCCESS; + const ObDMLStmt* stmt = NULL; + int64_t idx = -1; + if (OB_ISNULL(get_plan()) || OB_ISNULL(stmt = get_plan()->get_stmt()) || + FALSE_IT(idx = get_plan()->get_stmt()->get_table_bit_index(table_id_)) || + OB_UNLIKELY(idx < 0) || OB_UNLIKELY(idx > stmt->get_table_size())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected param", K(get_plan()), K(table_id_), K(idx), KPC(stmt)); + } else if (OB_FAIL(ambient_card_.prepare_allocate(stmt->get_table_size() + 1))) { + LOG_WARN("failed to allocate", K(ret)); + } else { + for (int64_t i = 0; i < ambient_card_.count(); i ++) { + ambient_card_.at(i) = -1; + } + ambient_card_.at(idx) = output_rows_; + } + return ret; +} int ObJoinOrder::generate_json_table_paths() { @@ -8147,9 +8184,7 @@ int ObJoinOrder::estimate_size_and_width_for_fake_cte(uint64_t table_id, ObSelec } else if (OB_ISNULL(nonrecursive_root)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(ret), K(nonrecursive_root)); - } else if 
(FALSE_IT(nonrecursive_plan->get_selectivity_ctx().init_op_ctx( - &nonrecursive_root->get_output_equal_sets(), nonrecursive_root->get_card()))) { - // do nothing + } else if (FALSE_IT(nonrecursive_plan->get_selectivity_ctx().init_op_ctx(nonrecursive_root))) { } else if (OB_FAIL(get_plan()->get_basic_table_metas().add_generate_table_meta_info( get_plan()->get_stmt(), static_cast(nonrecursive_plan->get_stmt()), @@ -8158,6 +8193,7 @@ int ObJoinOrder::estimate_size_and_width_for_fake_cte(uint64_t table_id, ObSelec nonrecursive_plan->get_selectivity_ctx(), nonrecursive_root->get_card()))) { LOG_WARN("failed to add generate table meta info", K(ret)); + } else if (FALSE_IT(get_plan()->get_selectivity_ctx().init_op_ctx(NULL, nonrecursive_root->get_card()))) { } else if (OB_FAIL(ObOptEstCost::estimate_width_for_table(get_plan()->get_basic_table_metas(), get_plan()->get_selectivity_ctx(), stmt->get_column_items(), @@ -8532,6 +8568,9 @@ int ObJoinOrder::generate_normal_subquery_paths() LOG_WARN("failed to push down filter into subquery", K(ret)); } else if (OB_FAIL(append(helper.filters_, candi_nonpushdown_quals))) { LOG_WARN("failed to append", K(ret)); + } else if (OB_FAIL(ObOptimizerUtil::get_onetime_exprs(helper.pushdown_filters_, + helper.exec_params_))) { + LOG_WARN("failed to get onetime exprs", K(ret)); } else if (OB_FAIL(generate_subquery_paths(helper))) { LOG_WARN("failed to generate subquery path", K(ret)); } @@ -8547,6 +8586,7 @@ int ObJoinOrder::generate_subquery_paths(PathHelper &helper) const ObDMLStmt *parent_stmt = NULL; const ObDMLStmt *child_stmt = NULL; ObLogicalOperator *best_child_plan = NULL; + ObSEArray pushdown_onetimes; if (OB_ISNULL(get_plan()) || OB_ISNULL(parent_stmt = get_plan()->get_stmt()) || OB_ISNULL(child_stmt = static_cast(helper.child_stmt_))) { ret = OB_ERR_UNEXPECTED; @@ -8557,6 +8597,10 @@ int ObJoinOrder::generate_subquery_paths(PathHelper &helper) LOG_WARN("failed to create plan", K(ret)); } else if 
(OB_FAIL(log_plan->add_pushdown_filters(helper.pushdown_filters_))) { LOG_WARN("failed to add pushdown filters", K(ret)); + } else if (OB_FAIL(log_plan->add_exec_params_meta(helper.exec_params_, + get_plan()->get_basic_table_metas(), + get_plan()->get_selectivity_ctx()))) { + LOG_WARN("failed to prepare opt exec param meta", K(ret)); } else { log_plan->set_is_subplan_scan(true); log_plan->set_nonrecursive_plan_for_fake_cte(get_plan()->get_nonrecursive_plan_for_fake_cte()); @@ -8754,6 +8798,15 @@ int ObJoinOrder::init_join_order(const ObJoinOrder *left_tree, } else if (OB_FAIL(get_output_tables().add_members(right_tree->get_output_tables()))) { LOG_WARN("fail to add left tree's output tables", K(ret)); } + + if (FAILEDx(merge_ambient_card(left_tree->get_ambient_card(), + right_tree->get_ambient_card(), + ambient_card_))) { + LOG_WARN("failed to merge rowcnts", K(ret)); + } else { + set_output_rows(-1.0); + } + //设置join info if (OB_SUCC(ret)) { JoinInfo* temp_join_info = NULL; @@ -9099,14 +9152,14 @@ int ObJoinOrder::inner_generate_join_paths(const ObJoinOrder &left_tree, &right_tree.get_tables(), left_tree.get_output_rows(), right_tree.get_output_rows()); - if (OB_FAIL(ObOptSelectivity::calculate_selectivity( + if (OB_FAIL(ObOptSelectivity::calculate_join_selectivity( get_plan()->get_update_table_metas(), get_plan()->get_selectivity_ctx(), hash_join_conditions, equal_cond_sel, get_plan()->get_predicate_selectivities()))) { LOG_WARN("failed to calculate selectivity", K(ret), K(hash_join_conditions)); - } else if (OB_FAIL(ObOptSelectivity::calculate_selectivity( + } else if (OB_FAIL(ObOptSelectivity::calculate_join_selectivity( get_plan()->get_update_table_metas(), get_plan()->get_selectivity_ctx(), hash_join_filters, @@ -9215,14 +9268,14 @@ int ObJoinOrder::inner_generate_join_paths(const ObJoinOrder &left_tree, &right_tree.get_tables(), left_tree.get_output_rows(), right_tree.get_output_rows()); - if (OB_FAIL(ObOptSelectivity::calculate_selectivity( + if 
(OB_FAIL(ObOptSelectivity::calculate_join_selectivity( get_plan()->get_update_table_metas(), get_plan()->get_selectivity_ctx(), merge_join_conditions, equal_cond_sel, get_plan()->get_predicate_selectivities()))) { LOG_WARN("failed to calculate selectivity", K(ret), K(merge_join_conditions)); - } else if (OB_FAIL(ObOptSelectivity::calculate_selectivity( + } else if (OB_FAIL(ObOptSelectivity::calculate_join_selectivity( get_plan()->get_update_table_metas(), get_plan()->get_selectivity_ctx(), merge_join_filters, @@ -9282,6 +9335,7 @@ int ObJoinOrder::inner_generate_join_paths(const ObJoinOrder &left_tree, } } } + get_plan()->get_selectivity_ctx().clear(); return ret; } @@ -10734,16 +10788,33 @@ int ObJoinOrder::find_possible_join_filter_tables(const Path &left_path, int ObJoinOrder::fill_join_filter_info(JoinFilterInfo &join_filter_info) { int ret = OB_SUCCESS; - if (OB_ISNULL(get_plan())) { + uint64_t opt_version = 0; + if (OB_ISNULL(get_plan()) || OB_ISNULL(OPT_CTX.get_query_ctx())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected null", K(ret), K(get_plan())); + } else if (FALSE_IT(get_plan()->get_selectivity_ctx().clear())) { } else if (OB_FAIL(ObOptSelectivity::calculate_distinct(get_plan()->get_update_table_metas(), get_plan()->get_selectivity_ctx(), join_filter_info.rexprs_, join_filter_info.row_count_, - join_filter_info.right_distinct_card_, - false))) { + join_filter_info.right_distinct_card_))) { LOG_WARN("failed to calc distinct", K(ret)); + } else if (!OPT_CTX.get_query_ctx()->check_opt_compat_version(COMPAT_VERSION_4_2_4, COMPAT_VERSION_4_3_0, + COMPAT_VERSION_4_3_3)) { + // do nothing + } else if (OB_FAIL(ObOptSelectivity::is_columns_contain_pkey(get_plan()->get_basic_table_metas(), + join_filter_info.rexprs_, + join_filter_info.is_right_contain_pk_, + join_filter_info.is_right_union_pk_))) { + LOG_WARN("failed to check is columns contain pkey", K(ret)); + } else if (join_filter_info.is_right_contain_pk_ && join_filter_info.is_right_union_pk_) { + const 
OptTableMeta *table_meta = get_plan()->get_update_table_metas().get_table_meta_by_table_id(join_filter_info.table_id_); + if (OB_NOT_NULL(table_meta)) { + join_filter_info.right_distinct_card_ = std::max(1.0, table_meta->get_rows()); + } + if (OB_NOT_NULL(table_meta = get_plan()->get_basic_table_metas().get_table_meta_by_table_id(join_filter_info.table_id_))) { + join_filter_info.right_origin_rows_ = std::max(1.0, table_meta->get_rows()); + } } return ret; @@ -10999,7 +11070,8 @@ int ObJoinOrder::check_normal_join_filter_valid(const Path& left_path, bool cur_dfo_has_shuffle_bf = false; if (OB_ISNULL(plan) || OB_ISNULL(left_tree=left_path.parent_) || - OB_ISNULL(stmt = plan->get_stmt())) { + OB_ISNULL(stmt = plan->get_stmt()) || + OB_ISNULL(OPT_CTX.get_query_ctx())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected null plan", K(ret)); } @@ -11015,14 +11087,22 @@ int ObJoinOrder::check_normal_join_filter_valid(const Path& left_path, } else { double rate = 1 - join_filter_sel; double threshold = 0.6; + double misjudgment_rate = (static_cast(GCONF._bloom_filter_ratio) / 100.0); if (info.in_current_dfo_) { threshold = 0.9; } info.join_filter_selectivity_ = join_filter_sel; + if (OPT_CTX.get_query_ctx()->check_opt_compat_version(COMPAT_VERSION_4_2_4, COMPAT_VERSION_4_3_0, + COMPAT_VERSION_4_3_3) && + 0 <= misjudgment_rate && misjudgment_rate <= 1.0) { + info.join_filter_selectivity_ += (1 - join_filter_sel) * misjudgment_rate; + } info.can_use_join_filter_ = rate >= threshold || NULL != info.force_filter_; OPT_TRACE("join filter info:"); OPT_TRACE("in current dfo:", info.in_current_dfo_); - OPT_TRACE("filter selectivity:", info.join_filter_selectivity_); + OPT_TRACE("right distinct card:", info.right_distinct_card_); + OPT_TRACE("theoretical filter selectivity:", join_filter_sel); + OPT_TRACE("actual filter selectivity:", info.join_filter_selectivity_); OPT_TRACE("force use join filter:", NULL != info.force_filter_); OPT_TRACE("use join filter:", 
info.can_use_join_filter_); LOG_TRACE("succeed to check normal join filter", K(info)); @@ -11040,29 +11120,94 @@ int ObJoinOrder::calc_join_filter_selectivity(const Path& left_path, double left_distinct_card = 1.0; double right_distinct_card = 1.0; join_filter_selectivity = 1.0; - if (OB_ISNULL(plan)) { + bool is_pk_join_fk = false; + bool est_enhance_enable = OPT_CTX.get_query_ctx()->check_opt_compat_version(COMPAT_VERSION_4_2_4, COMPAT_VERSION_4_3_0, + COMPAT_VERSION_4_3_3); + if (OB_ISNULL(plan) || OB_ISNULL(left_path.parent_) || + OB_ISNULL(OPT_CTX.get_query_ctx())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected null plan", K(ret)); + } else if (FALSE_IT(get_plan()->get_selectivity_ctx().init_op_ctx(&left_path.parent_->get_output_equal_sets(), + left_path.get_path_output_rows(), + &left_path.parent_->get_ambient_card()))) { + } else if (est_enhance_enable && + OB_FAIL(calc_join_filter_sel_for_pk_join_fk(left_path, info, join_filter_selectivity, is_pk_join_fk))) { + LOG_WARN("failed to calc pk join fk join filter sel", K(ret)); + } else if (is_pk_join_fk) { + // do nothing } else if (OB_FAIL(ObOptSelectivity::calculate_distinct(plan->get_update_table_metas(), plan->get_selectivity_ctx(), info.lexprs_, left_path.get_path_output_rows(), left_distinct_card, - false))) { + est_enhance_enable))) { LOG_WARN("failed to calc distinct", K(ret)); } else { join_filter_selectivity = left_distinct_card / info.right_distinct_card_; + } + if (OB_SUCC(ret)) { if (join_filter_selectivity < 0) { join_filter_selectivity = 0; } else if (join_filter_selectivity > 0.9) { join_filter_selectivity = 0.9; } - LOG_TRACE("succeed to calc join filter selectivity", K(join_filter_selectivity), + LOG_TRACE("succeed to calc join filter selectivity", K(is_pk_join_fk), K(join_filter_selectivity), K(left_distinct_card), K(info.right_distinct_card_)); } return ret; } +int ObJoinOrder::calc_join_filter_sel_for_pk_join_fk(const Path& left_path, + JoinFilterInfo& info, + double 
&join_filter_selectivity, + bool &is_valid) +{ + int ret = OB_SUCCESS; + is_valid = false; + ObLogPlan *plan = get_plan(); + bool left_contain_pk = false; + bool is_left_union_pk = false; + uint64_t left_table_id = OB_INVALID_ID; + double pk_origin_rows = 1.0; + double left_ndv = 1.0; + if (OB_ISNULL(plan) || OB_ISNULL(left_path.parent_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null plan", K(ret)); + } else if (OB_FAIL(ObOptSelectivity::is_columns_contain_pkey(plan->get_update_table_metas(), + info.lexprs_, + left_contain_pk, + is_left_union_pk, + &left_table_id))) { + LOG_WARN("failed to check is columns contain pkey", K(ret)); + } else if (!info.is_right_contain_pk_ && left_contain_pk && is_left_union_pk) { + pk_origin_rows = plan->get_basic_table_metas().get_rows(left_table_id); + if (pk_origin_rows > OB_DOUBLE_EPSINON) { + is_valid = true; + if (OB_FAIL(plan->get_selectivity_ctx().get_ambient_card(left_table_id, left_ndv))) { + LOG_WARN("failed to get ambient card", K(ret)); + } else { + join_filter_selectivity = left_ndv / std::min(pk_origin_rows, info.right_distinct_card_); + } + } + } else if (info.is_right_union_pk_ && info.is_right_contain_pk_ && !left_contain_pk && OB_INVALID_ID != left_table_id) { + pk_origin_rows = info.right_origin_rows_; + is_valid = true; + double fk_origin_rows = plan->get_basic_table_metas().get_rows(left_table_id); + if (OB_FAIL(ObOptSelectivity::calculate_distinct(plan->get_update_table_metas(), + plan->get_selectivity_ctx(), + info.lexprs_, + left_path.get_path_output_rows(), + left_ndv))) { + LOG_WARN("failed to calculate distinct", K(ret), K(left_table_id), K(info)); + } else { + double fk_ndv = ObOptSelectivity::scale_distinct(left_path.get_path_output_rows(), fk_origin_rows, pk_origin_rows); + left_ndv = std::min(left_ndv, fk_ndv); + join_filter_selectivity = left_ndv / info.right_distinct_card_; + } + } + return ret; +} + int ObJoinOrder::find_shuffle_join_filter(const Path& path, bool &find) { int ret = 
OB_SUCCESS; @@ -12804,6 +12949,7 @@ int ObJoinOrder::init_est_sel_info_for_access_path(const uint64_t table_id, bool use_global = false; ObSEArray global_part_ids; double scale_ratio = 1.0; + bool stale_stats = false; if (OPT_CTX.use_default_stat()) { // do nothing } else if (OB_ISNULL(OPT_CTX.get_opt_stat_manager())) { @@ -12853,6 +12999,7 @@ int ObJoinOrder::init_est_sel_info_for_access_path(const uint64_t table_id, } else { last_analyzed = stat.get_last_analyzed(); is_stat_locked = stat.get_stat_locked(); + stale_stats = stat.get_stale_stats(); table_meta_info_.table_row_count_ = stat.get_row_count(); table_meta_info_.part_size_ = !use_global ? static_cast(stat.get_avg_data_size()) : static_cast(stat.get_avg_data_size() * all_used_part_id.count()) @@ -12862,7 +13009,7 @@ int ObJoinOrder::init_est_sel_info_for_access_path(const uint64_t table_id, table_meta_info_.has_opt_stat_ = has_opt_stat; LOG_TRACE("total rowcount, use statistics", K(table_meta_info_.table_row_count_), K(table_meta_info_.average_row_size_), K(table_meta_info_.micro_block_count_), - K(table_meta_info_.part_size_)); + K(table_meta_info_.part_size_), K(has_opt_stat), K(is_stat_locked), K(stale_stats)); } } @@ -12914,7 +13061,8 @@ int ObJoinOrder::init_est_sel_info_for_access_path(const uint64_t table_id, last_analyzed, is_stat_locked, table_partition_info_, - &table_meta_info_))) { + &table_meta_info_, + stale_stats))) { LOG_WARN("failed to add base table meta info", K(ret)); } } @@ -13110,7 +13258,7 @@ int ObJoinOrder::init_est_sel_info_for_subquery(const uint64_t table_id, ret = OB_INVALID_ARGUMENT; LOG_WARN("Invalid argument", K(ret), K(get_plan()), K(root), K(child_plan), K(child_stmt)); } else { - child_plan->get_selectivity_ctx().init_op_ctx(&root->get_output_equal_sets(), root->get_card()); + child_plan->get_selectivity_ctx().init_op_ctx(root); if (OB_FAIL(get_plan()->get_basic_table_metas().add_generate_table_meta_info( get_plan()->get_stmt(), static_cast(child_stmt), @@ -13221,6 
+13369,417 @@ int ObJoinOrder::check_and_remove_is_null_qual(ObLogPlan *plan, return ret; } +int ObJoinOrder::merge_ambient_card(const ObIArray &left_ambient_card, + const ObIArray &right_ambient_card, + ObIArray &cur_ambient_card) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(left_ambient_card.count() != right_ambient_card.count())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected param", K(left_ambient_card), K(right_ambient_card)); + } else if (OB_FAIL(cur_ambient_card.prepare_allocate(left_ambient_card.count()))) { + LOG_WARN("failed to allocate", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < cur_ambient_card.count(); i ++) { + double left_rowcnt = left_ambient_card.at(i); + double right_rowcnt = right_ambient_card.at(i); + if (OB_UNLIKELY(left_rowcnt >= 0 && right_rowcnt >= 0)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected ambient card", K(left_ambient_card), K(right_ambient_card)); + } else if (left_rowcnt >= 0) { + cur_ambient_card.at(i) = left_rowcnt; + } else if (right_rowcnt >= 0) { + cur_ambient_card.at(i) = right_rowcnt; + } else { + cur_ambient_card.at(i) = -1; + } + } + return ret; +} + +int ObJoinOrder::scale_ambient_card(const double origin_rows, + const double new_rows, + const ObIArray &origin_ambient_card, + ObIArray &ambient_card) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(ambient_card.assign(origin_ambient_card))) { + LOG_WARN("failed to assign", K(ret)); + } else if (new_rows < origin_rows) { + for (int64_t i = 0; i < ambient_card.count(); i ++) { + if (ambient_card.at(i) >= 0) { + ambient_card.at(i) = ObOptSelectivity::scale_distinct(new_rows, origin_rows, ambient_card.at(i)); + } + } + } + return ret; +} + +int ObJoinOrder::revise_cardinality(const ObJoinOrder *left_tree, + const ObJoinOrder *right_tree, + const JoinInfo &join_info) +{ + int ret = OB_SUCCESS; + double sel = 1.0; + EqualSets equal_sets; + ObSEArray cur_join_ambient_card; + if (OB_ISNULL(left_tree) || OB_ISNULL(right_tree) || + OB_ISNULL(get_plan()) || 
OB_ISNULL(OPT_CTX.get_query_ctx())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(left_tree), K(right_tree), K(get_plan()), K(ret)); + } else if (!OPT_CTX.get_query_ctx()->check_opt_compat_version(COMPAT_VERSION_4_2_4, COMPAT_VERSION_4_3_0, + COMPAT_VERSION_4_3_3)) { + // do nothing + } else if (OB_FAIL(append(equal_sets, left_tree->get_output_equal_sets())) || + OB_FAIL(append(equal_sets, right_tree->get_output_equal_sets()))) { + LOG_WARN("failed to append equal sets", K(ret)); + } else if (OB_FAIL(merge_ambient_card(left_tree->get_ambient_card(), right_tree->get_ambient_card(), cur_join_ambient_card))) { + LOG_WARN("failed to merge rowcnts", K(ret)); + } else if (OB_FAIL(calc_join_ambient_card(get_plan(), + *left_tree, + *right_tree, + output_rows_, + join_info, equal_sets, + cur_join_ambient_card))) { + LOG_WARN("failed to scale base table rowcnts", K(ret)); + } else if (OB_UNLIKELY(cur_join_ambient_card.count() != ambient_card_.count())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected ambient card", K(left_tree->get_ambient_card()), + K(right_tree->get_ambient_card()), K(cur_join_ambient_card), K(ambient_card_)); + } else { + get_plan()->get_selectivity_ctx().clear(); + for (int64_t i = 0; i < ambient_card_.count(); i ++) { + ambient_card_.at(i) = std::min(ambient_card_.at(i), cur_join_ambient_card.at(i)); + } + OPT_TRACE("left output rows :", left_tree->get_output_rows(), " ambient cardinality :", left_tree->get_ambient_card()); + OPT_TRACE("right output rows :", right_tree->get_output_rows(), " ambient cardinality :", right_tree->get_ambient_card()); + OPT_TRACE("output rows of", left_tree, "join", right_tree, ":", get_output_rows(), " ambient cardinality :", cur_join_ambient_card); + OPT_TRACE("Revised ambient cardinality :", ambient_card_); + LOG_DEBUG("estimate join ambient card", K(table_set_), K(left_tree->get_tables()), K(right_tree->get_tables()), K(cur_join_ambient_card)); + } + return ret; +} + + +int 
ObJoinOrder::calc_join_ambient_card(ObLogPlan *plan, + const ObJoinOrder &left_tree, + const ObJoinOrder &right_tree, + const double join_output_rows, + const JoinInfo &join_info, + EqualSets &equal_sets, + ObIArray &ambient_card) +{ + int ret = OB_SUCCESS; + const ObJoinType join_type = join_info.join_type_; + JoinInfo tmp_join_info; + double left_ambient_card_sel = 1.0; + double right_ambient_card_sel = 1.0; + double where_sel_for_oj = 1.0; + double tmp_rows = 0.0; + ObSEArray join_conditions; + ObSEArray ambient_card_sels; + const ObRelIds &left_ids = left_tree.get_tables(); + const ObRelIds &right_ids = right_tree.get_tables(); + double left_output_rows = left_tree.get_output_rows(); + double right_output_rows = right_tree.get_output_rows(); + if (OB_ISNULL(plan)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret)); + } else if (OB_FAIL(append(join_conditions, join_info.on_conditions_)) || + OB_FAIL(append(join_conditions, join_info.where_conditions_))) { + LOG_WARN("failed to append", K(ret)); + } else if (OB_FAIL(ambient_card_sels.prepare_allocate(ambient_card.count()))) { + LOG_WARN("failed to prepare allocate", K(ret)); + } else { + plan->get_selectivity_ctx().set_assumption_type(join_type); + } + for (int64_t i = 0; i < ambient_card_sels.count(); i ++) { + ambient_card_sels.at(i) = 1.0; + } + + // calculate selectivity for left ambient cardinality + if (OB_SUCC(ret)) { + tmp_join_info.join_type_ = IS_ANTI_JOIN(join_type) ? 
LEFT_ANTI_JOIN : LEFT_SEMI_JOIN; + tmp_join_info.where_conditions_.reuse(); + if (CONNECT_BY_JOIN == join_type) { + // todo + } else if (IS_RIGHT_SEMI_ANTI_JOIN(join_type)) { + left_ambient_card_sel = 0.0; + for (int64_t i = 0; i < ambient_card_sels.count(); i ++) { + if (left_ids.has_member(i)) { + ambient_card_sels.at(i) = 0.0; + } + } + } else if (LEFT_OUTER_JOIN == join_type || FULL_OUTER_JOIN == join_type) { + left_ambient_card_sel = 1.0; + for (int64_t i = 0; i < ambient_card_sels.count(); i ++) { + if (left_ids.has_member(i)) { + ambient_card_sels.at(i) = 1.0; + } + } + } else if (RIGHT_OUTER_JOIN == join_type && OB_FAIL(append(tmp_join_info.where_conditions_, join_info.on_conditions_))) { + LOG_WARN("failed to append on conditions", K(ret)); + } else if (!IS_OUTER_JOIN(join_type) && OB_FAIL(append(tmp_join_info.where_conditions_, join_info.where_conditions_))) { + LOG_WARN("failed to append", K(ret)); + } else if (OB_FAIL(calc_join_output_rows(plan, + left_tree.get_tables(), + right_tree.get_tables(), + left_output_rows, + right_output_rows, + tmp_join_info, + tmp_rows, + left_ambient_card_sel, + equal_sets))) { + LOG_WARN("failed to calc join output rows", K(ret)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < ambient_card_sels.count(); i ++) { + if (!left_ids.has_member(i) || !join_info.table_set_.has_member(i)) { + // do nothing + } else if (OB_FAIL(calc_table_ambient_card(plan, + i, + left_tree, + right_tree, + left_output_rows, + right_output_rows, + tmp_join_info, + equal_sets, + ambient_card, + ambient_card_sels.at(i), + join_type))) { + LOG_WARN("failed to calc table ambient card", K(ret)); + } + } + } + } + + // calculate selectivity for right ambient cardinality + if (OB_SUCC(ret)) { + tmp_join_info.join_type_ = IS_ANTI_JOIN(join_type) ? 
RIGHT_ANTI_JOIN : RIGHT_SEMI_JOIN; + tmp_join_info.where_conditions_.reuse(); + if (CONNECT_BY_JOIN == join_type) { + // todo + } else if (IS_LEFT_SEMI_ANTI_JOIN(join_type)) { + right_ambient_card_sel = 0.0; + for (int64_t i = 0; i < ambient_card_sels.count(); i ++) { + if (right_ids.has_member(i)) { + ambient_card_sels.at(i) = 0.0; + } + } + } else if (RIGHT_OUTER_JOIN == join_type || FULL_OUTER_JOIN == join_type) { + right_ambient_card_sel = 1.0; + for (int64_t i = 0; i < ambient_card_sels.count(); i ++) { + if (right_ids.has_member(i)) { + ambient_card_sels.at(i) = 1.0; + } + } + } else if (LEFT_OUTER_JOIN == join_type && OB_FAIL(append(tmp_join_info.where_conditions_, join_info.on_conditions_))) { + LOG_WARN("failed to assign on conditions", K(ret)); + } else if (!IS_OUTER_JOIN(join_type) && OB_FAIL(append(tmp_join_info.where_conditions_, join_info.where_conditions_))) { + LOG_WARN("failed to append", K(ret)); + } else if (OB_FAIL(calc_join_output_rows(plan, + left_tree.get_tables(), + right_tree.get_tables(), + left_output_rows, + right_output_rows, + tmp_join_info, + tmp_rows, + right_ambient_card_sel, + equal_sets))) { + LOG_WARN("failed to calc join output rows", K(ret)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < ambient_card_sels.count(); i ++) { + if (!right_ids.has_member(i) || !join_info.table_set_.has_member(i)) { + // do nothing + } else if (OB_FAIL(calc_table_ambient_card(plan, + i, + left_tree, + right_tree, + left_output_rows, + right_output_rows, + tmp_join_info, + equal_sets, + ambient_card, + ambient_card_sels.at(i), + join_type))) { + LOG_WARN("failed to calc table ambient card", K(ret)); + } + } + } + } + + if (OB_SUCC(ret) && IS_OUTER_JOIN(join_type) && !join_info.where_conditions_.empty()) { + plan->get_selectivity_ctx().init_join_ctx(join_type, + &left_tree.get_tables(), + &right_tree.get_tables(), + left_output_rows, + right_output_rows, + &equal_sets); + if (OB_FAIL(ObOptSelectivity::calculate_join_selectivity( + 
plan->get_update_table_metas(), + plan->get_selectivity_ctx(), + join_info.where_conditions_, where_sel_for_oj, + plan->get_predicate_selectivities()))) { + LOG_WARN("failed to calc filter selectivities", K(join_info.where_conditions_), K(ret)); + } + } + + OPT_TRACE("outer join filter selectivity :", where_sel_for_oj); + OPT_TRACE("selectivity of the left side :", left_ambient_card_sel); + OPT_TRACE("selectivity of the right side :", right_ambient_card_sel); + OPT_TRACE("selectivity of each table :", ambient_card_sels); + LOG_TRACE("succeed to calc selectivity of all ambient cardinality", K(ret), K(left_ids), K(right_ids), + K(ambient_card), K(left_ambient_card_sel), K(right_ambient_card_sel), K(ambient_card_sels), K(where_sel_for_oj)); + + /** + * For (t1, t2) left join (t3, t4) on t1.c1 = t3.c1 and t2.c1 = t4.c1 and t1.c2 + t2.c2 < t3.c2 where t1.c3 <=> t3.c3 + * step 1 : table t1 is filtered by the direct join condition, `t1.c1 = t3.c1` + * step 2 : table t1 is filtered by the indirect join condition, `t2.c1 = t4.c1 and t1.c2 + t2.c2 < t3.c2` + * step 3 : table t1 is filtered by the where condition, `t1.c3 <=> t3.c3` + */ + for (int64_t i = 0; OB_SUCC(ret) && i < ambient_card.count(); i ++) { + double step1_rows = 0; + double step2_rows = 0; + if (left_ids.has_member(i)) { + ambient_card.at(i) *= ambient_card_sels.at(i); + step1_rows = left_output_rows * ambient_card_sels.at(i); + step2_rows = left_output_rows * left_ambient_card_sel; + } else if (right_ids.has_member(i)) { + ambient_card.at(i) *= ambient_card_sels.at(i); + step1_rows = right_output_rows * ambient_card_sels.at(i); + step2_rows = right_output_rows * right_ambient_card_sel; + } + if (ambient_card.at(i) >= 0) { + if (step2_rows < step1_rows) { + ambient_card.at(i) = ObOptSelectivity::scale_distinct(step2_rows, step1_rows, ambient_card.at(i)); + } + if (std::fabs(where_sel_for_oj) <= OB_DOUBLE_EPSINON) { + ambient_card.at(i) = 0; + } else { + ambient_card.at(i) = 
ObOptSelectivity::scale_distinct(join_output_rows, join_output_rows / where_sel_for_oj, ambient_card.at(i)); + } + ambient_card.at(i) = std::min(join_output_rows, ambient_card.at(i)); + } + } + if (OB_SUCC(ret)) { + plan->get_selectivity_ctx().set_assumption_type(UNKNOWN_JOIN); + } + return ret; +} + +/** + * (t1 join t2 on 1 = 1) join t3 on t1.c1 = t3.c1 and t2.c2 = t3.c2 + * In this case, the ambient cardinality selectivity of (t1, t2) is invalid for single table t1 or t2. + * It is too small while the ambient cardinality of t1 and t2 might be lossless. + * So, we calculate the ambient cardinality for each table. +*/ +int ObJoinOrder::calc_table_ambient_card(ObLogPlan *plan, + uint64_t table_index, + const ObJoinOrder &left_tree, + const ObJoinOrder &right_tree, + double input_rows, + double right_rows, + const JoinInfo &join_info, + EqualSets &equal_sets, + const ObIArray &ambient_card, + double &ambient_card_sel, + const ObJoinType assumption_type) +{ + int ret = OB_SUCCESS; + JoinInfo table_join_info; + table_join_info.join_type_ = join_info.join_type_; + ObRelIds table_id; + ObRelIds exclusion_ids; + double tmp_rows = 1.0; + ambient_card_sel = 1.0; + const ObRelIds &left_ids = left_tree.get_tables(); + const ObRelIds &right_ids = right_tree.get_tables(); + bool in_left = left_ids.has_member(table_index); + bool in_right = right_ids.has_member(table_index); + if (OB_ISNULL(plan) || + OB_UNLIKELY(!IS_SEMI_ANTI_JOIN(join_info.join_type_)) || + OB_UNLIKELY(!in_left && !in_right)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unepxected param", K(plan), K(join_info), K(left_ids), K(right_ids), K(table_index)); + } else if (OB_FAIL(table_id.add_member(table_index))) { + LOG_WARN("failed to add member", K(ret)); + } else if (OB_FAIL(exclusion_ids.except(in_left ? 
left_ids : right_ids, table_id))) { + LOG_WARN("failed to except", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < join_info.where_conditions_.count(); i ++) { + bool is_direct_condition = false; + if (OB_FAIL(check_direct_join_condition(join_info.where_conditions_.at(i), + equal_sets, + table_id, + exclusion_ids, + is_direct_condition))) { + LOG_WARN("failed to check join condition", K(ret), K(left_ids), K(right_ids)); + } else if (!is_direct_condition) { + // do nothing + } else if (OB_FAIL(table_join_info.where_conditions_.push_back(join_info.where_conditions_.at(i)))) { + LOG_WARN("failed to push back expr", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (table_join_info.where_conditions_.empty()) { + ambient_card_sel = 1.0; + } else if (OB_FAIL(calc_join_output_rows(plan, + left_tree.get_tables(), + right_tree.get_tables(), + input_rows, + right_rows, + table_join_info, + tmp_rows, + ambient_card_sel, + equal_sets))) { + LOG_WARN("failed to calc join output rows", K(ret)); + } + return ret; +} + +/** + * For `(t1 join t2 on t1.c1 = t2.c1) join t3 on t1.c1 = t3.c1 and t1.c2 = t3.c2`, + * `t1.c1 = t3.c1` is a direct join condition for `t1`, `t2` and `t3`, + * `t1.c2 = t3.c2` is a direct join condition only for `t1` and `t3`. 
+*/ +int ObJoinOrder::check_direct_join_condition(ObRawExpr *expr, + const EqualSets &equal_sets, + const ObRelIds &table_id, + const ObRelIds &exclusion_ids, + bool &is_valid) +{ + int ret = OB_SUCCESS; + ObSEArray col_exprs; + is_valid = false; + if (OB_ISNULL(expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected join condition", K(ret), K(expr), K(table_id)); + } else if (expr->get_relation_ids().is_superset(table_id) && + !expr->get_relation_ids().overlap(exclusion_ids)) { + is_valid = true; + } else if (OB_FAIL(ObRawExprUtils::extract_column_exprs(expr, exclusion_ids, col_exprs))) { + LOG_WARN("failed to extract column exprs", K(ret)); + } else if (col_exprs.count() == 1) { + int64_t eq_set_idx = OB_INVALID_ID; + if (OB_FAIL(ObOptimizerUtil::find_expr_in_equal_sets(equal_sets, + col_exprs.at(0), + eq_set_idx))) { + LOG_WARN("failed to find expr", K(ret)); + } else if (eq_set_idx != OB_INVALID_ID) { + const EqualSet& equal_set = *equal_sets.at(eq_set_idx); + for (int64_t j = 0; OB_SUCC(ret) && !is_valid && j < equal_set.count(); j++) { + ObRawExpr *equal_expr = equal_set.at(j); + if (OB_ISNULL(equal_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else if (equal_expr->get_relation_ids().equal(table_id)) { + is_valid = true; + } + } + } + } + return ret; +} + int ObJoinOrder::calc_join_output_rows(ObLogPlan *plan, const ObRelIds &left_ids, const ObRelIds &right_ids, @@ -13229,7 +13788,7 @@ int ObJoinOrder::calc_join_output_rows(ObLogPlan *plan, const JoinInfo &join_info, double &new_rows, double &selectivity, - EqualSets &equal_sets) + const EqualSets &equal_sets) { int ret = OB_SUCCESS; const ObJoinType join_type = join_info.join_type_; @@ -13243,11 +13802,11 @@ int ObJoinOrder::calc_join_output_rows(ObLogPlan *plan, right_output_rows, &equal_sets))) { } else if (INNER_JOIN == join_type) { - if (OB_FAIL(ObOptSelectivity::calculate_selectivity(plan->get_update_table_metas(), - plan->get_selectivity_ctx(), - 
join_info.where_conditions_, - selectivity, - plan->get_predicate_selectivities()))) { + if (OB_FAIL(ObOptSelectivity::calculate_join_selectivity(plan->get_update_table_metas(), + plan->get_selectivity_ctx(), + join_info.where_conditions_, + selectivity, + plan->get_predicate_selectivities()))) { LOG_WARN("Failed to calc filter selectivities", K(ret)); } else { new_rows = left_output_rows * right_output_rows * selectivity; @@ -13267,7 +13826,7 @@ int ObJoinOrder::calc_join_output_rows(ObLogPlan *plan, left_has_is_null, right_has_is_null))) { LOG_WARN("failed to check and remove is null qual", K(ret)); - } else if (OB_FAIL(ObOptSelectivity::calculate_selectivity( + } else if (OB_FAIL(ObOptSelectivity::calculate_join_selectivity( plan->get_update_table_metas(), plan->get_selectivity_ctx(), normal_quals, oj_qual_sel, @@ -13326,7 +13885,7 @@ int ObJoinOrder::calc_join_output_rows(ObLogPlan *plan, // selectivity. So refine selectivity as output_row / (left_row * right_row) selectivity = new_rows / (left_output_rows * right_output_rows); } - } else if (OB_FAIL(ObOptSelectivity::calculate_selectivity( + } else if (OB_FAIL(ObOptSelectivity::calculate_join_selectivity( plan->get_update_table_metas(), plan->get_selectivity_ctx(), join_info.on_conditions_, oj_filter_sel, @@ -13356,11 +13915,11 @@ int ObJoinOrder::calc_join_output_rows(ObLogPlan *plan, } } else if (IS_SEMI_ANTI_JOIN(join_type)) { // semi/anti join is treated as table filter, use origin table metas - if (OB_FAIL(ObOptSelectivity::calculate_selectivity(plan->get_update_table_metas(), - plan->get_selectivity_ctx(), - join_info.where_conditions_, - selectivity, - plan->get_predicate_selectivities()))) { + if (OB_FAIL(ObOptSelectivity::calculate_join_selectivity(plan->get_update_table_metas(), + plan->get_selectivity_ctx(), + join_info.where_conditions_, + selectivity, + plan->get_predicate_selectivities()))) { LOG_WARN("Failed to calc filter selectivities", K(ret)); } else { double outer_rows = 
IS_LEFT_SEMI_ANTI_JOIN(join_type)? @@ -13371,7 +13930,8 @@ int ObJoinOrder::calc_join_output_rows(ObLogPlan *plan, //如果有anti join的笛卡尔积,要么左表全输出、要么不输出任何行, //取决于右表是否有输出,但是,我们不应该直接估行为0, //一个简单的策略是,如果右表估行为0,那么应该输出左表的行数,而不是0 - new_rows = outer_rows - outer_rows * selectivity; + selectivity = 1 - selectivity; + new_rows = outer_rows * selectivity; if (LEFT_ANTI_JOIN == join_type && std::fabs(right_output_rows) < OB_DOUBLE_EPSINON) { new_rows = left_output_rows; @@ -13384,14 +13944,14 @@ int ObJoinOrder::calc_join_output_rows(ObLogPlan *plan, } else if (CONNECT_BY_JOIN == join_type) { double join_qual_sel = 1.0; double join_filter_sel = 1.0; - if (OB_FAIL(ObOptSelectivity::calculate_selectivity( + if (OB_FAIL(ObOptSelectivity::calculate_join_selectivity( plan->get_update_table_metas(), plan->get_selectivity_ctx(), join_info.where_conditions_, join_qual_sel, plan->get_predicate_selectivities()))) { LOG_WARN("failed to calc filter selectivities", K(join_info.where_conditions_), K(ret)); - } else if (OB_FAIL(ObOptSelectivity::calculate_selectivity( + } else if (OB_FAIL(ObOptSelectivity::calculate_join_selectivity( plan->get_update_table_metas(), plan->get_selectivity_ctx(), join_info.on_conditions_, @@ -13410,7 +13970,7 @@ int ObJoinOrder::calc_join_output_rows(ObLogPlan *plan, selectivity = connect_by_selectivity; } } - plan->get_selectivity_ctx().clear_equal_sets(); + plan->get_selectivity_ctx().clear(); LOG_TRACE("estimate join size and width", K(left_output_rows), K(right_output_rows), K(selectivity), K(new_rows)); return ret; @@ -14271,6 +14831,11 @@ int ObJoinOrder::generate_inner_subquery_paths(const ObDMLStmt &parent_stmt, LOG_WARN("failed to rename pushdown filter", K(ret)); } else if (OB_FAIL(append(helper.filters_, candi_nonpushdown_quals))) { LOG_WARN("failed to append", K(ret)); + } else if (OB_FAIL(ObOptimizerUtil::get_onetime_exprs(helper.pushdown_filters_, + helper.exec_params_))) { + LOG_WARN("failed to get onetime exprs", K(ret)); + } else if 
(OB_FAIL(append(helper.exec_params_, nl_params))) { + LOG_WARN("failed to append", K(ret)); } else if (OB_FAIL(generate_subquery_paths(helper))) { LOG_WARN("failed to generate subquery path", K(ret)); } else if (OB_FAIL(check_and_fill_inner_path_info(helper, diff --git a/src/sql/optimizer/ob_join_order.h b/src/sql/optimizer/ob_join_order.h index 4a69e58851..846e2c27e0 100644 --- a/src/sql/optimizer/ob_join_order.h +++ b/src/sql/optimizer/ob_join_order.h @@ -172,7 +172,10 @@ namespace sql pushdown_filter_table_(), in_current_dfo_(true), skip_subpart_(false), - use_column_store_(false) {} + use_column_store_(false), + is_right_contain_pk_(false), + is_right_union_pk_(false), + right_origin_rows_(1.0) {} TO_STRING_KV( K_(lexprs), @@ -191,7 +194,10 @@ namespace sql K_(force_part_filter), K_(in_current_dfo), K_(skip_subpart), - K_(use_column_store) + K_(use_column_store), + K_(is_right_contain_pk), + K_(is_right_union_pk), + K_(right_origin_rows) ); common::ObSEArray lexprs_; @@ -215,6 +221,10 @@ namespace sql // If the table is a 1-level partition, this value is false. 
bool skip_subpart_; bool use_column_store_; + bool is_right_contain_pk_; + bool is_right_union_pk_; + double right_origin_rows_; + }; struct EstimateCostInfo { @@ -428,6 +438,8 @@ struct EstimateCostInfo { } int compute_path_property_from_log_op(); int set_parallel_and_server_info_for_match_all(); + ObIArray &get_ambient_card() { return ambient_card_; } + const ObIArray &get_ambient_card() const { return ambient_card_; } TO_STRING_KV(K_(is_local_order), K_(ordering), K_(interesting_order_info), @@ -442,7 +454,8 @@ struct EstimateCostInfo { K_(phy_plan_type), K_(location_type), K_(is_pipelined_path), - K_(is_nl_style_pipelined_path)); + K_(is_nl_style_pipelined_path), + K_(ambient_card)); public: /** * 表示当前join order最终的父join order节点 @@ -483,6 +496,7 @@ struct EstimateCostInfo { common::ObSEArray server_list_; bool is_pipelined_path_; bool is_nl_style_pipelined_path_; + common::ObSEArray ambient_card_; private: DISALLOW_COPY_AND_ASSIGN(Path); @@ -730,7 +744,10 @@ struct EstimateCostInfo { EstimateCostInfo &right_param, bool re_est_for_op); int try_set_batch_nlj_for_right_access_path(bool enable); - int re_estimate_rows(double left_output_rows, double right_output_rows, double &row_count); + int re_estimate_rows(ObIArray &pushdown_join_filter_infos, + double left_output_rows, + double right_output_rows, + double &row_count); int cost_nest_loop_join(int64_t join_parallel, double left_output_rows, double left_cost, @@ -1214,6 +1231,8 @@ struct NullAwareAntiJoinInfo { ObSEArray expr_constraints_; ObBaseTableEstMethod est_method_; + // include nl params and onetime params + ObSEArray exec_params_; }; struct DeducedExprInfo { @@ -1698,6 +1717,8 @@ struct NullAwareAntiJoinInfo { */ int init_base_join_order(const TableItem *table_item); + int init_ambient_card(); + int generate_base_paths(); int generate_normal_base_table_paths(); @@ -1984,6 +2005,11 @@ struct NullAwareAntiJoinInfo { JoinFilterInfo& info, double &join_filter_selectivity); + int 
calc_join_filter_sel_for_pk_join_fk(const Path& left_path, + JoinFilterInfo& info, + double &join_filter_selectivity, + bool &is_valid); + int find_shuffle_join_filter(const Path& path, bool &find); int check_partition_join_filter_valid(const DistAlgo join_dist_algo, @@ -2120,6 +2146,9 @@ struct NullAwareAntiJoinInfo { InnerPathInfos &get_inner_path_infos() { return inner_path_infos_; } const InnerPathInfos &get_inner_path_infos() const { return inner_path_infos_; } + ObIArray &get_ambient_card() { return ambient_card_; } + const ObIArray &get_ambient_card() const { return ambient_card_; } + int64_t get_name(char *buf, const int64_t buf_len) { int64_t pos = 0; @@ -2359,7 +2388,35 @@ struct NullAwareAntiJoinInfo { const JoinInfo &join_info, double &new_rows, double &selectivity, - EqualSets &equal_sets); + const EqualSets &equal_sets); + static int merge_ambient_card(const ObIArray &left_ambient_card, + const ObIArray &right_ambient_card, + ObIArray &cur_ambient_card); + static int scale_ambient_card(const double origin_rows, + const double new_rows, + const ObIArray &origin_ambient_card, + ObIArray &ambient_card); + static int calc_join_ambient_card(ObLogPlan *plan, + const ObJoinOrder &left_tree, + const ObJoinOrder &right_tree, + const double join_output_rows, + const JoinInfo &join_info, + EqualSets &equal_sets, + ObIArray &ambient_card); + static int calc_table_ambient_card(ObLogPlan *plan, + uint64_t table_index, + const ObJoinOrder &left_ids, + const ObJoinOrder &right_ids, + double input_rows, + double right_rows, + const JoinInfo &join_info, + EqualSets &equal_sets, + const ObIArray &ambient_card, + double &new_ambient_card, + const ObJoinType assumption_type); + int revise_cardinality(const ObJoinOrder *left_tree, + const ObJoinOrder *right_tree, + const JoinInfo &join_info); inline void set_cnt_rownum(const bool cnt_rownum) { cnt_rownum_ = cnt_rownum; } inline bool get_cnt_rownum() const { return cnt_rownum_; } inline void increase_total_path_num() { 
total_path_num_ ++; } @@ -2377,6 +2434,12 @@ struct NullAwareAntiJoinInfo { bool &left_has_is_null, bool &right_has_is_null); + static int check_direct_join_condition(ObRawExpr *expr, + const EqualSets &equal_sets, + const ObRelIds &table_id, + const ObRelIds &exclusion_ids, + bool &is_valid); + int get_cached_inner_paths(const ObIArray &join_conditions, ObJoinOrder &left_tree, ObJoinOrder &right_tree, @@ -2557,6 +2620,7 @@ struct NullAwareAntiJoinInfo { common::ObSEArray deduced_exprs_info_; bool cnt_rownum_; uint64_t total_path_num_; + common::ObSEArray ambient_card_; private: DISALLOW_COPY_AND_ASSIGN(ObJoinOrder); }; diff --git a/src/sql/optimizer/ob_log_count.cpp b/src/sql/optimizer/ob_log_count.cpp index a37d8c2296..74336d77f6 100644 --- a/src/sql/optimizer/ob_log_count.cpp +++ b/src/sql/optimizer/ob_log_count.cpp @@ -36,8 +36,7 @@ int ObLogCount::est_cost() if (OB_ISNULL(get_plan()) || OB_ISNULL(child = get_child(ObLogicalOperator::first_child))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(get_plan()), K(child), K(ret)); - } else if (OB_FALSE_IT(get_plan()->get_selectivity_ctx().init_op_ctx( - &child->get_output_equal_sets(), child->get_card()))) { + } else if (OB_FALSE_IT(get_plan()->get_selectivity_ctx().init_op_ctx(child))) { } else if (OB_FAIL(ObOptSelectivity::calculate_selectivity(get_plan()->get_update_table_metas(), get_plan()->get_selectivity_ctx(), get_filter_exprs(), @@ -82,8 +81,7 @@ int ObLogCount::do_re_est_cost(EstimateCostInfo ¶m, double &card, double &op OB_ISNULL(child = get_child(ObLogicalOperator::first_child))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(get_plan()), K(child), K(ret)); - } else if (OB_FALSE_IT(get_plan()->get_selectivity_ctx().init_op_ctx( - &child->get_output_equal_sets(), child->get_card()))) { + } else if (OB_FALSE_IT(get_plan()->get_selectivity_ctx().init_op_ctx(child))) { } else if (OB_FAIL(ObOptSelectivity::calculate_selectivity(get_plan()->get_basic_table_metas(), 
get_plan()->get_selectivity_ctx(), get_filter_exprs(), diff --git a/src/sql/optimizer/ob_log_distinct.cpp b/src/sql/optimizer/ob_log_distinct.cpp index 9910816d1e..fbb2c93103 100644 --- a/src/sql/optimizer/ob_log_distinct.cpp +++ b/src/sql/optimizer/ob_log_distinct.cpp @@ -130,18 +130,19 @@ int ObLogDistinct::est_cost() int ret = OB_SUCCESS; double distinct_cost = 0.0; ObLogicalOperator *child = NULL; + double child_ndv = total_ndv_; if (OB_ISNULL(child = get_child(ObLogicalOperator::first_child))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(child), K(ret)); - } else if (OB_UNLIKELY(total_ndv_ < 0)) { + } else if (OB_UNLIKELY(child_ndv < 0)) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected total ndv", K(total_ndv_), K(ret)); - } else if (OB_FAIL(inner_est_cost(get_parallel(), child->get_card(), total_ndv_, distinct_cost))) { + LOG_WARN("get unexpected total ndv", K(child_ndv), K(ret)); + } else if (OB_FAIL(inner_est_cost(get_parallel(), child->get_card(), child_ndv, distinct_cost))) { LOG_WARN("failed to est distinct cost", K(ret)); } else { set_op_cost(distinct_cost); set_cost(child->get_cost() + distinct_cost); - set_card(total_ndv_); + set_card(child_ndv); } return ret; } @@ -200,7 +201,7 @@ int ObLogDistinct::do_re_est_cost(EstimateCostInfo ¶m, double &card, double return ret; } -int ObLogDistinct::inner_est_cost(const int64_t parallel, double child_card, double child_ndv, double &op_cost) +int ObLogDistinct::inner_est_cost(const int64_t parallel, double child_card, double &child_ndv, double &op_cost) { int ret = OB_SUCCESS; double per_dop_card = 0.0; @@ -239,6 +240,11 @@ int ObLogDistinct::inner_est_cost(const int64_t parallel, double child_card, dou distinct_exprs_, opt_ctx); } + + if (opt_ctx.get_query_ctx()->check_opt_compat_version(COMPAT_VERSION_4_2_4, COMPAT_VERSION_4_3_0, + COMPAT_VERSION_4_3_3)) { + child_ndv = std::min(child_card, per_dop_ndv * parallel); + } } return ret; } diff --git a/src/sql/optimizer/ob_log_distinct.h 
b/src/sql/optimizer/ob_log_distinct.h index 1760e77eef..eed1ccaf5e 100644 --- a/src/sql/optimizer/ob_log_distinct.h +++ b/src/sql/optimizer/ob_log_distinct.h @@ -54,7 +54,7 @@ public: virtual int est_cost() override; virtual int est_width() override; virtual int do_re_est_cost(EstimateCostInfo ¶m, double &card, double &op_cost, double &cost) override; - int inner_est_cost(const int64_t parallel, double child_card, double child_ndv, double &op_cost); + int inner_est_cost(const int64_t parallel, double child_card, double &child_ndv, double &op_cost); virtual bool is_block_op() const override { return false; } virtual int compute_fd_item_set() override; virtual int allocate_granule_post(AllocGIContext &ctx) override; diff --git a/src/sql/optimizer/ob_log_join.cpp b/src/sql/optimizer/ob_log_join.cpp index befd9a3bb8..c079969163 100644 --- a/src/sql/optimizer/ob_log_join.cpp +++ b/src/sql/optimizer/ob_log_join.cpp @@ -394,6 +394,16 @@ int ObLogJoin::inner_replace_op_exprs(ObRawExprReplacer &replacer) return ret; } +int ObLogJoin::est_ambient_card() +{ + int ret = OB_SUCCESS; + if (OB_FAIL(ambient_card_.assign(join_path_->parent_->get_ambient_card()))) { + LOG_WARN("failed to assign ambient cards", K(ret)); + } + // do nothing + return ret; +} + int ObLogJoin::do_re_est_cost(EstimateCostInfo ¶m, double &card, double &op_cost, double &cost) { int ret = OB_SUCCESS; @@ -430,9 +440,10 @@ int ObLogJoin::do_re_est_cost(EstimateCostInfo ¶m, double &card, double &op_ LOG_WARN("failed to re estimate cost", K(ret)); } else if (OB_FAIL(join_path_->try_set_batch_nlj_for_right_access_path(false))) { LOG_WARN("failed to try set batch nlj for right access path", K(ret)); - } else if (OB_FAIL(join_path_->re_estimate_rows(left_output_rows, - right_output_rows, - card))) { + } else if (OB_FAIL(join_path_->re_estimate_rows(param.join_filter_infos_, + left_output_rows, + right_output_rows, + card))) { LOG_WARN("failed to re estimate rows", K(ret)); } else if (NESTED_LOOP_JOIN == join_algo_) 
{ if (OB_FAIL(join_path_->cost_nest_loop_join(parallel, diff --git a/src/sql/optimizer/ob_log_join.h b/src/sql/optimizer/ob_log_join.h index 65e156fbf5..b7730d0a32 100644 --- a/src/sql/optimizer/ob_log_join.h +++ b/src/sql/optimizer/ob_log_join.h @@ -119,6 +119,7 @@ namespace sql const int64_t buf_len, int64_t &pos); virtual int do_re_est_cost(EstimateCostInfo ¶m, double &card, double &op_cost, double &cost) override; + virtual int est_ambient_card() override; /* * IN right_child_sharding_info the join's right child sharding info * IN right_keys the right join equal condition diff --git a/src/sql/optimizer/ob_log_plan.cpp b/src/sql/optimizer/ob_log_plan.cpp index cd17f38b92..fe9fa69722 100644 --- a/src/sql/optimizer/ob_log_plan.cpp +++ b/src/sql/optimizer/ob_log_plan.cpp @@ -2072,6 +2072,10 @@ int ObLogPlan::inner_generate_join_order(ObIArray &join_rels, OPT_TRACE_TITLE("Now", left_tree, "join", right_tree, join_info); if (OB_FAIL(ret)) { //do nothing + } else if (OB_FAIL(join_tree->revise_cardinality(left_tree, + right_tree, + join_info))) { + LOG_WARN("failed to revise ambient card", K(ret)); } else if (OB_FAIL(join_tree->generate_join_paths(*left_tree, *right_tree, join_info, @@ -2638,8 +2642,16 @@ int ObLogPlan::generate_subplan_for_query_ref(ObQueryRefRawExpr *query_ref, LOG_WARN("failed to create plan", K(ret), K(opt_ctx.get_query_ctx()->get_sql_stmt())); } else if (FALSE_IT(logical_plan->set_nonrecursive_plan_for_fake_cte(get_nonrecursive_plan_for_fake_cte()))) { // never reach + } else if (OB_FAIL(logical_plan->add_exec_params_meta(query_ref->get_exec_params(), + get_basic_table_metas(), + get_selectivity_ctx()))) { + LOG_WARN("failed to prepare exec param meta", K(ret)); } else if (OB_FAIL(SMART_CALL(static_cast(logical_plan)->generate_raw_plan()))) { LOG_WARN("failed to optimize sub-select", K(ret)); + } else if (OB_FAIL(add_query_ref_meta(query_ref, + logical_plan->get_update_table_metas(), + logical_plan->get_selectivity_ctx()))) { + LOG_WARN("failed to 
add expr meta", K(ret)); } else { SubPlanInfo *info = static_cast(get_allocator().alloc(sizeof(SubPlanInfo))); bool has_ref_assign_user_var = false; @@ -2677,6 +2689,68 @@ int ObLogPlan::generate_subplan_for_query_ref(ObQueryRefRawExpr *query_ref, return ret; } +int ObLogPlan::add_exec_params_meta(ObIArray &exec_params, + const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx) +{ + int ret = OB_SUCCESS; + for (int64_t i = 0; OB_SUCC(ret) && i < exec_params.count(); i ++) { + ObExecParamRawExpr *exec_param = exec_params.at(i); + double avg_len = 0; + OptDynamicExprMeta dynamic_expr_meta; + if (OB_ISNULL(exec_param)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null param", K(ret), KPC(exec_param)); + } else if (OB_FAIL(ObOptSelectivity::calculate_expr_avg_len(table_metas, + ctx, + exec_param, + avg_len))) { + LOG_WARN("failed to calc expr avg len", K(ret), KPC(exec_param)); + } else { + dynamic_expr_meta.set_expr(exec_param); + dynamic_expr_meta.set_avg_len(avg_len); + } + if (FAILEDx(get_basic_table_metas().add_dynamic_expr_meta(dynamic_expr_meta))) { + LOG_WARN("failed to add expr meta", K(ret)); + } else if (OB_FAIL(get_update_table_metas().add_dynamic_expr_meta(dynamic_expr_meta))) { + LOG_WARN("failed to add expr meta", K(ret)); + } + } + return ret; +} + +int ObLogPlan::add_query_ref_meta(ObQueryRefRawExpr *expr, + const OptTableMetas &child_table_metas, + const OptSelectivityCtx &child_ctx) +{ + int ret = OB_SUCCESS; + ObRawExpr *ref_expr = NULL; + double avg_len = 0; + OptDynamicExprMeta dynamic_expr_meta; + ObSelectStmt *stmt = NULL; + if (OB_ISNULL(expr) || OB_ISNULL(stmt = expr->get_ref_stmt())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null param", K(ret), KPC(expr)); + } else if (!expr->is_scalar()) { + // do nothing + } else if (OB_UNLIKELY(stmt->get_select_item_size() != 1)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected query ref", K(ret), KPC(stmt)); + } else if (OB_FAIL(ObOptSelectivity::calculate_expr_avg_len( + 
child_table_metas, child_ctx, stmt->get_select_item(0).expr_, avg_len))) { + LOG_WARN("failed to calc expr avg len", K(ret), KPC(expr)); + } else { + dynamic_expr_meta.set_expr(expr); + dynamic_expr_meta.set_avg_len(avg_len); + if (OB_FAIL(get_basic_table_metas().add_dynamic_expr_meta(dynamic_expr_meta))) { + LOG_WARN("failed to add expr meta", K(ret)); + } else if (OB_FAIL(get_update_table_metas().add_dynamic_expr_meta(dynamic_expr_meta))) { + LOG_WARN("failed to add expr meta", K(ret)); + } + } + return ret; +} + //在已有sub_plan_infos中查找expr对应的subplan int ObLogPlan::get_subplan(const ObRawExpr *expr, SubPlanInfo *&info) { @@ -5556,7 +5630,7 @@ int ObLogPlan::init_groupby_helper(const ObIArray &group_exprs, } if (OB_SUCC(ret)) { - get_selectivity_ctx().init_op_ctx(&best_plan->get_output_equal_sets(), best_plan->get_card()); + get_selectivity_ctx().init_op_ctx(best_plan); if (group_rollup_exprs.empty()) { groupby_helper.group_ndv_ = 1.0; } else if (OB_FAIL(ObOptSelectivity::calculate_distinct(get_update_table_metas(), @@ -5582,7 +5656,7 @@ int ObLogPlan::calculate_group_distinct_ndv(const ObIArray &groupby_ ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(ret)); } else { - get_selectivity_ctx().init_op_ctx(&best_plan->get_output_equal_sets(), best_plan->get_card()); + get_selectivity_ctx().init_op_ctx(best_plan); } for (int64_t i = 0; OB_SUCC(ret) && i < groupby_helper.distinct_aggr_batch_.count(); ++i) { ObSEArray group_distinct_exprs; @@ -5647,7 +5721,7 @@ int ObLogPlan::init_distinct_helper(const ObIArray &distinct_exprs, } if (OB_SUCC(ret)) { - get_selectivity_ctx().init_op_ctx(&best_plan->get_output_equal_sets(), best_plan->get_card()); + get_selectivity_ctx().init_op_ctx(best_plan); if (distinct_exprs.empty()) { distinct_helper.group_ndv_ = 1.0; } else if (get_stmt()->is_set_stmt()) { @@ -8131,7 +8205,7 @@ int ObLogPlan::generate_subplan_filter_info(const ObIArray &subquer SubPlanInfo *info = NULL; if (OB_FAIL(get_subplan(candi_query_refs.at(i), 
info))) { LOG_WARN("failed to get subplan", K(ret)); - } else if (NULL != info && !for_on_condition) { + } else if (NULL != info && !for_on_condition && info->allocated_) { // do nothing } else if (OB_FAIL(append(exec_params, candi_query_refs.at(i)->get_exec_params()))) { LOG_WARN("failed to append exec params", K(ret)); @@ -8144,6 +8218,7 @@ int ObLogPlan::generate_subplan_filter_info(const ObIArray &subquer LOG_WARN("failed to push back query ref expr", K(ret)); } else { ++ idx; + info->allocated_ = true; for_cursor_expr = for_cursor_expr || candi_query_refs.at(i)->is_cursor(); if (info->init_plan_) { if (ObOptimizerUtil::find_item(onetime_query_refs, candi_query_refs.at(i))) { @@ -8799,14 +8874,14 @@ int ObLogPlan::candi_allocate_filter(const ObIArray &filter_exprs) LOG_WARN("get unexpected null", K(ret)); } else if (OB_FAIL(best_plan->get_input_equal_sets(equal_sets))) { LOG_WARN("failed to get input equal sets", K(ret)); - } else if (OB_FALSE_IT(get_selectivity_ctx().init_op_ctx(&equal_sets, best_plan->get_card()))) { + } else if (OB_FALSE_IT(get_selectivity_ctx().init_op_ctx(best_plan))) { } else if (OB_FAIL(ObOptSelectivity::calculate_selectivity(get_update_table_metas(), get_selectivity_ctx(), filter_exprs, sel, get_predicate_selectivities()))) { LOG_WARN("failed to calc selectivity", K(ret)); - } else if (OB_FALSE_IT(get_selectivity_ctx().init_op_ctx(NULL, -1.0))) { + } else if (OB_FALSE_IT(get_selectivity_ctx().clear())) { } else { for (int64_t i = 0; OB_SUCC(ret) && i < candidates_.candidate_plans_.count(); i++) { ObLogicalOperator *top = NULL; @@ -9056,6 +9131,7 @@ int ObLogPlan::init_onetime_subquery_info() bool dummy_shared = false; ObRawExpr *expr = exprs.at(i); ObSEArray onetime_list; + ObSEArray queryref_list; if (OB_ISNULL(expr)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("expr is null", K(ret)); @@ -9069,6 +9145,24 @@ int ObLogPlan::init_onetime_subquery_info() // do nothing } else if (OB_FAIL(create_onetime_param(expr, onetime_list))) { 
LOG_WARN("failed to create onetime param expr", K(ret)); + } else if (OB_FAIL(ObTransformUtils::extract_query_ref_expr(onetime_list, + queryref_list, + false))) { + LOG_WARN("failed to extract query ref exprs", K(ret)); + } + for (int64_t j = 0; OB_SUCC(ret) && j < queryref_list.count(); ++j) { + SubPlanInfo *info = NULL; + ObQueryRefRawExpr *onetime_queryref_expr = queryref_list.at(j); + if (OB_ISNULL(onetime_queryref_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected onetime expr", K(ret), KPC(onetime_queryref_expr)); + } else if (OB_FAIL(get_subplan(onetime_queryref_expr, info))) { + LOG_WARN("failed to get subplan", K(ret)); + } else if (NULL != info) { + // do nothing + } else if (OB_FAIL(generate_subplan_for_query_ref(onetime_queryref_expr, info))) { + LOG_WARN("failed to generate subplan for query ref", K(ret)); + } } } } @@ -13391,12 +13485,12 @@ int ObLogPlan::fill_join_filter_info(JoinFilterInfo &join_filter_info) if (OB_ISNULL(stmt = get_stmt())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected null", K(ret), K(get_stmt())); + } else if (FALSE_IT(get_selectivity_ctx().clear())) { } else if (OB_FAIL(ObOptSelectivity::calculate_distinct(get_update_table_metas(), - get_selectivity_ctx(), - join_filter_info.rexprs_, - join_filter_info.row_count_, - join_filter_info.right_distinct_card_, - false))) { + get_selectivity_ctx(), + join_filter_info.rexprs_, + join_filter_info.row_count_, + join_filter_info.right_distinct_card_))) { LOG_WARN("failed to calc distinct", K(ret)); } else if (join_filter_info.table_id_ == join_filter_info.filter_table_id_) { /* do nothing */ diff --git a/src/sql/optimizer/ob_log_plan.h b/src/sql/optimizer/ob_log_plan.h index b6f84085f1..a645e52f19 100644 --- a/src/sql/optimizer/ob_log_plan.h +++ b/src/sql/optimizer/ob_log_plan.h @@ -102,16 +102,17 @@ struct TableDependInfo { struct SubPlanInfo { - SubPlanInfo() : init_expr_(NULL), subplan_(NULL), init_plan_(false) {} + SubPlanInfo() : init_expr_(NULL), subplan_(NULL), 
init_plan_(false), allocated_(false) {} SubPlanInfo(ObQueryRefRawExpr *expr, ObLogPlan *plan, bool init_) - : init_expr_(expr), subplan_(plan), init_plan_(init_) {} + : init_expr_(expr), subplan_(plan), init_plan_(init_), allocated_(false) {} virtual ~SubPlanInfo() {} void set_subplan(ObLogPlan *plan) { subplan_ = plan; } ObQueryRefRawExpr *init_expr_; ObLogPlan *subplan_; bool init_plan_; - TO_STRING_KV(K_(init_expr), K_(subplan), K_(init_plan)); + bool allocated_; + TO_STRING_KV(K_(init_expr), K_(subplan), K_(init_plan), K_(allocated)); }; struct ObDistinctAggrBatch @@ -426,6 +427,13 @@ public: const ObInsertStmt *get_insert_stmt() const { return insert_stmt_; } void set_nonrecursive_plan_for_fake_cte(ObSelectLogPlan *plan) { nonrecursive_plan_for_fake_cte_ = plan; } ObSelectLogPlan *get_nonrecursive_plan_for_fake_cte() { return nonrecursive_plan_for_fake_cte_; } + + int add_exec_params_meta(ObIArray &exec_params, + const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx); + int add_query_ref_meta(ObQueryRefRawExpr *expr, + const OptTableMetas &child_table_metas, + const OptSelectivityCtx &child_ctx); public: struct All_Candidate_Plans diff --git a/src/sql/optimizer/ob_log_set.cpp b/src/sql/optimizer/ob_log_set.cpp index a93f6cc88a..3592a5453c 100644 --- a/src/sql/optimizer/ob_log_set.cpp +++ b/src/sql/optimizer/ob_log_set.cpp @@ -492,6 +492,12 @@ int ObLogSet::get_re_est_cost_infos(const EstimateCostInfo ¶m, return ret; } +int ObLogSet::est_ambient_card() +{ + // do nothing + return OB_SUCCESS; +} + int ObLogSet::do_re_est_cost(EstimateCostInfo ¶m, double &card, double &op_cost, double &cost) { int ret = OB_SUCCESS; diff --git a/src/sql/optimizer/ob_log_set.h b/src/sql/optimizer/ob_log_set.h index 813d4541e9..2ccca9a687 100644 --- a/src/sql/optimizer/ob_log_set.h +++ b/src/sql/optimizer/ob_log_set.h @@ -77,6 +77,7 @@ public: virtual int est_cost() override; virtual int est_width() override; virtual int do_re_est_cost(EstimateCostInfo ¶m, double &card, 
double &op_cost, double &cost) override; + virtual int est_ambient_card() override; int get_re_est_cost_infos(const EstimateCostInfo ¶m, ObIArray &cost_infos, double &child_cost, diff --git a/src/sql/optimizer/ob_log_sort.cpp b/src/sql/optimizer/ob_log_sort.cpp index 9a10893e68..49cb83bd6a 100644 --- a/src/sql/optimizer/ob_log_sort.cpp +++ b/src/sql/optimizer/ob_log_sort.cpp @@ -462,6 +462,7 @@ int ObLogSort::inner_est_cost(const int64_t parallel, double child_card, double if (NULL != topn_expr_) { double_topn_count = static_cast(topn_count); } + get_plan()->get_selectivity_ctx().init_op_ctx(child); double child_card_per_dop = child_card / parallel; if (double_topn_count > child_card_per_dop) { double_topn_count = child_card_per_dop; diff --git a/src/sql/optimizer/ob_log_subplan_filter.cpp b/src/sql/optimizer/ob_log_subplan_filter.cpp index 4763261877..b62c58abe5 100644 --- a/src/sql/optimizer/ob_log_subplan_filter.cpp +++ b/src/sql/optimizer/ob_log_subplan_filter.cpp @@ -191,6 +191,15 @@ int ObLogSubPlanFilter::get_plan_item_info(PlanText &plan_text, return ret; } +int ObLogSubPlanFilter::est_ambient_card() +{ + int ret = OB_SUCCESS; + if (OB_FAIL(inner_est_ambient_card_by_child(ObLogicalOperator::first_child))) { + LOG_WARN("failed to est ambient cards by first child", K(ret), K(get_type())); + } + return ret; +} + int ObLogSubPlanFilter::est_cost() { int ret = OB_SUCCESS; @@ -224,7 +233,7 @@ int ObLogSubPlanFilter::do_re_est_cost(EstimateCostInfo ¶m, double &card, do LOG_WARN("unexpected params", K(ret), K(get_plan()), K(child), K(param.need_parallel_)); } else if (param.need_row_count_ < 0 || param.need_row_count_ >= child->get_card()) { param.need_row_count_ = -1; - } else if (OB_FALSE_IT(get_plan()->get_selectivity_ctx().init_op_ctx(&child->get_output_equal_sets(), child->get_card()))) { + } else if (OB_FALSE_IT(get_plan()->get_selectivity_ctx().init_op_ctx(child))) { } else if 
(OB_FAIL(ObOptSelectivity::calculate_selectivity(get_plan()->get_basic_table_metas(), get_plan()->get_selectivity_ctx(), get_filter_exprs(), diff --git a/src/sql/optimizer/ob_log_subplan_filter.h b/src/sql/optimizer/ob_log_subplan_filter.h index 984fe10351..89a08340d0 100644 --- a/src/sql/optimizer/ob_log_subplan_filter.h +++ b/src/sql/optimizer/ob_log_subplan_filter.h @@ -39,6 +39,7 @@ public: virtual int do_re_est_cost(EstimateCostInfo ¶m, double &card, double &op_cost, double &cost) override; // re est children cost and gather cost infos int get_re_est_cost_infos(const EstimateCostInfo ¶m, ObIArray &cost_infos); + virtual int est_ambient_card() override; inline int add_subquery_exprs(const ObIArray &query_exprs) { diff --git a/src/sql/optimizer/ob_log_table_scan.cpp b/src/sql/optimizer/ob_log_table_scan.cpp index dcf98d6a71..bb935d2a58 100644 --- a/src/sql/optimizer/ob_log_table_scan.cpp +++ b/src/sql/optimizer/ob_log_table_scan.cpp @@ -1352,7 +1352,13 @@ int ObLogTableScan::get_plan_item_info(PlanText &plan_text, } else if (OB_ISNULL(table_meta = plan->get_basic_table_metas().get_table_meta_by_table_id(table_id_))) { //do nothing - } else if (OB_FAIL(BUF_PRINTF("stats version:%ld", table_meta->get_version()))) { + } else if (OB_FAIL(BUF_PRINTF("stats info:[version=%ld", table_meta->get_version()))) { + LOG_WARN("BUF_PRINTF fails", K(ret)); + } else if (OB_FAIL(BUF_PRINTF(", is_locked=%d", table_meta->is_stat_locked()))) { + LOG_WARN("BUF_PRINTF fails", K(ret)); + } else if (OB_FAIL(BUF_PRINTF(", is_expired=%d", table_meta->is_opt_stat_expired()))) { + LOG_WARN("BUF_PRINTF fails", K(ret)); + } else if (OB_FAIL(BUF_PRINTF("]"))) { LOG_WARN("BUF_PRINTF fails", K(ret)); } else if (OB_FAIL(BUF_PRINTF(NEW_LINE))) { LOG_WARN("BUF_PRINTF fails", K(ret)); diff --git a/src/sql/optimizer/ob_log_temp_table_access.cpp b/src/sql/optimizer/ob_log_temp_table_access.cpp index c10286ac4f..6817e390de 100644 --- a/src/sql/optimizer/ob_log_temp_table_access.cpp +++ 
b/src/sql/optimizer/ob_log_temp_table_access.cpp @@ -98,6 +98,7 @@ int ObLogTempTableAccess::do_re_est_cost(EstimateCostInfo ¶m, double &card, cost = get_cost(); double selectivity = 1.0; const int64_t parallel = param.need_parallel_; + get_plan()->get_selectivity_ctx().init_op_ctx(NULL, -1); if (OB_ISNULL(get_plan())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(get_plan()),K(ret)); diff --git a/src/sql/optimizer/ob_log_temp_table_transformation.cpp b/src/sql/optimizer/ob_log_temp_table_transformation.cpp index 76fd7708c4..084ff2b7b8 100644 --- a/src/sql/optimizer/ob_log_temp_table_transformation.cpp +++ b/src/sql/optimizer/ob_log_temp_table_transformation.cpp @@ -124,6 +124,16 @@ int ObLogTempTableTransformation::compute_op_parallel_and_server_info() return ret; } +int ObLogTempTableTransformation::est_ambient_card() +{ + int ret = OB_SUCCESS; + if (OB_FAIL(inner_est_ambient_card_by_child(get_num_of_child() - 1))) { + LOG_WARN("failed to est ambient cards by last child", K(ret), K(get_type())); + } + return ret; +} + + int ObLogTempTableTransformation::do_re_est_cost(EstimateCostInfo ¶m, double &card, double &op_cost, double &cost) { int ret = OB_SUCCESS; diff --git a/src/sql/optimizer/ob_log_temp_table_transformation.h b/src/sql/optimizer/ob_log_temp_table_transformation.h index 03eab9f454..4d4d9418f8 100644 --- a/src/sql/optimizer/ob_log_temp_table_transformation.h +++ b/src/sql/optimizer/ob_log_temp_table_transformation.h @@ -34,6 +34,7 @@ public: virtual bool is_block_input(const int64_t child_idx) const override { return child_idx != get_num_of_child() - 1; } virtual int compute_op_parallel_and_server_info() override; virtual int do_re_est_cost(EstimateCostInfo ¶m, double &card, double &op_cost, double &cost) override; + virtual int est_ambient_card() override; int get_temp_table_exprs(ObIArray &set_exprs) const; int allocate_startup_expr_post() override; virtual int get_card_without_filter(double &card) override; diff --git 
a/src/sql/optimizer/ob_logical_operator.cpp b/src/sql/optimizer/ob_logical_operator.cpp index 755d46a0d5..23f35096d3 100644 --- a/src/sql/optimizer/ob_logical_operator.cpp +++ b/src/sql/optimizer/ob_logical_operator.cpp @@ -1067,6 +1067,8 @@ int ObLogicalOperator::compute_property(Path *path) set_server_cnt(path->server_cnt_); if (OB_FAIL(server_list_.assign(path->server_list_))) { LOG_WARN("failed to assign path's server list to op", K(ret)); + } else if (OB_FAIL(ambient_card_.assign(path->parent_->get_ambient_card()))) { + LOG_WARN("failed to assign ambient cards", K(ret)); } else if (OB_FAIL(check_property_valid())) { LOG_WARN("failed to check property valid", K(ret), KPC(path)); } else { @@ -1273,6 +1275,8 @@ int ObLogicalOperator::compute_property() LOG_WARN("failed to compute width", K(ret)); } else if (OB_FAIL(est_cost())) { LOG_WARN("failed to estimate cost", K(ret)); + } else if (OB_FAIL(est_ambient_card())) { + LOG_WARN("failed to est ambient card"); } else if (OB_FAIL(check_property_valid())) { LOG_WARN("failed to check property valid", K(ret)); } else { @@ -1299,6 +1303,41 @@ int ObLogicalOperator::compute_property() return ret; } +int ObLogicalOperator::est_ambient_card() +{ + int ret = OB_SUCCESS; + if (1 == get_num_of_child()) { + if (OB_FAIL(inner_est_ambient_card_by_child(ObLogicalOperator::first_child))) { + LOG_WARN("failed to est ambient cards by first child", K(ret), K(get_type())); + } + } else if (0 == get_num_of_child()) { + // do nothing + // ambient cardinality of the leaf node is inited by the path + } else { + // ret = OB_ERR_UNEXPECTED; + LOG_WARN("multi child op called default est_ambient_card function", K(ret), K(get_type())); + } + return ret; +} + +int ObLogicalOperator::inner_est_ambient_card_by_child(int64_t child_idx) +{ + int ret = OB_SUCCESS; + ObLogicalOperator *child = NULL; + if (OB_UNLIKELY(child_idx >= get_num_of_child()) || + OB_ISNULL(child = get_child(child_idx))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected 
null", K(child), K(ret)); + } else if (OB_FAIL(ambient_card_.assign(child->get_ambient_card()))) { + LOG_WARN("failed to assign", K(ret)); + } else { + for (int64_t i = 0; i < ambient_card_.count(); i ++) { + ambient_card_.at(i) = ObOptSelectivity::scale_distinct(get_card(), child->get_card(), ambient_card_.at(i)); + } + } + return ret; +} + int ObLogicalOperator::check_property_valid() const { int ret = OB_SUCCESS; diff --git a/src/sql/optimizer/ob_logical_operator.h b/src/sql/optimizer/ob_logical_operator.h index 09060b52dd..4fd1dcb449 100644 --- a/src/sql/optimizer/ob_logical_operator.h +++ b/src/sql/optimizer/ob_logical_operator.h @@ -1323,6 +1323,9 @@ public: int re_est_cost(EstimateCostInfo ¶m, double &card, double &cost); virtual int do_re_est_cost(EstimateCostInfo ¶m, double &card, double &op_cost, double &cost); + virtual int est_ambient_card(); + int inner_est_ambient_card_by_child(int64_t child_idx); + /** * @brief compute_property * convert property fields from a path into a logical operator @@ -1719,6 +1722,8 @@ public: virtual int close_px_resource_analyze(CLOSE_PX_RESOURCE_ANALYZE_DECLARE_ARG); int find_max_px_resource_child(OPEN_PX_RESOURCE_ANALYZE_DECLARE_ARG, int64_t start_idx); + inline ObIArray &get_ambient_card() { return ambient_card_; } + public: ObSEArray child_; ObSEArray equal_param_constraints_; @@ -1800,6 +1805,7 @@ protected: const EqualSets *output_equal_sets_; const ObFdItemSet *fd_item_set_; const ObRelIds *table_set_; + common::ObSEArray ambient_card_; uint64_t id_; // operator 0-based depth-first id uint64_t branch_id_; diff --git a/src/sql/optimizer/ob_opt_default_stat.h b/src/sql/optimizer/ob_opt_default_stat.h index 9cc301d8ae..821440bad5 100644 --- a/src/sql/optimizer/ob_opt_default_stat.h +++ b/src/sql/optimizer/ob_opt_default_stat.h @@ -73,8 +73,8 @@ const double DEFAULT_SEL = 0.5; const double DEFAULT_AGG_RANGE = 0.05; // [aggr(expr) = const]的默认选择率,参考oracle const double DEFAULT_AGG_EQ = 0.01; -// clob/blob like "xxx" 的默认选择率 
-const double DEFAULT_CLOB_LIKE_SEL = 0.05; +// like 的默认选择率,参考oracle +const double DEFAULT_LIKE_SEL = 0.05; const double DEFAULT_ANTI_JOIN_SEL = 0.01; // 范围谓词越界部分选择率,参考 SQLserver const double DEFAULT_OUT_OF_BOUNDS_SEL = 0.3; diff --git a/src/sql/optimizer/ob_opt_est_cost.cpp b/src/sql/optimizer/ob_opt_est_cost.cpp index 09878a45c4..4ac4349400 100644 --- a/src/sql/optimizer/ob_opt_est_cost.cpp +++ b/src/sql/optimizer/ob_opt_est_cost.cpp @@ -677,39 +677,53 @@ int ObOptEstCost::calculate_filter_selectivity(ObCostTableScanInfo &est_cost_inf ObIArray &all_predicate_sel) { int ret = OB_SUCCESS; - if (OB_ISNULL(est_cost_info.table_metas_) || OB_ISNULL(est_cost_info.sel_ctx_)) { + ObSEArray apply_filters; + double total_sel = 1.0; + if (OB_ISNULL(est_cost_info.table_metas_) || OB_ISNULL(est_cost_info.sel_ctx_) || + OB_ISNULL(est_cost_info.table_meta_info_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("null point error", K(est_cost_info.table_metas_), K(est_cost_info.sel_ctx_), K(ret)); - } else if (OB_FAIL(ObOptSelectivity::calculate_selectivity(*est_cost_info.table_metas_, - *est_cost_info.sel_ctx_, - est_cost_info.prefix_filters_, - est_cost_info.prefix_filter_sel_, - all_predicate_sel))) { - LOG_WARN("failed to calculate selectivity", K(est_cost_info.postfix_filters_), K(ret)); - } else if (OB_FAIL(ObOptSelectivity::calculate_selectivity(*est_cost_info.table_metas_, - *est_cost_info.sel_ctx_, - est_cost_info.pushdown_prefix_filters_, - est_cost_info.pushdown_prefix_filter_sel_, - all_predicate_sel))) { - LOG_WARN("failed to calculate selectivity", K(est_cost_info.pushdown_prefix_filters_), K(ret)); - } else if (OB_FAIL(ObOptSelectivity::calculate_selectivity(*est_cost_info.table_metas_, - *est_cost_info.sel_ctx_, - est_cost_info.ss_postfix_range_filters_, - est_cost_info.ss_postfix_range_filters_sel_, - all_predicate_sel))) { - LOG_WARN("failed to calculate selectivity", K(est_cost_info.ss_postfix_range_filters_), K(ret)); - } else if 
(OB_FAIL(ObOptSelectivity::calculate_selectivity(*est_cost_info.table_metas_, - *est_cost_info.sel_ctx_, - est_cost_info.postfix_filters_, - est_cost_info.postfix_filter_sel_, - all_predicate_sel))) { - LOG_WARN("failed to calculate selectivity", K(est_cost_info.postfix_filters_), K(ret)); - } else if (OB_FAIL(ObOptSelectivity::calculate_selectivity(*est_cost_info.table_metas_, - *est_cost_info.sel_ctx_, - est_cost_info.table_filters_, - est_cost_info.table_filter_sel_, - all_predicate_sel))) { - LOG_WARN("failed to calculate selectivity", K(est_cost_info.table_filters_), K(ret)); + } else if (FALSE_IT(est_cost_info.sel_ctx_->init_op_ctx(NULL, est_cost_info.table_meta_info_->table_row_count_))) { + } else if (OB_FAIL(ObOptSelectivity::calculate_conditional_selectivity(*est_cost_info.table_metas_, + *est_cost_info.sel_ctx_, + apply_filters, + est_cost_info.prefix_filters_, + total_sel, + est_cost_info.prefix_filter_sel_, + all_predicate_sel))) { + LOG_WARN("failed to calculate prefix filter sel", K(est_cost_info.prefix_filters_)); + } else if (OB_FAIL(ObOptSelectivity::calculate_conditional_selectivity(*est_cost_info.table_metas_, + *est_cost_info.sel_ctx_, + apply_filters, + est_cost_info.pushdown_prefix_filters_, + total_sel, + est_cost_info.pushdown_prefix_filter_sel_, + all_predicate_sel))) { + LOG_WARN("failed to calculate prefix filter sel", K(est_cost_info.pushdown_prefix_filters_)); + } else if (OB_FAIL(ObOptSelectivity::calculate_conditional_selectivity(*est_cost_info.table_metas_, + *est_cost_info.sel_ctx_, + apply_filters, + est_cost_info.ss_postfix_range_filters_, + total_sel, + est_cost_info.ss_postfix_range_filters_sel_, + all_predicate_sel))) { + LOG_WARN("failed to calculate prefix filter sel", K(est_cost_info.ss_postfix_range_filters_)); + } else if (OB_FAIL(ObOptSelectivity::calculate_conditional_selectivity(*est_cost_info.table_metas_, + *est_cost_info.sel_ctx_, + apply_filters, + est_cost_info.postfix_filters_, + total_sel, + 
est_cost_info.postfix_filter_sel_, + all_predicate_sel))) { + LOG_WARN("failed to calculate prefix filter sel", K(est_cost_info.postfix_filters_)); + } else if (OB_FAIL(ObOptSelectivity::calculate_conditional_selectivity(*est_cost_info.table_metas_, + *est_cost_info.sel_ctx_, + apply_filters, + est_cost_info.table_filters_, + total_sel, + est_cost_info.table_filter_sel_, + all_predicate_sel))) { + LOG_WARN("failed to calculate prefix filter sel", K(est_cost_info.table_filters_)); } else { LOG_TRACE("table filter info", K(est_cost_info.ref_table_id_), K(est_cost_info.index_id_), K(est_cost_info.prefix_filters_), K(est_cost_info.pushdown_prefix_filters_), diff --git a/src/sql/optimizer/ob_opt_est_utils.cpp b/src/sql/optimizer/ob_opt_est_utils.cpp index b32c27e1ce..0e2124521e 100644 --- a/src/sql/optimizer/ob_opt_est_utils.cpp +++ b/src/sql/optimizer/ob_opt_est_utils.cpp @@ -75,21 +75,34 @@ int ObOptEstUtils::extract_column_exprs_with_op_check( } -int ObOptEstUtils::is_range_expr(const ObRawExpr *qual, bool &is_simple_filter, const int64_t level) +int ObOptEstUtils::is_range_expr(const ObRawExpr *qual, bool &is_simple_filter) { int ret = OB_SUCCESS; - if (0 == level) { - is_simple_filter = true; - } + is_simple_filter = true; if (OB_ISNULL(qual)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("qual is null", K(ret)); - } else if (IS_RANGE_CMP_OP(qual->get_expr_type()) && qual->has_flag(IS_RANGE_COND)) { - // c1 > 1 , 1 < c1 do nothing + } else if (IS_RANGE_CMP_OP(qual->get_expr_type()) || + T_OP_BTW == qual->get_expr_type() || + T_OP_NOT_BTW == qual->get_expr_type()) { + // c1 > 1 , 1 < c1, c1 (not) between 1 and '2' + const ObRawExpr *var = NULL; + const ObRawExpr *const_expr1 = NULL; + const ObRawExpr *const_expr2 = NULL; + ObItemType dummy = T_INVALID; + if (OB_FAIL(extract_var_op_const(qual, var, const_expr1, const_expr2, dummy, is_simple_filter))) { + LOG_WARN("failed to extract var", K(ret)); + } else if (!is_simple_filter) { + // do nothing + } else if 
(OB_FAIL(ObOptimizerUtil::get_expr_without_lossless_cast(var, var))) { + LOG_WARN("failed to get expr without lossless cast", K(ret)); + } else if (!var->is_column_ref_expr()) { + is_simple_filter = false; + } } else if (T_OP_AND == qual->get_expr_type() || T_OP_OR == qual->get_expr_type()) { const ObOpRawExpr *op_expr = static_cast(qual); for (int idx = 0 ; idx < op_expr->get_param_count() && is_simple_filter && OB_SUCC(ret); ++idx) { - if (OB_FAIL(is_range_expr(op_expr->get_param_expr(idx), is_simple_filter, level + 1))) { + if (OB_FAIL(is_range_expr(op_expr->get_param_expr(idx), is_simple_filter))) { LOG_WARN("failed to judge if expr is range", K(ret)); } } @@ -99,6 +112,70 @@ int ObOptEstUtils::is_range_expr(const ObRawExpr *qual, bool &is_simple_filter, return ret; } +int ObOptEstUtils::extract_var_op_const(const ObRawExpr *qual, + const ObRawExpr *&var_expr, + const ObRawExpr *&const_expr1, + const ObRawExpr *&const_expr2, + ObItemType &type, + bool &is_valid) +{ + int ret = OB_SUCCESS; + type = T_INVALID; + is_valid = false; + var_expr = NULL; + const_expr1 = NULL; + const_expr2 = NULL; + if (OB_ISNULL(qual)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else if (FALSE_IT(type = qual->get_expr_type())) { + } else if (IS_RANGE_CMP_OP(type) || T_OP_EQ == type || T_OP_NSEQ == type || T_OP_NE == type) { + if (OB_UNLIKELY(qual->get_param_count() != 2) || + OB_ISNULL(qual->get_param_expr(0)) || + OB_ISNULL(qual->get_param_expr(1))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected param", KPC(qual)); + } else if (!qual->get_param_expr(0)->is_const_expr() && + qual->get_param_expr(1)->is_const_expr()) { + var_expr = qual->get_param_expr(0); + const_expr1 = qual->get_param_expr(1); + is_valid = true; + } else if (!qual->get_param_expr(1)->is_const_expr() && + qual->get_param_expr(0)->is_const_expr()) { + var_expr = qual->get_param_expr(1); + const_expr1 = qual->get_param_expr(0); + is_valid = true; + type = 
get_opposite_compare_type(type); + } + } else if (T_OP_IS == type || T_OP_IS_NOT == type) { + if (OB_UNLIKELY(qual->get_param_count() != 2) || + OB_ISNULL(qual->get_param_expr(0)) || + OB_ISNULL(qual->get_param_expr(1))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected param", KPC(qual)); + } else if (!qual->get_param_expr(0)->is_const_expr() && + qual->get_param_expr(1)->is_const_expr()) { + var_expr = qual->get_param_expr(0); + const_expr1 = qual->get_param_expr(1); + is_valid = true; + } + } else if (T_OP_BTW == type || T_OP_NOT_BTW == type) { + if (OB_UNLIKELY(3 != qual->get_param_count()) || OB_ISNULL(qual->get_param_expr(0)) || + OB_ISNULL(qual->get_param_expr(1)) || OB_ISNULL(qual->get_param_expr(2))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected param", K(ret), KPC(qual)); + } else if (!qual->get_param_expr(0)->is_const_expr() && + qual->get_param_expr(1)->is_const_expr() && + qual->get_param_expr(2)->is_const_expr()) { + var_expr = qual->get_param_expr(0); + const_expr1 = qual->get_param_expr(1); + const_expr2 = qual->get_param_expr(2); + is_valid = true; + } + } + return ret; +} + int ObOptEstUtils::extract_simple_cond_filters(ObRawExpr &qual, bool &can_be_extracted, ObIArray &column_exprs_array) @@ -205,7 +282,7 @@ int ObOptEstUtils::if_expr_start_with_patten_sign(const ParamStore *params, is_start_with = false; all_is_percent_sign = false; bool get_value = false; - bool empty_escape = false; + bool valid_escape = true; char escape; ObObj value; ObObj esp_value; @@ -218,32 +295,48 @@ int ObOptEstUtils::if_expr_start_with_patten_sign(const ParamStore *params, } else if (!get_value || !esp_value.is_string_type()) { // do nothing } else { - if (esp_value.get_char().length() > 0) { - escape = esp_value.get_char()[0]; - } else { - empty_escape = true; + size_t escape_length = ObCharset::strlen_char(esp_expr->get_collation_type(), + esp_value.get_string().ptr(), + esp_value.get_string().length()); + int32_t escape_wc = 0; + if (1 != escape_length) { + 
valid_escape = false; + } else if (OB_FAIL(ObCharset::mb_wc(esp_expr->get_collation_type(), esp_value.get_string(), escape_wc))) { + ret = OB_SUCCESS; + valid_escape = false; } if (OB_FAIL(get_expr_value(params, *expr, exec_ctx, allocator, get_value, value))) { LOG_WARN("Failed to get expr value", K(ret)); } else if (get_value && value.is_string_type() && value.get_string().length() > 0) { // 1. patten not start with `escape sign` // 2. patten start with `%` or `_` && `%` or `_` is not `escape sign` - char start_c = value.get_string()[0]; - if (empty_escape) { - is_start_with = ('%' == start_c || '_' == start_c); - } else { - is_start_with = (escape != start_c && ('%' == start_c || '_' == start_c)); + ObStringScanner scanner(value.get_string(), expr->get_collation_type()); + ObString encoding; + int32_t wc = 0; + ObString first_c; + bool is_first_char = true; + all_is_percent_sign = true; + while (OB_SUCC(ret) + && scanner.next_character(encoding, wc, ret) + && all_is_percent_sign) { + if (is_first_char) { + bool is_wild = (static_cast('%') == wc || static_cast('_') == wc); + if (!valid_escape) { + is_start_with = is_wild; + } else { + is_start_with = (escape_wc != wc && is_wild); + } + is_first_char = false; + } + if (static_cast('%') != wc) { + all_is_percent_sign = false; + } } - } else { /* do nothing */ } - } - if (OB_SUCC(ret) && is_start_with) { - all_is_percent_sign = true; - const ObString &expr_str = value.get_string(); - for (int64_t i = 0; all_is_percent_sign && i < expr_str.length(); i++) { - if (expr_str[i] != '%') { + if (OB_FAIL(ret)) { + ret = OB_SUCCESS; all_is_percent_sign = false; } - } + } else { /* do nothing */ } } return ret; } diff --git a/src/sql/optimizer/ob_opt_est_utils.h b/src/sql/optimizer/ob_opt_est_utils.h index 7af2973138..e74ee2c6f1 100644 --- a/src/sql/optimizer/ob_opt_est_utils.h +++ b/src/sql/optimizer/ob_opt_est_utils.h @@ -49,7 +49,14 @@ public: //such as, c1 > 1 and c1 < 2, c1 between 1 and 1000, c1 > 100 or c1 < 10000 
//@param in qual //@param out is_range - static int is_range_expr(const ObRawExpr *qual, bool &is_simple_filter, const int64_t level = 0); + static int is_range_expr(const ObRawExpr *qual, bool &is_simple_filter); + + static int extract_var_op_const(const ObRawExpr *qual, + const ObRawExpr *&var_expr, + const ObRawExpr *&const_expr1, + const ObRawExpr *&const_expr2, + ObItemType &type, + bool &is_valid); //extract column exprs with simple operator check. //level must be initialized with 0(default value) diff --git a/src/sql/optimizer/ob_opt_selectivity.cpp b/src/sql/optimizer/ob_opt_selectivity.cpp index d50f72b26b..bdf5a9f46a 100644 --- a/src/sql/optimizer/ob_opt_selectivity.cpp +++ b/src/sql/optimizer/ob_opt_selectivity.cpp @@ -40,6 +40,98 @@ namespace sql { inline double revise_ndv(double ndv) { return ndv < 1.0 ? 1.0 : ndv; } +void OptSelectivityCtx::init_op_ctx(ObLogicalOperator *child_op) +{ + if (OB_NOT_NULL(child_op)) { + init_op_ctx(&child_op->get_output_equal_sets(), + child_op->get_card(), + &child_op->get_ambient_card()); + } else { + init_op_ctx(NULL, -1, NULL); + } +} + +int OptSelectivityCtx::get_ambient_card(const uint64_t table_id, double &table_ambient_card) const +{ + int ret = OB_SUCCESS; + table_ambient_card = -1.0; + if (OB_ISNULL(get_stmt())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else if (OB_NOT_NULL(get_ambient_card())) { + uint64_t table_index = get_stmt()->get_table_bit_index(table_id); + if (OB_UNLIKELY(table_index < 1) || + OB_UNLIKELY(table_index >= get_ambient_card()->count())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected table index", K(table_index), K(table_id), KPC(get_stmt())); + } else { + table_ambient_card = get_ambient_card()->at(table_index); + } + } + return ret; +} + +ObEstCorrelationModel &ObIndependentModel::get_model() +{ + static ObIndependentModel model; + return model; +} + +ObEstCorrelationModel &ObPartialCorrelationModel::get_model() +{ + static ObPartialCorrelationModel 
model; + return model; +} + +ObEstCorrelationModel &ObFullCorrelationModel::get_model() +{ + static ObFullCorrelationModel model; + return model; +} + +ObEstCorrelationModel &ObEstCorrelationModel::get_correlation_model(ObEstCorrelationType type) +{ + switch (type) { + case ObEstCorrelationType::INDEPENDENT: return ObIndependentModel::get_model(); + case ObEstCorrelationType::PARTIAL: return ObPartialCorrelationModel::get_model(); + case ObEstCorrelationType::FULL: return ObFullCorrelationModel::get_model(); + default: break; + } + return ObPartialCorrelationModel::get_model(); +} + +double ObIndependentModel::combine_filters_selectivity(ObIArray &selectivities) const +{ + double combine_selectivity = 1.0; + for (int64_t i = 0; i < selectivities.count(); i ++) { + combine_selectivity *= selectivities.at(i); + } + return combine_selectivity; +} + +double ObPartialCorrelationModel::combine_filters_selectivity(ObIArray &selectivities) const +{ + double selectivity = 1.0; + if (!selectivities.empty()) { + double exp = 1.0; + lib::ob_sort(&selectivities.at(0), &selectivities.at(0) + selectivities.count()); + for (int64_t i = 0; i < selectivities.count(); i ++) { + selectivity *= std::pow(selectivities.at(i), 1 / exp); + exp *= 2; + } + } + return selectivity; +} + +double ObFullCorrelationModel::combine_filters_selectivity(ObIArray &selectivities) const +{ + double combine_selectivity = 1.0; + for (int64_t i = 0; i < selectivities.count(); i ++) { + combine_selectivity = std::min(combine_selectivity, selectivities.at(i)); + } + return combine_selectivity; +} + int OptColumnMeta::assign(const OptColumnMeta &other) { int ret = OB_SUCCESS; @@ -87,6 +179,7 @@ int OptTableMeta::assign(const OptTableMeta &other) table_partition_info_ = other.table_partition_info_; base_meta_info_ = other.base_meta_info_; real_rows_ = other.real_rows_; + stale_stats_ = other.stale_stats_; if (OB_FAIL(all_used_parts_.assign(other.all_used_parts_))) { LOG_WARN("failed to assign all used parts", 
K(ret)); @@ -206,6 +299,12 @@ int OptTableMeta::init_column_meta(const OptSelectivityCtx &ctx, } if (OB_SUCC(ret)) { + if (rows_ < col_meta.get_ndv()) { + col_meta.set_ndv(rows_); + } + if (rows_ < col_meta.get_num_null()) { + col_meta.set_num_null(rows_); + } col_meta.set_column_id(column_id); col_meta.set_avg_len(stat.avglen_val_); col_meta.set_cg_macro_blk_cnt(stat.cg_macro_blk_cnt_); @@ -270,14 +369,6 @@ const OptColumnMeta* OptTableMeta::get_column_meta(const uint64_t column_id) con return column_meta; } -void OptTableMeta::set_ndv_for_all_column(double ndv) -{ - for (int64_t i = 0; i < column_metas_.count(); ++i) { - column_metas_.at(i).set_ndv(ndv); - } - return; -} - int OptTableMetas::copy_table_meta_info(const OptTableMeta &src_meta, OptTableMeta *&dst_meta) { int ret = OB_SUCCESS; @@ -318,7 +409,8 @@ int OptTableMetas::add_base_table_meta_info(OptSelectivityCtx &ctx, int64_t last_analyzed, bool is_stat_locked, const ObTablePartitionInfo *table_partition_info, - const ObTableMetaInfo *base_meta_info) + const ObTableMetaInfo *base_meta_info, + bool stale_stats) { int ret = OB_SUCCESS; ObSqlSchemaGuard *schema_guard = ctx.get_sql_schema_guard(); @@ -337,6 +429,7 @@ int OptTableMetas::add_base_table_meta_info(OptSelectivityCtx &ctx, } else { table_meta->set_version(last_analyzed); table_meta->set_stat_locked(is_stat_locked); + table_meta->set_stale_stats(stale_stats); LOG_TRACE("add base table meta info success", K(*table_meta)); } return ret; @@ -499,12 +592,17 @@ int OptTableMetas::add_generate_table_meta_info(const ObDMLStmt *parent_stmt, ObObj minobj; maxobj.set_max_value(); minobj.set_min_value(); - if (select_expr->is_column_ref_expr() && - OB_FAIL(ObOptSelectivity::get_column_min_max(child_table_metas, child_ctx, *select_expr, minobj, maxobj))) { - LOG_WARN("failed to get column min max", K(ret)); - } else { + avg_len = 0; + if (OB_FAIL(ObOptSelectivity::calculate_expr_avg_len(child_table_metas, child_ctx, select_expr, avg_len))) { + LOG_WARN("failed 
to get avg len", K(ret)); + } else if (OB_FAIL(ObOptSelectivity::calc_expr_min_max(child_table_metas, child_ctx, select_expr, minobj, maxobj))) { + LOG_WARN("failed to calc expr min max", KPC(select_expr)); + } + + if (OB_SUCC(ret)) { column_meta->set_min_value(minobj); column_meta->set_max_value(maxobj); + column_meta->set_avg_len(avg_len); } } } @@ -666,6 +764,90 @@ const OptColumnMeta* OptTableMetas::get_column_meta_by_table_id(const uint64_t t return column_meta; } +const OptDynamicExprMeta* OptTableMetas::get_dynamic_expr_meta(const ObRawExpr *expr) const +{ + const OptDynamicExprMeta* dynamic_expr_meta = NULL; + for (int64_t i = 0; NULL == dynamic_expr_meta && i < dynamic_expr_metas_.count(); ++i) { + if (expr == dynamic_expr_metas_.at(i).get_expr()) { + dynamic_expr_meta = &dynamic_expr_metas_.at(i); + } + } + return dynamic_expr_meta; +} + +double OptTableMetas::get_rows(const uint64_t table_id) const +{ + const OptTableMeta *table_meta = get_table_meta_by_table_id(table_id); + double rows = 1.0; + if (OB_NOT_NULL(table_meta)) { + rows = table_meta->get_rows(); + } + return rows; +} + +int ObOptSelectivity::calculate_conditional_selectivity(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + common::ObIArray &total_filters, + common::ObIArray &append_filters, + double &total_sel, + double &conditional_sel, + ObIArray &all_predicate_sel) +{ + int ret = OB_SUCCESS; + double new_sel = 1.0; + if (OB_FAIL(append(total_filters, append_filters))) { + LOG_WARN("failed to append filters", K(ret)); + } else if (total_sel > OB_DOUBLE_EPSINON && !ctx.get_correlation_model().is_independent()) { + if (OB_FAIL(calculate_selectivity(table_metas, + ctx, + total_filters, + new_sel, + all_predicate_sel))) { + LOG_WARN("failed to calculate selectivity", K(total_filters), K(ret)); + } else { + conditional_sel = new_sel / total_sel; + total_sel = new_sel; + } + } else if (OB_FAIL(calculate_selectivity(table_metas, + ctx, + append_filters, + conditional_sel, + 
all_predicate_sel))) { + LOG_WARN("failed to calculate selectivity", K(append_filters), K(ret)); + } else { + total_sel *= conditional_sel; + } + return ret; +} + +int ObOptSelectivity::calculate_selectivity(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + ObIArray &sel_estimators, + double &selectivity) +{ + int ret = OB_SUCCESS; + selectivity = 1.0; + ObSEArray selectivities; + ObSEArray dummy; + for (int64_t i = 0; OB_SUCC(ret) && i < sel_estimators.count(); ++i) { + ObSelEstimator *estimator = sel_estimators.at(i); + double tmp_selectivity = 0.0; + if (OB_ISNULL(sel_estimators.at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("estimator is null", K(ret), K(sel_estimators)); + } else if (OB_FAIL(estimator->get_sel(table_metas, ctx, tmp_selectivity, dummy))) { + LOG_WARN("failed to get sel", K(ret), KPC(estimator)); + } else if (OB_FAIL(selectivities.push_back(revise_between_0_1(tmp_selectivity)))) { + LOG_WARN("failed to push back", K(ret)); + } + } + if (OB_SUCC(ret)) { + selectivity = ctx.get_correlation_model().combine_filters_selectivity(selectivities); + } + LOG_DEBUG("calculate predicates selectivity", K(selectivity), K(selectivities), K(sel_estimators)); + return ret; +} + int ObOptSelectivity::calculate_selectivity(const OptTableMetas &table_metas, const OptSelectivityCtx &ctx, const ObIArray &predicates, @@ -676,8 +858,13 @@ int ObOptSelectivity::calculate_selectivity(const OptTableMetas &table_metas, selectivity = 1.0; ObSEArray sel_estimators; ObSEArray selectivities; - ObArenaAllocator tmp_alloc("ObOptSel"); - ObSelEstimatorFactory factory(tmp_alloc); + ObSelEstimatorFactory factory; + if (OB_ISNULL(ctx.get_session_info())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else { + factory.get_allocator().set_tenant_id(ctx.get_session_info()->get_effective_tenant_id()); + } for (int64_t i = 0; OB_SUCC(ret) && i < predicates.count(); ++i) { const ObRawExpr *qual = predicates.at(i); ObSelEstimator *estimator = 
NULL; @@ -713,7 +900,7 @@ int ObOptSelectivity::calculate_selectivity(const OptTableMetas &table_metas, LOG_WARN("failed to get sel", K(ret), KPC(estimator)); } else { selectivities.at(i) = revise_between_0_1(tmp_selectivity); - if (ObSelEstType::RANGE == estimator->get_type()) { + if (ObSelEstType::COLUMN_RANGE == estimator->get_type()) { ObRangeSelEstimator *range_estimator = static_cast(estimator); if (OB_FAIL(add_var_to_array_no_dup(all_predicate_sel, ObExprSelPair(range_estimator->get_column_expr(), tmp_selectivity, true)))) { @@ -722,7 +909,63 @@ int ObOptSelectivity::calculate_selectivity(const OptTableMetas &table_metas, } } } - selectivity = ObOptSelectivity::get_filters_selectivity(selectivities, ctx.get_dependency_type()); + if (OB_SUCC(ret)) { + selectivity = ctx.get_correlation_model().combine_filters_selectivity(selectivities); + LOG_DEBUG("calculate predicates selectivity", K(selectivity), K(selectivities), K(sel_estimators)); + } + return ret; +} + +int ObOptSelectivity::calculate_join_selectivity(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObIArray &predicates, + double &selectivity, + ObIArray &all_predicate_sel) +{ + int ret = OB_SUCCESS; + selectivity = 1.0; + ObSEArray sel_estimators; + ObSelEstimatorFactory factory; + if (OB_ISNULL(ctx.get_session_info())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else { + factory.get_allocator().set_tenant_id(ctx.get_session_info()->get_effective_tenant_id()); + } + for (int64_t i = 0; OB_SUCC(ret) && i < predicates.count(); ++i) { + const ObRawExpr *qual = predicates.at(i); + ObSelEstimator *estimator = NULL; + double single_sel = false; + if (OB_FAIL(factory.create_estimator(ctx, qual, estimator))) { + LOG_WARN("failed to create estimator", KPC(qual)); + } else if (OB_FAIL(ObSelEstimator::append_estimators(sel_estimators, estimator))) { + LOG_WARN("failed to append estimators", KPC(qual)); + } else if (ObOptimizerUtil::find_item(all_predicate_sel, 
ObExprSelPair(qual, 0))) { + // do nothing + } else if (OB_FAIL(estimator->get_sel(table_metas, ctx, single_sel, all_predicate_sel))) { + LOG_WARN("failed to calculate one qual selectivity", KPC(estimator), K(qual), K(ret)); + } else if (FALSE_IT(single_sel = revise_between_0_1(single_sel))) { + // never reach + } else if (OB_FAIL(add_var_to_array_no_dup(all_predicate_sel, ObExprSelPair(qual, single_sel)))) { + LOG_WARN("fail ed to add selectivity to plan", K(ret), K(qual), K(selectivity)); + } else { + // We remember each predicate's selectivity in the plan so that we can reorder them + // in the vector of filters according to their selectivity. + LOG_PRINT_EXPR(TRACE, "calculate one qual selectivity", *qual, K(single_sel)); + } + } + for (int64_t i = 0; OB_SUCC(ret) && i < sel_estimators.count(); ++i) { + ObSelEstimator *estimator = sel_estimators.at(i); + double tmp_selectivity = 0.0; + if (OB_ISNULL(sel_estimators.at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("estimator is null", K(ret), K(sel_estimators)); + } else if (OB_FAIL(estimator->get_sel(table_metas, ctx, tmp_selectivity, all_predicate_sel))) { + LOG_WARN("failed to get sel", K(ret), KPC(estimator)); + } else { + selectivity *= revise_between_0_1(tmp_selectivity); + } + } return ret; } @@ -975,8 +1218,7 @@ int ObOptSelectivity::calculate_qual_selectivity(const OptTableMetas &table_meta ObIArray &all_predicate_sel) { int ret = OB_SUCCESS; - ObArenaAllocator tmp_alloc("ObOptSel"); - ObSelEstimatorFactory factory(tmp_alloc); + ObSelEstimatorFactory factory; ObSelEstimator *estimator = NULL; if (OB_FAIL(factory.create_estimator(ctx, &qual, estimator))) { LOG_WARN("failed to create estimator", K(qual)); @@ -1019,7 +1261,10 @@ int ObOptSelectivity::update_table_meta_info(const OptTableMetas &base_table_met table_meta->clear_base_table_info(); if (filtered_rows >= origin_rows) { // only update table rows - } else if (OB_FAIL(classify_quals(ctx, quals, all_predicate_sel, column_sel_infos))) { + } else if 
(OB_FAIL(!ctx.check_opt_compat_version(COMPAT_VERSION_4_2_4, COMPAT_VERSION_4_3_0, + COMPAT_VERSION_4_3_3) ? + classify_quals_deprecated(ctx, quals, all_predicate_sel, column_sel_infos) : + classify_quals(base_table_metas, ctx, quals, all_predicate_sel, column_sel_infos))) { LOG_WARN("failed to classify quals", K(ret)); } else { for (int64_t i = 0; OB_SUCC(ret) && i < table_meta->get_column_metas().count(); ++i) { @@ -1039,32 +1284,19 @@ int ObOptSelectivity::update_table_meta_info(const OptTableMetas &base_table_met * 使用第一步得到的ndv和rows作为column的原始ndv和rows,再基于所有过滤谓词过滤后的行数, * 使用缩放公式缩放列的ndv。 */ + double origin_ndv = column_meta.get_ndv(); double step1_ndv = column_meta.get_ndv(); double step2_ndv = column_meta.get_ndv(); double step1_row = origin_rows; double null_num = column_meta.get_num_null(); double hist_scale = -1; - // step 1 - if (OB_NOT_NULL(sel_info)) { - step1_row *= sel_info->selectivity_; - hist_scale = sel_info->selectivity_; - if (sel_info->equal_count_ > 0) { - step1_ndv = sel_info->equal_count_; - } else if (sel_info->has_range_exprs_) { - step1_ndv *= sel_info->range_selectivity_; - } else { - step1_ndv = scale_distinct(step1_row, origin_rows, column_meta.get_ndv()); - } - } - // step 2 - if (filtered_rows < step1_row) { - step2_ndv = scale_distinct(filtered_rows, step1_row, step1_ndv); - } else { - step2_ndv = step1_ndv; + if (null_num > origin_rows - origin_ndv) { + null_num = std::max(origin_rows - origin_ndv, 0.0); } + double nns = origin_rows <= OB_DOUBLE_EPSINON ? 
1.0 : 1 - revise_between_0_1(null_num / origin_rows); + bool null_reject = false; // update null number if (null_num > 0) { - bool null_reject = false; const ObColumnRefRawExpr *column_expr = log_plan->get_column_expr_by_id( table_meta->get_table_id(), column_meta.get_column_id()); if (OB_ISNULL(column_expr)) { @@ -1080,6 +1312,52 @@ int ObOptSelectivity::update_table_meta_info(const OptTableMetas &base_table_met null_num = null_num * filtered_rows / origin_rows; } } + // step 1 + if (OB_NOT_NULL(sel_info)) { + if (!ctx.check_opt_compat_version(COMPAT_VERSION_4_2_4, COMPAT_VERSION_4_3_0, + COMPAT_VERSION_4_3_3)) { + step1_row *= sel_info->selectivity_; + hist_scale = sel_info->selectivity_; + if (sel_info->equal_count_ > 0) { + step1_ndv = sel_info->equal_count_; + } else if (sel_info->has_range_exprs_) { + step1_ndv *= sel_info->range_selectivity_; + } else { + step1_ndv = scale_distinct(step1_row, origin_rows, column_meta.get_ndv()); + } + } else { + double step1_sel = sel_info->selectivity_; + double direct_ndv_sel = 1.0; + if (origin_rows * step1_sel < filtered_rows && + origin_rows > OB_DOUBLE_EPSINON) { + step1_sel = filtered_rows / origin_rows; + } + if (null_reject) { + if (step1_sel <= nns && nns > OB_DOUBLE_EPSINON) { + direct_ndv_sel = step1_sel / nns; + } else { + direct_ndv_sel = 1.0; + } + } else { + // complex quals, the selectivity might be default + // do not handle null + direct_ndv_sel = step1_sel; + } + step1_row *= step1_sel; + hist_scale = step1_sel; + if (sel_info->equal_count_ > 0) { + step1_ndv = sel_info->equal_count_; + } else { + step1_ndv *= direct_ndv_sel; + } + } + } + // step 2 + if (filtered_rows < step1_row) { + step2_ndv = scale_distinct(filtered_rows, step1_row, step1_ndv); + } else { + step2_ndv = step1_ndv; + } // set new column meta if (OB_SUCC(ret)) { column_meta.set_ndv(revise_ndv(step2_ndv)); @@ -1885,12 +2163,12 @@ int ObOptSelectivity::get_column_hist_scale(const OptTableMetas &table_metas, } int 
ObOptSelectivity::get_column_basic_info(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObRawExpr &expr, - double *ndv_ptr, - double *num_null_ptr, - double *avg_len_ptr, - double *row_count_ptr) + const OptSelectivityCtx &ctx, + const ObRawExpr &expr, + double *ndv_ptr, + double *num_null_ptr, + double *avg_len_ptr, + double *row_count_ptr) { int ret = OB_SUCCESS; if (OB_UNLIKELY(!expr.is_column_ref_expr())) { @@ -1903,6 +2181,8 @@ int ObOptSelectivity::get_column_basic_info(const OptTableMetas &table_metas, double num_null = 0; double avg_len = 0; double row_count = 0; + double cur_rowcnt = ctx.get_current_rows(); + if (OB_FAIL(get_column_basic_from_meta(table_metas, column_expr, need_default, @@ -1917,17 +2197,36 @@ int ObOptSelectivity::get_column_basic_info(const OptTableMetas &table_metas, if (num_null > row_count - ndv) { num_null = row_count - ndv > 0 ? row_count - ndv : 0; } - if (ctx.get_current_rows() > 0.0 && ctx.get_current_rows() < row_count) { - ndv = scale_distinct(ctx.get_current_rows(), row_count, ndv); + if (NULL != ctx.get_ambient_card() && + !ctx.get_ambient_card()->empty() && + ctx.check_opt_compat_version(COMPAT_VERSION_4_2_4, COMPAT_VERSION_4_3_0, + COMPAT_VERSION_4_3_3)) { + ObSEArray table_idx; + if (OB_FAIL(expr.get_relation_ids().to_array(table_idx))) { + LOG_WARN("failed to get table idx", K(ret), K(expr)); + } else if (OB_UNLIKELY(table_idx.count() != 1) || + OB_UNLIKELY(table_idx.at(0) < 1) || + OB_UNLIKELY(table_idx.at(0) >= ctx.get_ambient_card()->count())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected expr relation ids", K(expr), K(table_idx), K(ctx)); + } else { + cur_rowcnt = ctx.get_ambient_card()->at(table_idx.at(0)); + } } - LOG_TRACE("show column basic info", K(row_count), K(ctx.get_current_rows()), K(num_null), K(avg_len), K(ndv)); + if (OB_SUCC(ret) && cur_rowcnt > 0.0 && cur_rowcnt < row_count) { + ndv = scale_distinct(cur_rowcnt, row_count, ndv); + } + + LOG_TRACE("show column basic info", 
K(row_count), K(cur_rowcnt), K(num_null), K(avg_len), K(ndv)); // set return - assign_value(row_count, row_count_ptr); - assign_value(ndv, ndv_ptr); - assign_value(num_null, num_null_ptr); - assign_value(avg_len, avg_len_ptr); + if (OB_SUCC(ret)) { + assign_value(row_count, row_count_ptr); + assign_value(ndv, ndv_ptr); + assign_value(num_null, num_null_ptr); + assign_value(avg_len, avg_len_ptr); + } } } return ret; @@ -2038,26 +2337,8 @@ int ObOptSelectivity::get_compare_value(const OptSelectivityCtx &ctx, can_cmp = false; } else if (expr_value.get_type() != col->get_result_type().get_type() || expr_value.get_collation_type() != col->get_result_type().get_collation_type()) { - const ObDataTypeCastParams dtc_params = - ObBasicSessionInfo::create_dtc_params(ctx.get_session_info()); - ObObj dest_value; - ObCastCtx cast_ctx(&ctx.get_allocator(), - &dtc_params, - CM_NONE, - col->get_result_type().get_collation_type()); - ObAccuracy res_acc; - if (col->get_result_type().is_decimal_int()) { - res_acc = col->get_result_type().get_accuracy(); - cast_ctx.res_accuracy_ = &res_acc; - } - if (OB_FAIL(ObObjCaster::to_type(col->get_result_type().get_type(), - col->get_result_type().get_collation_type(), - cast_ctx, - expr_value, - dest_value))) { - LOG_WARN("failed to cast value", K(ret)); - } else { - expr_value = dest_value; + if (OB_FAIL(convert_obj_to_expr_type(ctx, col, CM_NONE, expr_value))) { + LOG_WARN("failed to convert obj", K(ret), K(expr_value)); } } return ret; @@ -2431,6 +2712,142 @@ int ObOptSelectivity::get_simple_mutex_column(const ObRawExpr *qual, const ObRaw return ret; } +int ObOptSelectivity::calculate_table_ambient_cardinality(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRelIds &rel_ids, + const double cur_rows, + double &table_ambient_card) +{ + int ret = OB_SUCCESS; + if (NULL != ctx.get_ambient_card() && + !ctx.get_ambient_card()->empty()) { + table_ambient_card = 1.0; + for (int64_t i = 0; i < 
ctx.get_ambient_card()->count(); i ++) { + if (rel_ids.has_member(i)) { + table_ambient_card *= std::max(1.0, ctx.get_ambient_card()->at(i)); + } + } + } else { + ObSEArray table_ids; + const ObDMLStmt *stmt = ctx.get_stmt(); + table_ambient_card = 1.0; + if (OB_ISNULL(stmt)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("null stmt", K(ret)); + } else if (OB_FAIL(stmt->relids_to_table_ids(rel_ids, table_ids))) { + LOG_WARN("faile to get table ids", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < table_ids.count(); i ++) { + table_ambient_card *= std::max(1.0, table_metas.get_rows(table_ids.at(i))); + } + } + if (cur_rows > 0) { + table_ambient_card = std::min(cur_rows, table_ambient_card); + } + return ret; +} + +int ObOptSelectivity::calculate_distinct_in_single_table(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRelIds &rel_id, + const common::ObIArray& exprs, + const double cur_rows, + double &rows) +{ + int ret = OB_SUCCESS; + rows = 1.0; + ObSEArray special_exprs; + ObSEArray expr_ndv; + ObSEArray filtered_exprs; + double ambient_card = -1.0; + //classify expr and get ndv + if (OB_UNLIKELY(rel_id.num_members() != 1)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected relation id", K(ret)); + } else if (OB_FAIL(calculate_table_ambient_cardinality(table_metas, ctx, rel_id, cur_rows, ambient_card))) { + LOG_WARN("failed to calculate ambient card", K(ret)); + } else if (OB_FAIL(filter_column_by_equal_set(table_metas, ctx, exprs, filtered_exprs))) { + LOG_WARN("failed filter column by equal set", K(ret)); + } else if (OB_FAIL(calculate_expr_ndv(filtered_exprs, expr_ndv, table_metas, ctx, ambient_card))) { + LOG_WARN("fail to calculate expr ndv", K(ret)); + } else if (!ctx.check_opt_compat_version(COMPAT_VERSION_4_2_4, COMPAT_VERSION_4_3_0, + COMPAT_VERSION_4_3_3)) { + for (int64_t i = 0; OB_SUCC(ret) && i < expr_ndv.count(); ++i) { + if (0 == i) { + rows *= expr_ndv.at(i); + } else { + rows *= expr_ndv.at(i) / std::sqrt(2); + } + } + 
} else { + rows = combine_ndvs(ambient_card, expr_ndv); + } + LOG_TRACE("succeed to calculate distinct in single table", K(rel_id), K(ambient_card), K(rows), K(expr_ndv), K(exprs)); + + return ret; +} + +int ObOptSelectivity::remove_dummy_distinct_exprs(ObIArray &helpers, + ObIArray &exprs) +{ + int ret = OB_SUCCESS; + ObSEArray new_exprs; + for (int64_t i = 0; OB_SUCC(ret) && i < exprs.count(); i ++) { + ObRawExpr *expr = exprs.at(i); + bool is_dummy = false; + if (OB_ISNULL(expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(exprs)); + } else if (expr->has_flag(CNT_WINDOW_FUNC) || + expr->is_column_ref_expr()) { + // do nothing + } else if (OB_FAIL(check_expr_in_distinct_helper(expr, helpers, is_dummy))) { + LOG_WARN("failed to check expr", K(ret)); + } + if (OB_SUCC(ret) && !is_dummy) { + if (OB_FAIL(new_exprs.push_back(expr))) { + LOG_WARN("failed to push back"); + } + } + } + if (OB_SUCC(ret) && new_exprs.count() != exprs.count()) { + LOG_DEBUG("remove dummy distinct exprs", K(exprs), K(new_exprs)); + if (OB_FAIL(exprs.assign(new_exprs))) { + LOG_WARN("failed to assign exprs", K(ret)); + } + } + return ret; +} + +int ObOptSelectivity::check_expr_in_distinct_helper(const ObRawExpr *expr, + const ObIArray &helpers, + bool &is_dummy_expr) +{ + int ret = OB_SUCCESS; + ObSEArray column_exprs; + bool found = false; + is_dummy_expr = true; + if (OB_FAIL(ObRawExprUtils::extract_column_exprs(expr, column_exprs))) { + LOG_WARN("failed to extract column exprs", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && is_dummy_expr && i < column_exprs.count(); i ++) { + ObRawExpr *col_expr = column_exprs.at(i); + if (OB_ISNULL(col_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(column_exprs)); + } + for (int64_t j = 0; OB_SUCC(ret) && !found && j < helpers.count(); j ++) { + if (col_expr->get_relation_ids() == helpers.at(j).rel_id_) { + found = ObOptimizerUtil::find_item(helpers.at(j).exprs_, col_expr); + } + } + if (!found) { + is_dummy_expr 
= false; + } + } + return ret; +} + int ObOptSelectivity::calculate_distinct(const OptTableMetas &table_metas, const OptSelectivityCtx &ctx, const ObIArray& exprs, @@ -2440,93 +2857,180 @@ int ObOptSelectivity::calculate_distinct(const OptTableMetas &table_metas, { int ret = OB_SUCCESS; rows = 1; - ObSEArray column_exprs; - ObSEArray special_exprs; - ObSEArray expr_ndv; - ObSEArray filtered_exprs; - //classify expr and get ndv - if (OB_FAIL(classify_exprs(exprs, column_exprs, special_exprs, table_metas, ctx))) { + ObSEArray single_ndvs; + ObSEArray helpers; + ObSEArray special_exprs; + /** + * 1. 将 exprs 根据基表分组,sepcial exprs 中保存不在基表计算的表达式,例如 window function 等 + * 2. 基表内计算 NDV 后(每张表的 NDV 最大值受基表行数限制),再根据当前行数计算联合 NDV + */ + if (OB_FAIL(classify_exprs(ctx, exprs, helpers, special_exprs))) { LOG_WARN("failed to classify_exprs", K(ret)); - } else if (OB_FAIL(filter_column_by_equal_set(table_metas, ctx, column_exprs, filtered_exprs))) { - LOG_WARN("failed filter column by equal set", K(ret)); - } else if (OB_FAIL(calculate_expr_ndv(filtered_exprs, expr_ndv, table_metas, ctx, origin_rows))) { + } else if (OB_FAIL(remove_dummy_distinct_exprs(helpers, special_exprs))) { + LOG_WARN("failed to remove dummy exprs", K(ret)); + } else if (OB_FAIL(calculate_expr_ndv(special_exprs, single_ndvs, table_metas, ctx, origin_rows))) { LOG_WARN("fail to calculate expr ndv", K(ret)); - } else if (OB_FAIL(calculate_expr_ndv(special_exprs, expr_ndv, table_metas, ctx, origin_rows))) { - LOG_WARN("fail to calculate special expr ndv", K(ret)); } - //calculate rows - for (int64_t i = 0; OB_SUCC(ret) && i < expr_ndv.count(); ++i) { - if (0 == i) { - rows *= expr_ndv.at(i); - } else { - rows *= expr_ndv.at(i) / std::sqrt(2); + for (int64_t i = 0; OB_SUCC(ret) && i < helpers.count(); i ++) { + OptDistinctHelper &helper = helpers.at(i); + double single_table_ndv = 1.0; + if (OB_FAIL(remove_dummy_distinct_exprs(helpers, helper.exprs_))) { + LOG_WARN("failed to remove dummy exprs", K(ret)); + } else 
if (OB_FAIL(calculate_distinct_in_single_table( + table_metas, ctx, helper.rel_id_, helper.exprs_, need_refine ? origin_rows : -1, single_table_ndv))) { + LOG_WARN("failed to calculate distinct in single table", K(helper.exprs_)); + } else if (OB_FAIL(single_ndvs.push_back(single_table_ndv))) { + LOG_WARN("failed to push back", K(ret)); } } - //refine - if (OB_SUCC(ret) && need_refine) { - rows = std::min(rows, origin_rows); - LOG_TRACE("succeed to calculate distinct", K(origin_rows), K(rows), K(exprs)); + if (!ctx.check_opt_compat_version(COMPAT_VERSION_4_2_4, COMPAT_VERSION_4_3_0, + COMPAT_VERSION_4_3_3)) { + for (int64_t i = 0; OB_SUCC(ret) && i < single_ndvs.count(); ++i) { + if (0 == i) { + rows *= single_ndvs.at(i); + } else { + rows *= single_ndvs.at(i) / std::sqrt(2); + } + } + //refine + if (OB_SUCC(ret) && need_refine && origin_rows >= 0.0) { + rows = std::min(rows, origin_rows); + } + } else { + rows = combine_ndvs(need_refine ? origin_rows : -1, single_ndvs); + } + LOG_TRACE("succeed to calculate distinct", K(ctx), K(origin_rows), K(rows), K(single_ndvs), K(exprs)); + return ret; +} + +int ObOptSelectivity::calculate_distinct(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr& expr, + const double origin_rows, + double &rows, + const bool need_refine) +{ + int ret = OB_SUCCESS; + ObSEArray expr_array; + if (OB_FAIL(expr_array.push_back(const_cast(&expr)))) { + LOG_WARN("failed to push back expr", K(ret)); + } else if (OB_FAIL(calculate_distinct(table_metas, ctx, expr_array, origin_rows, rows, need_refine))) { + LOG_WARN("failed to calculate distinct", K(expr)); } return ret; } -int ObOptSelectivity::classify_exprs(const ObIArray& exprs, - ObIArray& column_exprs, - ObIArray& special_exprs, - const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx) +double ObOptSelectivity::combine_two_ndvs(double ambient_card, double ndv1, double ndv2) +{ + ndv1 = std::max(1.0, ndv1); + ndv2 = std::max(1.0, ndv2); + double 
max_ndv = std::max(ndv1, ndv2); + double min_ndv = std::min(ndv1, ndv2); + double combine_ndv = 1.0; + if (ambient_card >= 0.0) { + // due to the precision, we need to refine the result + combine_ndv = ndv1 * ndv2 * (1 - pow(1 - 1 / min_ndv , ambient_card / max_ndv)); + combine_ndv = std::max(combine_ndv, max_ndv); + combine_ndv = std::min(ambient_card, combine_ndv); + } else { + combine_ndv = ndv1 * ndv2 / std::sqrt(2); + combine_ndv = std::max(combine_ndv, max_ndv); + } + return combine_ndv; +} + +double ObOptSelectivity::combine_ndvs(double ambient_card, ObIArray &ndvs) +{ + double ndv = 1.0; + if (!ndvs.empty()) { + lib::ob_sort(&ndvs.at(0), &ndvs.at(0) + ndvs.count()); + ndv = ndvs.at(0); + for (int64_t i = 1; i < ndvs.count(); i ++) { + ndv = combine_two_ndvs(ambient_card, ndv, ndvs.at(i)); + } + } + if (ambient_card >= 0.0) { + ndv = std::min(ambient_card, ndv); + } + return ndv; +} + +int ObOptSelectivity::classify_exprs(const OptSelectivityCtx &ctx, + const ObIArray& exprs, + ObIArray &helpers, + ObIArray& special_exprs) { int ret = OB_SUCCESS; for (int64_t i = 0; OB_SUCC(ret) && i < exprs.count(); ++i) { - ObRawExpr *child_expr = NULL; - if (OB_ISNULL(child_expr = exprs.at(i))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get null expr", K(ret), K(i)); - } else if (OB_FAIL(classify_exprs(child_expr, column_exprs, special_exprs, table_metas, ctx))) { + if (OB_FAIL(classify_exprs(ctx, exprs.at(i), helpers, special_exprs))) { LOG_WARN("failed to classify_exprs", K(ret)); } } return ret; } -int ObOptSelectivity::classify_exprs(ObRawExpr* expr, - ObIArray& column_exprs, - ObIArray& special_exprs, - const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx) +int ObOptSelectivity::classify_exprs(const OptSelectivityCtx &ctx, + ObRawExpr *expr, + ObIArray &helpers, + ObIArray& special_exprs) { int ret = OB_SUCCESS; + bool is_special = false; if (OB_ISNULL(expr)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null pointer", K(expr), K(ret)); - } else if 
(is_special_expr(*expr)) { - if (OB_FAIL(add_var_to_array_no_dup(special_exprs, expr))) { - LOG_WARN("fail to add expr to array", K(ret)); + } else if (OB_FAIL(check_is_special_distinct_expr(ctx, expr, is_special))) { + LOG_WARN("failed to check expr", K(ret)); + } else if (is_special) { + if (expr->has_flag(CNT_WINDOW_FUNC) || + expr->get_relation_ids().num_members() != 1) { + if (OB_FAIL(add_var_to_array_no_dup(special_exprs, expr))) { + LOG_WARN("failed to push back", K(ret)); + } + } else { + if (OB_FAIL(add_expr_to_distinct_helper(helpers, expr->get_relation_ids(), expr))) { + LOG_WARN("failed to add expr to helper", K(ret)); + } } } else if (expr->is_column_ref_expr()) { - if (OB_FAIL(add_var_to_array_no_dup(column_exprs, expr))) { - LOG_WARN("fail to add expr to array", K(ret)); + if (OB_FAIL(add_expr_to_distinct_helper(helpers, expr->get_relation_ids(), expr))) { + LOG_WARN("failed to add expr to helper", K(ret)); } } else { for (int64_t i = 0; OB_SUCC(ret) && i < expr->get_param_count(); ++i) { - ObRawExpr *child_expr = NULL; - if (OB_ISNULL(child_expr = expr->get_param_expr(i))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get null expr", K(ret), K(i)); - } else if (OB_FAIL(classify_exprs(child_expr, column_exprs, special_exprs, table_metas, ctx))) { + if (OB_FAIL(SMART_CALL(classify_exprs(ctx, expr->get_param_expr(i), helpers, special_exprs)))) { LOG_WARN("failed to classify_exprs", K(ret)); } } } + LOG_DEBUG("succeed to classify distinct exprs", K(helpers), K(special_exprs)); return ret; } -bool ObOptSelectivity::is_special_expr(const ObRawExpr &expr) { - bool is_special = false; - if (expr.is_win_func_expr()) { - is_special = true; +int ObOptSelectivity::add_expr_to_distinct_helper(ObIArray &helpers, + const ObRelIds &rel_id, + ObRawExpr *expr) +{ + int ret = OB_SUCCESS; + bool found = false; + OptDistinctHelper *helper = NULL; + for (int64_t idx = 0; NULL == helper && idx < helpers.count(); idx ++) { + if (helpers.at(idx).rel_id_.equal(rel_id)) { + helper 
= &helpers.at(idx); + } } - return is_special; + if (NULL == helper) { + if (OB_ISNULL(helper = helpers.alloc_place_holder())) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate", K(ret)); + } else if (OB_FAIL(helper->rel_id_.add_members(rel_id))) { + LOG_WARN("failed to add member", K(ret)); + } + } + if (OB_SUCC(ret) && OB_FAIL(add_var_to_array_no_dup(helper->exprs_, expr))) { + LOG_WARN("failed to push back", K(ret)); + } + return ret; } int ObOptSelectivity::calculate_expr_ndv(const ObIArray& exprs, @@ -2559,6 +3063,56 @@ int ObOptSelectivity::calculate_expr_ndv(const ObIArray& exprs, return ret; } +int ObOptSelectivity::check_is_special_distinct_expr(const OptSelectivityCtx &ctx, + const ObRawExpr *expr, + bool &is_special) +{ + int ret = OB_SUCCESS; + is_special = false; + if (OB_ISNULL(expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null pointer", K(expr), K(ret)); + } else if (expr->is_win_func_expr()) { + is_special = true; + } else if (!ctx.check_opt_compat_version(COMPAT_VERSION_4_2_4, COMPAT_VERSION_4_3_0, + COMPAT_VERSION_4_3_3)) { + is_special = false; + } else if (expr->is_const_expr()) { + is_special = false; + } else if (T_OP_MOD == expr->get_expr_type()) { + is_special = expr->get_param_count() == 2 && + OB_NOT_NULL(expr->get_param_expr(1)) && + expr->get_param_expr(1)->is_static_scalar_const_expr(); + } else if (T_FUN_SYS_SUBSTR == expr->get_expr_type() || T_FUN_SYS_SUBSTRB == expr->get_expr_type()) { + if (expr->get_param_count() == 2) { + is_special = OB_NOT_NULL(expr->get_param_expr(1)) && expr->get_param_expr(1)->is_static_scalar_const_expr(); + } else if (expr->get_param_count() == 3) { + is_special = OB_NOT_NULL(expr->get_param_expr(2)) && expr->get_param_expr(2)->is_static_scalar_const_expr(); + } + } else if (is_dense_time_expr_type(expr->get_expr_type()) || + T_FUN_SYS_MONTH_NAME == expr->get_expr_type() || + T_FUN_SYS_DAY_NAME == expr->get_expr_type()) { + is_special = expr->get_param_count() == 1; + } else 
if (T_FUN_SYS_EXTRACT == expr->get_expr_type()) { + is_special = expr->get_param_count() == 2 && + OB_NOT_NULL(expr->get_param_expr(0)) && + expr->get_param_expr(0)->is_static_scalar_const_expr(); + } else if (T_FUN_SYS_CAST == expr->get_expr_type()) { + const ObRawExpr *param_expr = NULL; + bool is_monotonic = false; + if (OB_UNLIKELY(expr->get_param_count() < 2) || + OB_ISNULL(param_expr = expr->get_param_expr(0))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected expr", KPC(expr)); + } else if (expr->get_data_type() != ObDateType) { + is_special = false; + } else if (OB_FAIL(ObObjCaster::is_cast_monotonic(param_expr->get_data_type(), expr->get_data_type(), is_special))) { + LOG_WARN("check cast monotonic error", KPC(expr), K(ret)); + } + } + return ret; +} + int ObOptSelectivity::calculate_special_ndv(const OptTableMetas &table_metas, const ObRawExpr* expr, const OptSelectivityCtx &ctx, @@ -2566,10 +3120,129 @@ int ObOptSelectivity::calculate_special_ndv(const OptTableMetas &table_metas, const double origin_rows) { int ret = OB_SUCCESS; - if (OB_ISNULL(expr)) { + const ObRawExpr *param_expr = NULL; + special_ndv = std::max(origin_rows, 1.0); + bool is_special = false; + bool need_refine_by_param_expr = true; + if (OB_FAIL(check_is_special_distinct_expr(ctx, expr, is_special))) { + LOG_WARN("failed to check expr", K(ret)); + } else if (OB_UNLIKELY(!is_special)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected expr", KPC(expr), K(ret)); + } else if (expr->is_win_func_expr()) { + if (OB_FAIL(calculate_winfunc_ndv(table_metas, expr, ctx, special_ndv, origin_rows))) { + LOG_WARN("failed to calculate windown function ndv", K(ret)); + } + need_refine_by_param_expr = false; + } else if (T_OP_MOD == expr->get_expr_type()) { + param_expr = expr->get_param_expr(0); + const ObRawExpr *const_expr = expr->get_param_expr(1); + bool valid = false; + if (OB_FAIL(calc_const_numeric_value(ctx, const_expr, special_ndv, valid))) { + LOG_WARN("failed to calc const value", 
K(ret)); + } else { + special_ndv = std::abs(special_ndv); + } + } else if (T_FUN_SYS_SUBSTR == expr->get_expr_type() || T_FUN_SYS_SUBSTRB == expr->get_expr_type()) { + double substr_len = 0.0; + double dummy = 0.0; + need_refine_by_param_expr = false; // substr ndv will not be greater than its param + if (OB_FAIL(calculate_expr_avg_len(table_metas, ctx, expr, substr_len))) { + LOG_WARN("failed to calc expr length", K(ret)); + } else if (OB_FAIL(calculate_substrb_info(table_metas, + ctx, + expr->get_param_expr(0), + substr_len - ObOptEstCostModel::DEFAULT_FIXED_OBJ_WIDTH, + origin_rows, + special_ndv, + dummy))) { + LOG_WARN("failed to calculate substr ndv", K(ret)); + } + } else if (is_dense_time_expr_type(expr->get_expr_type()) || + (T_FUN_SYS_CAST == expr->get_expr_type() && expr->get_data_type() == ObDateType) || + T_FUN_SYS_EXTRACT == expr->get_expr_type()) { + if (T_FUN_SYS_EXTRACT == expr->get_expr_type()) { + param_expr = expr->get_param_expr(1); + } else { + param_expr = expr->get_param_expr(0); + } + ObObj min_value; + ObObj max_value; + bool use_default = false; + double min_scalar = 0.0; + double max_scalar = 0.0; + if (OB_FAIL(calc_expr_min_max(table_metas, + ctx, + expr, + min_value, + max_value))) { + LOG_WARN("failed to calculate expr min max", K(ret)); + } else if (min_value.is_min_value() || max_value.is_max_value() || + !(min_value.is_integer_type() || min_value.is_number() || min_value.is_date()) || + !(max_value.is_integer_type() || max_value.is_number() || max_value.is_date())) { + use_default = true; + } else if (OB_FAIL(ObOptEstObjToScalar::convert_obj_to_double(&min_value, min_scalar)) || + OB_FAIL(ObOptEstObjToScalar::convert_obj_to_double(&max_value, max_scalar))) { + LOG_WARN("failed to convert obj to double", K(ret), K(min_value), K(max_value)); + } else { + special_ndv = max_scalar - min_scalar + 1; + } + if (OB_SUCC(ret) && !use_default) { + if (T_FUN_SYS_YEARWEEK_OF_DATE == expr->get_expr_type()) { + special_ndv *= 54.0 / 100.0; + } 
else if (T_FUN_SYS_EXTRACT == expr->get_expr_type()) { + ObObj result; + bool got_result = false; + if (OB_FAIL(ObSQLUtils::calc_const_or_calculable_expr(ctx.get_opt_ctx().get_exec_ctx(), + expr->get_param_expr(0), + result, + got_result, + ctx.get_allocator()))) { + LOG_WARN("fail to calc_const_or_calculable_expr", K(ret)); + } else if (!got_result || result.is_null() || !result.is_int()) { + // do nothing + } else if (DATE_UNIT_YEAR_MONTH == result.get_int()) { + special_ndv *= 12.0 / 100.0; + } + } + } + } else if (T_FUN_SYS_MONTH_NAME == expr->get_expr_type()) { + special_ndv = 12; + param_expr = expr->get_param_expr(0); + } else if (T_FUN_SYS_DAY_NAME == expr->get_expr_type()) { + special_ndv = 7; + param_expr = expr->get_param_expr(0); + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected special expr", KPC(expr)); + } + if (OB_SUCC(ret) && need_refine_by_param_expr && NULL != param_expr) { + double ndv_upper_bound = 1.0; + if (OB_FAIL(SMART_CALL(calculate_distinct(table_metas, + ctx, + *param_expr, + origin_rows, + ndv_upper_bound)))) { + LOG_WARN("failed to calculate distinct", K(ret), KPC(param_expr)); + } else { + special_ndv = std::min(special_ndv, ndv_upper_bound); + } + } + special_ndv = revise_ndv(special_ndv); + return ret; +} + +int ObOptSelectivity::calculate_winfunc_ndv(const OptTableMetas &table_metas, + const ObRawExpr* expr, + const OptSelectivityCtx &ctx, + double &special_ndv, + const double origin_rows) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(expr) || OB_UNLIKELY(!expr->is_win_func_expr())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected null pointer", K(expr), K(ret)); - } else if (expr->is_win_func_expr()) { + } else { double part_order_ndv = 1.0; double order_ndv = 1.0; double part_ndv = 1.0; @@ -2578,6 +3251,10 @@ int ObOptSelectivity::calculate_special_ndv(const OptTableMetas &table_metas, ObSEArray part_order_exprs; const ObWinFunRawExpr *win_expr = reinterpret_cast(expr); const ObIArray &order_items = 
win_expr->get_order_items(); + if (OB_UNLIKELY(origin_rows < 0)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("win function ndv depends on current rows", K(ret), K(origin_rows)); + } for (int64_t i = 0; OB_SUCC(ret) && i < order_items.count(); ++i) { const OrderItem &order_item = order_items.at(i); ObRawExpr *order_expr = order_item.expr_; @@ -2597,11 +3274,11 @@ int ObOptSelectivity::calculate_special_ndv(const OptTableMetas &table_metas, LOG_WARN("fail to assign exprs", K(ret)); } else if (OB_FAIL(append(part_order_exprs, order_exprs))) { LOG_WARN("failed to append exprs", K(ret)); - } else if (OB_FAIL(SMART_CALL(calculate_distinct(table_metas, ctx, part_order_exprs, origin_rows, part_order_ndv, false)))) { - LOG_WARN("failed to calculate_distinct", K(ret)); - } else if (OB_FAIL(SMART_CALL(calculate_distinct(table_metas, ctx, order_exprs, origin_rows, order_ndv, false)))) { + } else if (OB_FAIL(SMART_CALL(calculate_distinct(table_metas, ctx, part_order_exprs, origin_rows, part_order_ndv)))) { LOG_WARN("failed to calculate_distinct", K(ret)); - } else if (OB_FAIL(SMART_CALL(calculate_distinct(table_metas, ctx, part_exprs, origin_rows, part_ndv, false)))) { + } else if (OB_FAIL(SMART_CALL(calculate_distinct(table_metas, ctx, order_exprs, origin_rows, order_ndv)))) { + LOG_WARN("failed to calculate_distinct", K(ret)); + } else if (OB_FAIL(SMART_CALL(calculate_distinct(table_metas, ctx, part_exprs, origin_rows, part_ndv)))) { LOG_WARN("failed to calculate_distinct", K(ret)); } @@ -2620,13 +3297,12 @@ int ObOptSelectivity::calculate_special_ndv(const OptTableMetas &table_metas, ObRawExpr* const_expr = NULL; ObObj result,out_ptr; bool got_result = false; - const ParamStore *params = ctx.get_params(); if (OB_FAIL(param_exprs.assign(win_expr->get_func_params()))) { LOG_WARN("fail to assign exprs", K(ret)); } else if (param_exprs.count() == 0|| OB_ISNULL(const_expr = param_exprs.at(0))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected error", K(param_exprs.count()), 
K(const_expr), K(ret)); - } else if (ObOptEstUtils::is_calculable_expr(*const_expr, params->count())) { + } else if (const_expr->is_static_const_expr()) { if (OB_FAIL(ObSQLUtils::calc_const_or_calculable_expr(ctx.get_opt_ctx().get_exec_ctx(), const_expr, result, @@ -2653,7 +3329,7 @@ int ObOptSelectivity::calculate_special_ndv(const OptTableMetas &table_metas, LOG_WARN("unexpected null pointer", K(aggr_expr), K(ret)); } else if (OB_FAIL(param_exprs.assign(aggr_expr->get_real_param_exprs()))) { LOG_WARN("fail to assign exprs", K(ret)); - } else if (OB_FAIL(SMART_CALL(calculate_distinct(table_metas, ctx, param_exprs, origin_rows, param_ndv, false)))) { + } else if (OB_FAIL(SMART_CALL(calculate_distinct(table_metas, ctx, param_exprs, origin_rows, param_ndv)))) { LOG_WARN("failed to calculate_distinct", K(ret)); } else { special_ndv = std::min(part_order_ndv, param_ndv); @@ -2665,7 +3341,7 @@ int ObOptSelectivity::calculate_special_ndv(const OptTableMetas &table_metas, double param_ndv = 1.0; if (OB_FAIL(param_exprs.assign(win_expr->get_func_params()))) { LOG_WARN("fail to assign exprs", K(ret)); - } else if (OB_FAIL(SMART_CALL(calculate_distinct(table_metas, ctx, param_exprs, origin_rows, param_ndv, false)))) { + } else if (OB_FAIL(SMART_CALL(calculate_distinct(table_metas, ctx, param_exprs, origin_rows, param_ndv)))) { LOG_WARN("failed to calculate_distinct", K(ret)); } else { special_ndv = std::min(part_order_ndv, param_ndv); @@ -2676,7 +3352,7 @@ int ObOptSelectivity::calculate_special_ndv(const OptTableMetas &table_metas, double param_ndv = 1.0; if (OB_FAIL(param_exprs.assign(win_expr->get_func_params()))) { LOG_WARN("fail to assign exprs", K(ret)); - } else if (OB_FAIL(SMART_CALL(calculate_distinct(table_metas, ctx, param_exprs, origin_rows, param_ndv, false)))) { + } else if (OB_FAIL(SMART_CALL(calculate_distinct(table_metas, ctx, param_exprs, origin_rows, param_ndv)))) { LOG_WARN("failed to calculate_distinct", K(ret)); } else { special_ndv = param_ndv; @@ 
-2684,7 +3360,7 @@ int ObOptSelectivity::calculate_special_ndv(const OptTableMetas &table_metas, } else { special_ndv = part_order_ndv; } - LOG_TRACE("calculate win expr ndv", K(win_expr->get_func_type()), K(part_exprs.count()), K(order_exprs.count())); + LOG_TRACE("calculate window function ndv", KPC(win_expr), K(special_ndv)); } special_ndv = revise_ndv(special_ndv); return ret; @@ -2717,7 +3393,7 @@ int ObOptSelectivity::filter_column_by_equal_set(const OptTableMetas &table_meta LOG_WARN("failed to find the expr with min ndv", K(ret)); } else if (!find && FALSE_IT(filtered_expr = column_exprs.at(i))) { // never reach - } else if (OB_FAIL(filtered_exprs.push_back(filtered_expr))) { + } else if (OB_FAIL(add_var_to_array_no_dup(filtered_exprs, filtered_expr))) { LOG_WARN("failed to push back expr", K(ret)); } } @@ -2768,9 +3444,11 @@ int ObOptSelectivity::get_min_ndv_by_equal_set(const OptTableMetas &table_metas, ObBitSet<> col_added; find = false; const EqualSets *eq_sets = ctx.get_equal_sets(); - if (OB_ISNULL(eq_sets)) { + if (OB_ISNULL(eq_sets) || OB_ISNULL(col_expr)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected null", K(ret)); + } else if (!col_expr->is_column_ref_expr()) { + // do nothing } else { for (int64_t i = 0; OB_SUCC(ret) && !find && i < eq_sets->count(); i++) { const ObRawExprSet *equal_set = eq_sets->at(i); @@ -2825,17 +3503,20 @@ int ObOptSelectivity::get_min_ndv_by_equal_set(const OptTableMetas &table_metas, } int ObOptSelectivity::is_columns_contain_pkey(const OptTableMetas &table_metas, - const ObIArray &col_exprs, + const ObIArray &exprs, bool &is_pkey, - bool &is_union_pkey) + bool &is_union_pkey, + uint64_t *table_id_ptr) { int ret = OB_SUCCESS; ObSEArray col_ids; - uint64_t table_id; - if (OB_FAIL(extract_column_ids(col_exprs, col_ids, table_id))) { + uint64_t table_id = OB_INVALID_INDEX; + if (OB_FAIL(extract_column_ids(exprs, col_ids, table_id))) { LOG_WARN("failed to extract column ids", K(ret)); } else if 
(OB_FAIL(is_columns_contain_pkey(table_metas, col_ids, table_id, is_pkey, is_union_pkey))) { LOG_WARN("failed to check is columns contain pkey", K(ret)); + } else { + assign_value(table_id, table_id_ptr); } return ret; } @@ -2878,25 +3559,28 @@ int ObOptSelectivity::extract_column_ids(const ObIArray &col_exprs, int ret = OB_SUCCESS; ObColumnRefRawExpr *column_expr = NULL; table_id = OB_INVALID_INDEX; - for (int64_t i = 0; OB_SUCC(ret) && i < col_exprs.count(); ++i) { + bool from_same_table = true; + for (int64_t i = 0; OB_SUCC(ret) && from_same_table && i < col_exprs.count(); ++i) { ObRawExpr *cur_expr = col_exprs.at(i); - if (OB_ISNULL(cur_expr) || OB_UNLIKELY(!cur_expr->is_column_ref_expr())) { + if (OB_ISNULL(cur_expr)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected expr", K(ret)); + } else if (!cur_expr->is_column_ref_expr()) { + // do nothing } else if (FALSE_IT(column_expr = static_cast(cur_expr))) { } else if (OB_FAIL(col_ids.push_back(column_expr->get_column_id()))) { LOG_WARN("failed to push back column id", K(ret)); - } else if (0 == i) { + } else if (OB_INVALID_INDEX == table_id) { table_id = column_expr->get_table_id(); - } else if (OB_UNLIKELY(table_id != column_expr->get_table_id())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("columns not belong to same table", K(ret), K(*column_expr), K(table_id)); + } else if (table_id != column_expr->get_table_id()) { + from_same_table = false; + table_id = OB_INVALID_INDEX; } } return ret; } -int ObOptSelectivity::classify_quals(const OptSelectivityCtx &ctx, +int ObOptSelectivity::classify_quals_deprecated(const OptSelectivityCtx &ctx, const ObIArray &quals, ObIArray &all_predicate_sel, ObIArray &column_sel_infos) @@ -2906,9 +3590,14 @@ int ObOptSelectivity::classify_quals(const OptSelectivityCtx &ctx, ObSEArray column_exprs; OptSelInfo *sel_info = NULL; double tmp_selectivity = 1.0; - ObArenaAllocator tmp_alloc("ObOptSel"); - ObSelEstimatorFactory factory(tmp_alloc); + ObSelEstimatorFactory factory; ObSEArray 
range_estimators; + if (OB_ISNULL(ctx.get_session_info())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else { + factory.get_allocator().set_tenant_id(ctx.get_session_info()->get_effective_tenant_id()); + } for (int64_t i = 0; OB_SUCC(ret) && i < quals.count(); ++i) { column_exprs.reset(); uint64_t column_id = OB_INVALID_ID; @@ -2974,7 +3663,7 @@ int ObOptSelectivity::classify_quals(const OptSelectivityCtx &ctx, ObObj obj_min; ObObj obj_max; if (OB_ISNULL(range_estimator = static_cast(range_estimators.at(i))) || - OB_UNLIKELY(ObSelEstType::RANGE != range_estimator->get_type()) || + OB_UNLIKELY(ObSelEstType::COLUMN_RANGE != range_estimator->get_type()) || OB_ISNULL(column_expr = range_estimator->get_column_expr())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected expr", K(ret)); @@ -2995,6 +3684,92 @@ int ObOptSelectivity::classify_quals(const OptSelectivityCtx &ctx, return ret; } +int ObOptSelectivity::classify_quals(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObIArray &quals, + ObIArray &all_predicate_sel, + ObIArray &column_sel_infos) +{ + int ret = OB_SUCCESS; + ObRawExpr *qual = NULL; + ObSEArray column_exprs; + OptSelInfo *sel_info = NULL; + double tmp_selectivity = 1.0; + ObSelEstimatorFactory factory; + ObSEArray estimators; + ObObj obj_min; + ObObj obj_max; + if (OB_ISNULL(ctx.get_session_info())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else { + factory.get_allocator().set_tenant_id(ctx.get_session_info()->get_effective_tenant_id()); + } + for (int64_t i = 0; OB_SUCC(ret) && i < quals.count(); ++i) { + column_exprs.reset(); + uint64_t column_id = OB_INVALID_ID; + ObColumnRefRawExpr *column_expr = NULL; + ObSelEstimator *range_estimator = NULL; + uint64_t temp_equal_count = 0; + sel_info = NULL; + if (OB_ISNULL(qual = quals.at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected expr", K(ret)); + } else if (OB_FAIL(ObRawExprUtils::extract_column_exprs(qual, 
column_exprs))) { + LOG_WARN("failed to extract column exprs", K(ret)); + } else if (1 == column_exprs.count()) { + column_expr = static_cast(column_exprs.at(0)); + column_id = column_expr->get_column_id(); + if (OB_FAIL(get_opt_sel_info(column_sel_infos, column_expr->get_column_id(), sel_info))) { + LOG_WARN("failed to get opt sel info", K(ret)); + } else if (OB_FAIL(sel_info->quals_.push_back(qual))) { + LOG_WARN("failed to push back", K(ret)); + } else if (OB_FAIL(extract_equal_count(*qual, temp_equal_count))) { + LOG_WARN("failed to extract equal count", K(ret)); + } else if (0 == sel_info->equal_count_) { + sel_info->equal_count_ = temp_equal_count; + } else if (temp_equal_count > 0) { + sel_info->equal_count_ = std::min(sel_info->equal_count_, temp_equal_count); + } + } else { + // use OB_INVALID_ID represent qual contain more than one column + } + } + + for (int64_t i = 0; OB_SUCC(ret) && i < column_sel_infos.count(); ++i) { + estimators.reuse(); + OptSelInfo &sel_info = column_sel_infos.at(i); + ObRangeSelEstimator *range_estimator = NULL; + obj_min.set_min_value(); + obj_max.set_max_value(); + if (OB_FAIL(factory.create_estimators(ctx, sel_info.quals_, estimators))) { + LOG_WARN("failed to create estimators", K(ret)); + } else if (OB_FAIL(calculate_selectivity(table_metas, ctx, estimators, sel_info.selectivity_))) { + LOG_WARN("failed to calc sel", K(ret)); + } + for (int64_t j = 0; OB_SUCC(ret) && NULL == range_estimator && j < estimators.count(); j ++) { + if (OB_ISNULL(estimators.at(j))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(sel_info), K(estimators)); + } else if (ObSelEstType::COLUMN_RANGE == estimators.at(j)->get_type()) { + range_estimator = static_cast(estimators.at(j)); + if (OB_FAIL(ObOptSelectivity::get_column_range_min_max( + ctx, range_estimator->get_column_expr(), range_estimator->get_range_exprs(), obj_min, obj_max))) { + LOG_WARN("failed to get min max", K(ret)); + } else { + if (!obj_min.is_null()) { + sel_info.min_ = 
obj_min; + } + if (!obj_max.is_null()) { + sel_info.max_ = obj_max; + } + } + } + } + } + return ret; +} + int ObOptSelectivity::get_opt_sel_info(ObIArray &column_sel_infos, const uint64_t column_id, OptSelInfo *&sel_info) @@ -3108,6 +3883,7 @@ double ObOptSelectivity::scale_distinct(double selected_rows, if (ndv > OB_DOUBLE_EPSINON && rows > OB_DOUBLE_EPSINON) { new_ndv = ndv * (1 - std::pow(1 - selected_rows / rows, rows / ndv)); } + new_ndv = std::min(new_ndv, selected_rows); } new_ndv = revise_ndv(new_ndv); return new_ndv; @@ -3262,33 +4038,6 @@ int ObOptSelectivity::get_join_pred_rows(const ObHistogram &left_hist, // return ret; // } -double ObOptSelectivity::get_filters_selectivity(ObIArray &selectivities, FilterDependencyType type) -{ - double selectivity = 0.0; - if (FilterDependencyType::INDEPENDENT == type) { - selectivity = 1.0; - for (int64_t i = 0; i < selectivities.count(); i ++) { - selectivity *= selectivities.at(i); - } - } else if (FilterDependencyType::MUTEX_OR == type) { - selectivity = 0.0; - for (int64_t i = 0; i < selectivities.count(); i ++) { - selectivity += selectivities.at(i); - } - } else if (FilterDependencyType::EXPONENTIAL_BACKOFF == type) { - selectivity = 1.0; - if (!selectivities.empty()) { - double exp = 1.0; - lib::ob_sort(&selectivities.at(0), &selectivities.at(0) + selectivities.count()); - for (int64_t i = 0; i < selectivities.count(); i ++) { - selectivity *= std::pow(selectivities.at(i), exp); - exp /= 2; - } - } - } - selectivity = revise_between_0_1(selectivity); - return selectivity; -} int ObOptSelectivity::remove_ignorable_func_for_est_sel(const ObRawExpr *&expr) { @@ -3365,5 +4114,492 @@ double ObOptSelectivity::get_set_stmt_output_count(double count1, double count2, return output_count; } +int ObOptSelectivity::calculate_expr_avg_len(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr *expr, + double &avg_len) +{ + int ret = OB_SUCCESS; + // default + avg_len = 
ObOptEstCost::get_estimate_width_from_type(expr->get_result_type()); + const OptDynamicExprMeta *dynamic_expr_meta = NULL; + if (OB_ISNULL(expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null expr", K(ret)); + } else if (expr->is_column_ref_expr()) { + if (OB_FAIL(get_column_avg_len(table_metas, ctx, expr, avg_len))) { + LOG_WARN("failed to get avg len", K(ret)); + } + } else if (expr->is_static_scalar_const_expr()) { + ObObj value; + bool get_value = false; + if (OB_FAIL(ObSQLUtils::calc_const_or_calculable_expr(ctx.get_opt_ctx().get_exec_ctx(), + expr, + value, + get_value, + ctx.get_allocator()))) { + LOG_WARN("Failed to get const or calculable expr value", K(ret)); + } else if (get_value) { + avg_len = ObOptEstCostModel::DEFAULT_FIXED_OBJ_WIDTH + value.get_deep_copy_size(); + } + } else if (T_OP_CNN == expr->get_expr_type()) { + avg_len = ObOptEstCostModel::DEFAULT_FIXED_OBJ_WIDTH; + for (int64_t i = 0; OB_SUCC(ret) && i < expr->get_param_count(); i ++) { + double child_len = 0; + if (OB_FAIL(SMART_CALL(calculate_expr_avg_len(table_metas, ctx, expr->get_param_expr(i), child_len)))) { + LOG_WARN("failed to calc child avg len", K(ret), KPC(expr)); + } else { + avg_len += child_len - ObOptEstCostModel::DEFAULT_FIXED_OBJ_WIDTH; + } + } + } else if (expr->is_sys_func_expr()) { + if (T_FUN_SYS_REPLACE == expr->get_expr_type() || + (T_FUN_SYS_CAST == expr->get_expr_type() && CM_IS_IMPLICIT_CAST(expr->get_extra()) )) { + if (OB_UNLIKELY(expr->get_param_count() < 1)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected expr param", KPC(expr)); + } else if (OB_FAIL(SMART_CALL(calculate_expr_avg_len(table_metas, ctx, expr->get_param_expr(0), avg_len)))) { + LOG_WARN("failed to calc child avg len", K(ret), KPC(expr)); + } + } else if (T_FUN_SYS_SUBSTR == expr->get_expr_type() || T_FUN_SYS_SUBSTRB == expr->get_expr_type()) { + double pos = 1; + double sub_len = -1; + double child_len = 0; + ObObj value; + bool get_value = false; + if 
(OB_UNLIKELY(expr->get_param_count() < 2) || + OB_ISNULL(expr->get_param_expr(1)) || + (expr->get_param_count() == 3 && OB_ISNULL(expr->get_param_expr(2)))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected expr param", KPC(expr)); + } else if (OB_FAIL(SMART_CALL(calculate_expr_avg_len(table_metas, ctx, expr->get_param_expr(0), child_len)))) { + LOG_WARN("failed to calc child avg len", K(ret), KPC(expr)); + } else if (expr->get_param_expr(1)->is_static_scalar_const_expr()) { + if (OB_FAIL(ObSQLUtils::calc_const_or_calculable_expr(ctx.get_opt_ctx().get_exec_ctx(), + expr->get_param_expr(1), + value, + get_value, + ctx.get_allocator()))) { + LOG_WARN("Failed to get const or calculable expr value", K(ret)); + } else if (!get_value) { + // do nothing + } else if (OB_FAIL(ObOptEstObjToScalar::convert_obj_to_double(&value, pos))) { + LOG_WARN("failed to convert obj to double", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (3 == expr->get_param_count() && expr->get_param_expr(2)->is_static_scalar_const_expr()) { + if (OB_FAIL(ObSQLUtils::calc_const_or_calculable_expr(ctx.get_opt_ctx().get_exec_ctx(), + expr->get_param_expr(2), + value, + get_value, + ctx.get_allocator()))) { + LOG_WARN("Failed to get const or calculable expr value", K(ret)); + } else if (!get_value) { + // do nothing + } else if (OB_FAIL(ObOptEstObjToScalar::convert_obj_to_double(&value, sub_len))) { + LOG_WARN("failed to convert obj to double", K(ret)); + } + } + avg_len = child_len; + if (OB_SUCC(ret)) { + avg_len -= ObOptEstCostModel::DEFAULT_FIXED_OBJ_WIDTH; + if (pos <= 0) { + avg_len = std::min(avg_len, -pos); + } else if (pos >= 1) { + avg_len -= pos - 1; + } + if (sub_len >= 0) { + avg_len = std::min(avg_len, sub_len); + } + avg_len += ObOptEstCostModel::DEFAULT_FIXED_OBJ_WIDTH; + } + } + } else if (OB_NOT_NULL(dynamic_expr_meta = table_metas.get_dynamic_expr_meta(expr))) { + avg_len = dynamic_expr_meta->get_avg_len(); + } else if (expr->is_exec_param_expr()) { + if 
(OB_FAIL(SMART_CALL(calculate_expr_avg_len( + table_metas, ctx, static_cast(expr)->get_ref_expr(), avg_len)))) { + LOG_WARN("failed to calc ref avg len", K(ret), KPC(expr)); + } + } else { /*do nothing*/ } + return ret; +} + +int ObOptSelectivity::get_column_avg_len(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr *expr, + double &avg_len) +{ + int ret = OB_SUCCESS; + const ObColumnRefRawExpr *column_expr = static_cast(expr); + if (OB_ISNULL(expr) || OB_UNLIKELY(!expr->is_column_ref_expr()) || + OB_ISNULL(ctx.get_opt_stat_manager()) || + OB_ISNULL(ctx.get_session_info())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null param", K(ret), KPC(expr), K(ctx.get_opt_stat_manager())); + } else { + uint64_t table_id = column_expr->get_table_id(); + uint64_t column_id = column_expr->get_column_id(); + uint64_t ref_table_id; + ObSEArray part_ids; + ObSEArray global_part_ids; + const OptTableMeta *table_meta = table_metas.get_table_meta_by_table_id(table_id); + ObGlobalColumnStat stat; + TableItem *table = NULL; + avg_len = ObOptEstCost::get_estimate_width_from_type(expr->get_result_type()); + if (OB_NOT_NULL(table_meta)) { + const OptColumnMeta *column_meta = table_meta->get_column_meta(column_id); + if (OB_NOT_NULL(column_meta)) { + avg_len = column_meta->get_avg_len(); + } + } + } + return ret; +} + +int ObOptSelectivity::calculate_substrb_info(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr *str_expr, + const double substrb_len, + const double cur_rows, + double &substr_ndv, + double &nns) +{ + int ret = OB_SUCCESS; + double expr_len = 0.0; + double expr_ndv = 0.0; + nns = 1.0; + substr_ndv = 1.0; + if (OB_ISNULL(str_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else if (substrb_len <= 0) { + substr_ndv = 1.0; + } else if (OB_FAIL(calculate_expr_avg_len(table_metas, + ctx, + str_expr, + expr_len))) { + LOG_WARN("failed to get expr length", K(ret), 
KPC(str_expr)); + } else if (OB_FAIL(calculate_distinct(table_metas, + ctx, + *str_expr, + cur_rows, + expr_ndv))) { + LOG_WARN("failed to calculate distinct", K(ret)); + } else if (OB_FAIL(calculate_expr_nns(table_metas, ctx, str_expr, nns))) { + LOG_WARN("failed to calculate nns", KPC(str_expr)); + } else { + expr_len -= ObOptEstCostModel::DEFAULT_FIXED_OBJ_WIDTH; + if (nns > OB_DOUBLE_EPSINON) { + expr_len /= nns; + } + if (expr_len < OB_DOUBLE_EPSINON || expr_len <= substrb_len) { + substr_ndv = expr_ndv; + } else { + substr_ndv = std::pow(expr_ndv, substrb_len / expr_len); + } + } + LOG_TRACE("succeed to calculate substrb ndv", K(substr_ndv), K(expr_ndv), K(substrb_len), K(expr_len)); + return ret; +} + +int ObOptSelectivity::calculate_expr_nns(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr *expr, + double &nns) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else if (expr->is_column_ref_expr()) { + if (OB_FAIL(get_column_ndv_and_nns(table_metas, ctx, *expr, NULL, &nns))) { + LOG_WARN("failed to get column ndv and nns", K(ret)); + } + } else { + // todo: wuyuming.wym null propagate expr + nns = 1.0; + } + return ret; +} + +int ObOptSelectivity::calc_expr_min_max(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr *expr, + ObObj &min_value, + ObObj &max_value) +{ + int ret = OB_SUCCESS; + min_value.set_min_value(); + max_value.set_max_value(); + if (OB_ISNULL(expr) || OB_ISNULL(ctx.get_opt_ctx().get_exec_ctx())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else if (expr->get_result_type().is_ext()) { + // do nothing + } else if (expr->is_column_ref_expr()) { + if (OB_FAIL(ObOptSelectivity::get_column_min_max(table_metas, ctx, *expr, + min_value, max_value))) { + LOG_WARN("failed to get min max", K(ret)); + } + } else if (expr->is_static_scalar_const_expr()) { + ObObj result; + bool got_result = 
false; + if (OB_FAIL(ObSQLUtils::calc_const_or_calculable_expr(ctx.get_opt_ctx().get_exec_ctx(), + expr, + result, + got_result, + ctx.get_allocator()))) { + LOG_WARN("fail to calc_const_or_calculable_expr", K(ret)); + } else if (!got_result || result.is_null()) { + // do nothing + } else { + min_value = result; + max_value = result; + } + } else if (expr->get_param_count() < 1) { + // do nothing + } else if (T_FUN_SYS_CAST == expr->get_expr_type()) { + bool is_monotonic = false; + const ObRawExpr *param_expr = expr->get_param_expr(0); + if (OB_FAIL(ObObjCaster::is_cast_monotonic(param_expr->get_data_type(), expr->get_data_type(), is_monotonic))) { + LOG_WARN("check cast monotonic error", KPC(expr), K(ret)); + } else if (!is_monotonic) { + // do nothing + } else if (OB_FAIL(SMART_CALL(calc_expr_min_max(table_metas, ctx, param_expr, + min_value, max_value)))) { + LOG_WARN("failed to calc date min max", KPC(expr)); + } else if (min_value.is_min_value() || max_value.is_min_value() || + min_value.is_max_value() || max_value.is_max_value() || + min_value.is_null() || max_value.is_null()) { + // do nothing + } else if (OB_FAIL(convert_obj_to_expr_type(ctx, expr, expr->get_extra(), min_value))) { + ret = OB_SUCCESS; + min_value.set_min_value(); + } else if (OB_FAIL(convert_obj_to_expr_type(ctx, expr, expr->get_extra(), max_value))) { + ret = OB_SUCCESS; + max_value.set_max_value(); + } + } else if (T_FUN_SYS_DATE == expr->get_expr_type()) { + if (OB_FAIL(SMART_CALL(calc_expr_min_max(table_metas, ctx, expr->get_param_expr(0), + min_value, max_value)))) { + LOG_WARN("failed to calc date min max", KPC(expr)); + } + } else if (is_dense_time_expr_type(expr->get_expr_type()) || + T_FUN_SYS_EXTRACT == expr->get_expr_type()) { + const ObRawExpr *param_expr = NULL; + ObItemType est_type = expr->get_expr_type(); + int64_t extract_type = DATE_UNIT_MAX; + if (T_FUN_SYS_EXTRACT == expr->get_expr_type()) { + bool valid = false; + ObObj result; + param_expr = expr->get_param_expr(1); + 
if (OB_FAIL(ObSQLUtils::calc_const_or_calculable_expr(ctx.get_opt_ctx().get_exec_ctx(), + expr->get_param_expr(0), + result, + valid, + ctx.get_allocator()))) { + LOG_WARN("fail to calc_const_or_calculable_expr", K(ret)); + } else if (valid && result.is_int()) { + extract_type = result.get_int(); + switch (extract_type) { + case DATE_UNIT_DAY: est_type = T_FUN_SYS_DAY; break; + case DATE_UNIT_WEEK: est_type = T_FUN_SYS_WEEK; break; + case DATE_UNIT_MONTH: est_type = T_FUN_SYS_MONTH; break; + case DATE_UNIT_QUARTER: est_type = T_FUN_SYS_QUARTER; break; + case DATE_UNIT_SECOND: est_type = T_FUN_SYS_SECOND; break; + case DATE_UNIT_MINUTE: est_type = T_FUN_SYS_MINUTE; break; + case DATE_UNIT_HOUR: est_type = T_FUN_SYS_HOUR; break; + case DATE_UNIT_YEAR: est_type = T_FUN_SYS_YEAR; break; + // we only estimate the min/max value by year, so yearmonth is similar with yearweek + case DATE_UNIT_YEAR_MONTH: est_type = T_FUN_SYS_YEARWEEK_OF_DATE; break; + default: break; + } + } + } else { + param_expr = expr->get_param_expr(0); + } + if (OB_SUCC(ret)) { + bool use_default = false; + int64_t min_int_value = 0; + int64_t max_int_value = 0; + switch (est_type) { + case T_FUN_SYS_MONTH: min_int_value = 1; max_int_value = 12; break; + case T_FUN_SYS_DAY_OF_MONTH: + case T_FUN_SYS_DAY: min_int_value = 1; max_int_value = 31; break; + case T_FUN_SYS_DAY_OF_YEAR: min_int_value = 1; max_int_value = 366; break; + case T_FUN_SYS_WEEK_OF_YEAR: + case T_FUN_SYS_WEEK: min_int_value = 0; max_int_value = 53; break; + case T_FUN_SYS_WEEKDAY_OF_DATE: min_int_value = 0; max_int_value = 6; break; + case T_FUN_SYS_DAY_OF_WEEK: min_int_value = 1; max_int_value = 7; break; + case T_FUN_SYS_QUARTER: min_int_value = 1; max_int_value = 4; break; + case T_FUN_SYS_HOUR: min_int_value = 0; max_int_value = 23; break; + case T_FUN_SYS_MINUTE: min_int_value = 0; max_int_value = 59; break; + case T_FUN_SYS_SECOND: min_int_value = 0; max_int_value = 59; break; + case T_FUN_SYS_YEAR: + case 
T_FUN_SYS_YEARWEEK_OF_DATE: { + if (OB_FAIL(calc_year_min_max(table_metas, + ctx, + param_expr, + min_int_value, + max_int_value, + use_default))) { + LOG_WARN("failed to calculate expr min max", K(ret)); + } else if (!use_default && T_FUN_SYS_YEARWEEK_OF_DATE == est_type) { + // approximately + min_int_value = min_int_value * 100; + max_int_value = max_int_value * 100 + 100; + } + break; + } + default: use_default = true; break; + } + if (OB_SUCC(ret) && !use_default) { + min_value.set_int(min_int_value); + max_value.set_int(max_int_value); + if (OB_FAIL(convert_obj_to_expr_type(ctx, expr, CM_NONE, min_value))) { + LOG_WARN("failed to convert obj", K(ret), K(min_value)); + } else if (OB_FAIL(convert_obj_to_expr_type(ctx, expr, CM_NONE, max_value))) { /* the upper bound must be cast to the expr type too, so both bounds share one type */ + LOG_WARN("failed to convert obj", K(ret), K(max_value)); + } + } + } + } + if (OB_SUCC(ret)) { + int cmp_result = 0; + bool can_cmp = min_value.can_compare(max_value); + if (min_value.is_min_value() || max_value.is_max_value()) { + // do nothing + } else if (can_cmp && + OB_FAIL(min_value.compare(max_value, cmp_result))) { + LOG_WARN("failed to compare", K(ret)); + } else if (!can_cmp || 1 == cmp_result || + min_value.is_null() || max_value.is_null()) { + min_value.set_min_value(); + max_value.set_max_value(); + } + } + return ret; +} + +int ObOptSelectivity::calc_year_min_max(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr *expr, + int64_t &min_year, + int64_t &max_year, + bool &use_default) +{ + int ret = OB_SUCCESS; + ObObj min_value; + ObObj max_value; + ObObj min_year_obj; + ObObj max_year_obj; + ObTime min_time; + ObTime max_time; + use_default = false; + ObDateSqlMode date_sql_mode; + if (OB_ISNULL(expr) || OB_ISNULL(ctx.get_session_info()) || OB_ISNULL(ctx.get_opt_ctx().get_exec_ctx())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else if (ObDateTimeTC != expr->get_type_class() && + ObDateTC != expr->get_type_class() && + ObOTimestampTC != 
expr->get_type_class()) { + use_default = true; + } else if (OB_FAIL(SMART_CALL(calc_expr_min_max(table_metas, + ctx, + expr, + min_value, + max_value)) )) { + LOG_WARN("failed to calculate expr min max", K(ret)); + } else if (min_value.is_min_value() || max_value.is_max_value()) { + use_default = true; + } else if (FALSE_IT(date_sql_mode.init(ctx.get_session_info()->get_sql_mode()))) { + } else if (OB_FAIL(ob_obj_to_ob_time_with_date(min_value, + get_timezone_info(ctx.get_session_info()), + min_time, + get_cur_time(ctx.get_opt_ctx().get_exec_ctx()->get_physical_plan_ctx()), + date_sql_mode))) { + ret = OB_SUCCESS; + use_default = true; + } else if (OB_FAIL(ob_obj_to_ob_time_with_date(max_value, + get_timezone_info(ctx.get_session_info()), + max_time, + get_cur_time(ctx.get_opt_ctx().get_exec_ctx()->get_physical_plan_ctx()), + date_sql_mode))) { + ret = OB_SUCCESS; + use_default = true; + } else { + min_year = min_time.parts_[DT_YEAR]; + max_year = max_time.parts_[DT_YEAR]; + } + return ret; +} + +int ObOptSelectivity::calc_const_numeric_value(const OptSelectivityCtx &ctx, + const ObRawExpr *expr, + double &value, + bool &succ) +{ + int ret = OB_SUCCESS; + ObObj result; + bool got_result = false; + if (OB_ISNULL(expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else if (!expr->is_static_scalar_const_expr()) { + succ = false; + } else if (OB_FAIL(ObSQLUtils::calc_const_or_calculable_expr(ctx.get_opt_ctx().get_exec_ctx(), + expr, + result, + got_result, + ctx.get_allocator()))) { + LOG_WARN("fail to calc_const_or_calculable_expr", K(ret)); + } else if (!got_result || result.is_null() || !ob_is_numeric_type(result.get_type())) { + succ = false; + } else if (OB_FAIL(ObOptEstObjToScalar::convert_obj_to_double(&result, value))) { + LOG_WARN("Failed to convert obj using old method", K(ret)); + } else { + succ = true; + } + return ret; +} + +int ObOptSelectivity::convert_obj_to_expr_type(const OptSelectivityCtx &ctx, + const ObRawExpr *expr, + 
ObCastMode cast_mode, + ObObj &obj) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(expr) || OB_ISNULL(ctx.get_opt_ctx().get_exec_ctx())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", KPC(expr)); + } else { + ObObj tmp; + const ObDataTypeCastParams dtc_params = + ObBasicSessionInfo::create_dtc_params(ctx.get_session_info()); + ObCastCtx cast_ctx(&ctx.get_allocator(), + &dtc_params, + get_cur_time(ctx.get_opt_ctx().get_exec_ctx()->get_physical_plan_ctx()), + cast_mode, + expr->get_result_type().get_collation_type()); + ObAccuracy res_acc; + if (expr->get_result_type().is_decimal_int()) { + res_acc = expr->get_result_type().get_accuracy(); + cast_ctx.res_accuracy_ = &res_acc; + } + if (OB_FAIL(ObObjCaster::to_type(expr->get_result_type().get_type(), + expr->get_result_type().get_collation_type(), + cast_ctx, + obj, + tmp))) { + LOG_WARN("failed to cast value", K(ret)); + } else { + obj = tmp; + } + } + return ret; +} + }//end of namespace sql }//end of namespace oceanbase diff --git a/src/sql/optimizer/ob_opt_selectivity.h b/src/sql/optimizer/ob_opt_selectivity.h index a6b818810f..d88dd99b45 100644 --- a/src/sql/optimizer/ob_opt_selectivity.h +++ b/src/sql/optimizer/ob_opt_selectivity.h @@ -51,11 +51,72 @@ struct ColumnItem; struct RangeExprs; struct ObExprSelPair; -enum class FilterDependencyType +class ObEstCorrelationModel { - INDEPENDENT, - MUTEX_OR, - EXPONENTIAL_BACKOFF, +public: + static ObEstCorrelationModel &get_correlation_model(ObEstCorrelationType type); + + virtual double combine_filters_selectivity(ObIArray &selectivities) const = 0; + + virtual bool is_independent() const = 0; + +protected: + ObEstCorrelationModel() {} + virtual ~ObEstCorrelationModel() = default; + +private: + DISALLOW_COPY_AND_ASSIGN(ObEstCorrelationModel); +}; + +class ObIndependentModel : public ObEstCorrelationModel +{ +public: + static ObEstCorrelationModel& get_model(); + + virtual double combine_filters_selectivity(ObIArray &selectivities) const override; + + virtual 
bool is_independent() const override { return true; }; + +protected: + ObIndependentModel() {} + virtual ~ObIndependentModel() = default; + +private: + DISALLOW_COPY_AND_ASSIGN(ObIndependentModel); +}; + +class ObPartialCorrelationModel : public ObEstCorrelationModel +{ +public: + static ObEstCorrelationModel& get_model(); + + virtual double combine_filters_selectivity(ObIArray &selectivities) const override; + + virtual bool is_independent() const { return false; } + +protected: + ObPartialCorrelationModel() {} + virtual ~ObPartialCorrelationModel() = default; + +private: + DISALLOW_COPY_AND_ASSIGN(ObPartialCorrelationModel); +}; + +class ObFullCorrelationModel : public ObEstCorrelationModel +{ +public: + static ObEstCorrelationModel& get_model(); + + virtual double combine_filters_selectivity(ObIArray &selectivities) const override; + + virtual bool is_independent() const { return false; } + +protected: + ObFullCorrelationModel() {} + virtual ~ObFullCorrelationModel() = default; + +private: + DISALLOW_COPY_AND_ASSIGN(ObFullCorrelationModel); }; class OptSelectivityCtx @@ -72,7 +133,8 @@ class OptSelectivityCtx row_count_1_(-1.0), row_count_2_(-1.0), current_rows_(-1.0), - dependency_type_(FilterDependencyType::INDEPENDENT) + ambient_card_(NULL), + assumption_type_(UNKNOWN_JOIN) { } ObOptimizerContext &get_opt_ctx() const { return const_cast(opt_ctx_); } @@ -108,11 +170,16 @@ class OptSelectivityCtx const ObRelIds *get_right_rel_ids() const { return right_rel_ids_; } double get_row_count_1() const { return row_count_1_; } double get_row_count_2() const { return row_count_2_; } + double get_left_row_count() const { return row_count_1_; } + double get_right_row_count() const { return row_count_2_; } double get_current_rows() const { return current_rows_; } void set_current_rows(const double current_rows) { current_rows_ = current_rows; } - FilterDependencyType get_dependency_type() const { return dependency_type_; } - void set_dependency_type(FilterDependencyType 
type) { dependency_type_ = type; } + + const ObEstCorrelationModel &get_correlation_model() const + { + return ObEstCorrelationModel::get_correlation_model(opt_ctx_.get_correlation_type()); + } uint64_t get_compat_version() const { return OB_ISNULL(opt_ctx_.get_query_ctx()) ? 0 : @@ -126,20 +193,29 @@ class OptSelectivityCtx get_opt_ctx().get_query_ctx()->check_opt_compat_version(args...); } + void set_ambient_card(const ObIArray *ambient_card) { ambient_card_ = ambient_card; } + const ObIArray *get_ambient_card() const { return ambient_card_; } + int get_ambient_card(const uint64_t table_id, double &table_ambient_card) const; + int get_ambient_card(const ObRelIds &rel_ids, double &table_ambient_card) const; + void set_assumption_type(ObJoinType type) { assumption_type_ = type; } + ObJoinType get_assumption_type() const { + return UNKNOWN_JOIN == assumption_type_ ? join_type_ : assumption_type_; + } + void init_op_ctx(const EqualSets *equal_sets, const double current_rows, - FilterDependencyType dependency_type = FilterDependencyType::INDEPENDENT) + const ObIArray *ambient_card = NULL) { + join_type_ = UNKNOWN_JOIN; + left_rel_ids_ = NULL; + right_rel_ids_ = NULL; equal_sets_ = equal_sets; current_rows_ = current_rows; - dependency_type_ = dependency_type; - } - void init_row_count(const double row_count1, const double row_count2) - { - row_count_1_ = row_count1; - row_count_2_ = row_count2; - dependency_type_ = FilterDependencyType::INDEPENDENT; + ambient_card_ = ambient_card; } + // child should be in the same query block + void init_op_ctx(ObLogicalOperator *child); + void init_join_ctx(const ObJoinType join_type, const ObRelIds *left_rel_ids, const ObRelIds *right_rel_ids, const double rc1, const double rc2, const EqualSets *equal_sets = NULL) @@ -151,13 +227,31 @@ class OptSelectivityCtx row_count_2_ = rc2; current_rows_ = -1.0; equal_sets_ = equal_sets; - dependency_type_ = FilterDependencyType::INDEPENDENT; + ambient_card_ = NULL; } - void clear_equal_sets() 
{ equal_sets_ = NULL; } + void clear() + { + join_type_ = UNKNOWN_JOIN; + left_rel_ids_ = NULL; + right_rel_ids_ = NULL; + equal_sets_ = NULL; + current_rows_ = -1; + ambient_card_ = NULL; + } + + void init_row_count(const double row_count1, const double row_count2) + { + join_type_ = UNKNOWN_JOIN; + left_rel_ids_ = NULL; + right_rel_ids_ = NULL; + row_count_1_ = row_count1; + row_count_2_ = row_count2; + ambient_card_ = NULL; + } TO_STRING_KV(KP_(stmt), KP_(equal_sets), K_(join_type), KP_(left_rel_ids), KP_(right_rel_ids), - K_(row_count_1), K_(row_count_2), K_(current_rows), K_(dependency_type)); + K_(row_count_1), K_(row_count_2), K_(current_rows), KPC_(ambient_card)); private: ObOptimizerContext &opt_ctx_; @@ -178,7 +272,13 @@ class OptSelectivityCtx double row_count_1_; double row_count_2_; double current_rows_; - FilterDependencyType dependency_type_; + const ObIArray *ambient_card_; + + /** + * The join type which determines the estimation assumption. + * Used to calculate the selectivity of ambient card. 
+ */ + ObJoinType assumption_type_; }; class OptColumnMeta @@ -265,6 +365,30 @@ enum OptTableStatType { DS_TABLE_STAT //dynamic sampling table stat }; +class OptDynamicExprMeta +{ +public: + OptDynamicExprMeta(): avg_len_(0) {} + void set_expr(const ObRawExpr *expr) { expr_ = expr; } + const ObRawExpr *get_expr() const { return expr_; } + void set_avg_len(double avg_len) { avg_len_ = avg_len; } + double get_avg_len() const { return avg_len_; } + + int assign(const OptDynamicExprMeta &other) + { + int ret = OB_SUCCESS; + expr_ = other.expr_; + avg_len_ = other.avg_len_; + return ret; + } + + TO_STRING_KV(KP_(expr), KPC_(expr), K_(avg_len)); +private: + const ObRawExpr *expr_; + double avg_len_; + DISALLOW_COPY_AND_ASSIGN(OptDynamicExprMeta); +}; + class OptTableMeta { public: @@ -286,7 +410,8 @@ public: distinct_rows_(0.0), table_partition_info_(NULL), base_meta_info_(NULL), - real_rows_(-1.0) + real_rows_(-1.0), + stale_stats_(false) {} int assign(const OptTableMeta &other); @@ -348,7 +473,8 @@ public: void set_stat_locked(bool locked) { stat_locked_ = locked; } double get_distinct_rows() const { return distinct_rows_; } void set_distinct_rows(double rows) { distinct_rows_ = rows; } - void set_ndv_for_all_column(double ndv); + bool is_opt_stat_expired() const { return stale_stats_; } + void set_stale_stats(bool stale_stats) { stale_stats_ = stale_stats; } share::schema::ObTableType get_table_type() const { return table_type_; } @@ -389,6 +515,8 @@ private: const ObTablePartitionInfo *table_partition_info_; const ObTableMetaInfo *base_meta_info_; double real_rows_; + //mark stat is expired + bool stale_stats_; }; struct OptSelectivityDSParam { @@ -423,7 +551,8 @@ public: int64_t last_analyzed, bool is_stat_locked, const ObTablePartitionInfo *table_partition_info, - const ObTableMetaInfo *base_meta_info); + const ObTableMetaInfo *base_meta_info, + bool stale_stats); int add_set_child_stmt_meta_info(const ObSelectStmt *parent_stmt, const ObSelectStmt *child_stmt, @@ 
-453,15 +582,22 @@ public: double &ndv); common::ObIArray& get_table_metas() { return table_metas_; } + const common::ObIArray& get_table_metas() const { return table_metas_; } const OptTableMeta* get_table_meta_by_table_id(const uint64_t table_id) const; OptTableMeta* get_table_meta_by_table_id(const uint64_t table_id); const OptColumnMeta* get_column_meta_by_table_id(const uint64_t table_id, const uint64_t column_id) const; + const OptDynamicExprMeta* get_dynamic_expr_meta(const ObRawExpr *expr) const; + int add_dynamic_expr_meta(const OptDynamicExprMeta &dynamic_expr_meta) { + return dynamic_expr_metas_.push_back(dynamic_expr_meta); + } + const ObIArray &get_dynamic_expr_metas() const { return dynamic_expr_metas_; } - int get_rows(const uint64_t table_id, double &rows); - TO_STRING_KV(K_(table_metas)); + double get_rows(const uint64_t table_id) const; + TO_STRING_KV(K_(table_metas), K_(dynamic_expr_metas)); private: common::ObSEArray table_metas_; + common::ObSEArray dynamic_expr_metas_; }; struct OptSelInfo @@ -487,13 +623,30 @@ struct OptSelInfo bool has_range_exprs_; ObObj min_; ObObj max_; + ObSEArray quals_; }; class ObSelEstimator; +struct OptDistinctHelper +{ +public: + OptDistinctHelper() {} + + TO_STRING_KV(K_(rel_id), K_(exprs)); + + ObRelIds rel_id_; + ObSEArray exprs_; +}; + class ObOptSelectivity { public: + static int calculate_selectivity(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + ObIArray &sel_estimators, + double &selectivity); + // @brief 计算一组条件的选择率,条件之间是and关系,基于独立性假设 static int calculate_selectivity(const OptTableMetas &table_metas, const OptSelectivityCtx &ctx, @@ -501,6 +654,20 @@ public: double &selectivity, common::ObIArray &all_predicate_sel); + static int calculate_conditional_selectivity(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + common::ObIArray &total_filters, + common::ObIArray &append_filters, + double &total_sel, + double &conditional_sel, + ObIArray &all_predicate_sel); + + static 
int calculate_join_selectivity(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const common::ObIArray &quals, + double &selectivity, + common::ObIArray &all_predicate_sel); + static int calculate_qual_selectivity(const OptTableMetas &table_metas, const OptSelectivityCtx &ctx, const ObRawExpr &qual, @@ -528,6 +695,26 @@ public: const ObNewRange &range, double &selectivity); + static int calculate_table_ambient_cardinality(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRelIds &rel_id, + const double cur_rows, + double &table_ambient_card); + + static int calculate_distinct_in_single_table(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRelIds &rel_ids, + const common::ObIArray& exprs, + const double cur_rows, + double &rows); + + static int remove_dummy_distinct_exprs(ObIArray &helpers, + ObIArray &exprs); + + static int check_expr_in_distinct_helper(const ObRawExpr *expr, + const ObIArray &helpers, + bool &is_dummy_expr); + // @brief 计算一组变量的distinct static int calculate_distinct(const OptTableMetas &table_metas, const OptSelectivityCtx &ctx, @@ -536,6 +723,17 @@ public: double &rows, const bool need_refine = true); + static int calculate_distinct(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr& expr, + const double origin_rows, + double &rows, + const bool need_refine = true); + + static double combine_two_ndvs(double ambient_card, double ndv1, double ndv2); + + static double combine_ndvs(double ambient_card, ObIArray &ndvs); + // ndv 按照行数进行缩放. 
static double scale_distinct(double selected_rows, double rows, double ndv); @@ -642,8 +840,8 @@ public: const ObRawExpr &expr, double *ndv_ptr, double *num_null_ptr, - double *row_count_ptr, - double *avg_len_ptr); + double *avg_len_ptr, + double *row_count_ptr); static int get_column_hist_scale(const OptTableMetas &table_metas, const OptSelectivityCtx &ctx, @@ -751,9 +949,10 @@ public: * 检查一组expr是否包含所在表的主键 */ static int is_columns_contain_pkey(const OptTableMetas &table_metas, - const ObIArray &col_exprs, + const ObIArray &exprs, bool &is_pkey, - bool &is_union_pkey); + bool &is_union_pkey, + uint64_t *table_id_ptr = NULL); static int is_columns_contain_pkey(const OptTableMetas &table_metas, const ObIArray &col_ids, @@ -768,7 +967,13 @@ public: ObIArray &col_ids, uint64_t &table_id); - static int classify_quals(const OptSelectivityCtx &ctx, + static int classify_quals_deprecated(const OptSelectivityCtx &ctx, + const ObIArray &quals, + ObIArray &all_predicate_sel, + ObIArray &column_sel_infos); + + static int classify_quals(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, const ObIArray &quals, ObIArray &all_predicate_sel, ObIArray &column_sel_infos); @@ -825,7 +1030,6 @@ public: // const ObIArray &predicates, // ObOptDSJoinParam &ds_join_param); - static double get_filters_selectivity(ObIArray &selectivities, FilterDependencyType type); static int get_column_min_max(ObRawExpr *expr, OptSelInfo &sel_info); @@ -834,27 +1038,91 @@ public: const OptSelectivityCtx &ctx, double &special_ndv, const double origin_rows); + static int calculate_winfunc_ndv(const OptTableMetas &table_meta, + const ObRawExpr* expr, + const OptSelectivityCtx &ctx, + double &special_ndv, + const double origin_rows); static int calculate_expr_ndv(const ObIArray& exprs, ObIArray& expr_ndv, const OptTableMetas &table_metas, const OptSelectivityCtx &ctx, const double origin_rows); - static bool is_special_expr(const ObRawExpr &expr); - static int classify_exprs(const ObIArray& 
exprs, - ObIArray& column_exprs, - ObIArray& special_exprs, - const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx); - static int classify_exprs(ObRawExpr* expr, - ObIArray& column_exprs, - ObIArray& special_exprs, - const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx); + static int check_is_special_distinct_expr(const OptSelectivityCtx &ctx, + const ObRawExpr *expr, + bool &is_special); + static int classify_exprs(const OptSelectivityCtx &ctx, + const ObIArray& exprs, + ObIArray &helpers, + ObIArray& special_exprs); + static int classify_exprs(const OptSelectivityCtx &ctx, + ObRawExpr *expr, + ObIArray &helpers, + ObIArray& special_exprs); + static int add_expr_to_distinct_helper(ObIArray &helpers, + const ObRelIds &rel_id, + ObRawExpr *expr); static int remove_ignorable_func_for_est_sel(const ObRawExpr *&expr); static int remove_ignorable_func_for_est_sel(ObRawExpr *&expr); static double get_set_stmt_output_count(double count1, double count2, ObSelectStmt::SetOperator set_type); + static int calculate_expr_avg_len(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr *expr, + double &avg_len); + static int get_column_avg_len(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr *expr, + double &avg_len); + static int calculate_substrb_info(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr *str_expr, + const double substrb_len, + const double cur_rows, + double &ndv, + double &nns); + static int calculate_expr_nns(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr *expr, + double &nns); + static int calc_expr_min_max(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr *expr, + ObObj &min_value, + ObObj &max_value); + static int calc_year_min_max(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObRawExpr *expr, + int64_t &min_year, + int64_t &max_year, + bool 
&use_default); + static int calc_const_numeric_value(const OptSelectivityCtx &ctx, + const ObRawExpr *expr, + double &value, + bool &succ); + static int convert_obj_to_expr_type(const OptSelectivityCtx &ctx, + const ObRawExpr *expr, + ObCastMode cast_mode, + ObObj &obj); + static bool is_dense_time_expr_type(ObItemType type) + { + return T_FUN_SYS_YEAR == type || + T_FUN_SYS_DAY == type || + T_FUN_SYS_DAY_OF_MONTH == type || + T_FUN_SYS_MONTH == type || + T_FUN_SYS_DAY_OF_YEAR == type || + T_FUN_SYS_WEEK_OF_YEAR == type || + T_FUN_SYS_WEEKDAY_OF_DATE == type || + T_FUN_SYS_YEARWEEK_OF_DATE == type || + T_FUN_SYS_DAY_OF_WEEK == type || + T_FUN_SYS_WEEK == type || + T_FUN_SYS_QUARTER == type || + T_FUN_SYS_HOUR == type || + T_FUN_SYS_MINUTE == type || + T_FUN_SYS_SECOND == type; + } + private: DISALLOW_COPY_AND_ASSIGN(ObOptSelectivity); }; diff --git a/src/sql/optimizer/ob_optimizer.cpp b/src/sql/optimizer/ob_optimizer.cpp index 93ba34c375..89dea29ccb 100644 --- a/src/sql/optimizer/ob_optimizer.cpp +++ b/src/sql/optimizer/ob_optimizer.cpp @@ -543,6 +543,8 @@ int ObOptimizer::init_env_info(ObDMLStmt &stmt) LOG_WARN("fail to check enable pdml", K(ret)); } else if (OB_FAIL(init_parallel_policy(stmt, *session_info))) { // call after check pdml enabled LOG_WARN("fail to check enable pdml", K(ret)); + } else if (OB_FAIL(init_correlation_model(stmt, *session_info))) { + LOG_WARN("failed to init correlation model", K(ret)); } return ret; } @@ -692,6 +694,39 @@ int ObOptimizer::init_parallel_policy(ObDMLStmt &stmt, const ObSQLSessionInfo &s return ret; } +int ObOptimizer::init_correlation_model(ObDMLStmt &stmt, const ObSQLSessionInfo &session) +{ + int ret = OB_SUCCESS; + ObEstCorrelationModel* correlation_model = NULL; + int64_t type = 0; + bool has_hint = false; + if (OB_ISNULL(ctx_.get_query_ctx())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null ctx", K(ret)); + } else if (!ctx_.get_query_ctx()->check_opt_compat_version(COMPAT_VERSION_4_2_4, COMPAT_VERSION_4_3_0, 
+ COMPAT_VERSION_4_3_3)) { + type = static_cast(ObEstCorrelationType::INDEPENDENT); + } else if (OB_FAIL(ctx_.get_global_hint().opt_params_.has_opt_param(ObOptParamHint::CORRELATION_FOR_CARDINALITY_ESTIMATION, has_hint))) { + LOG_WARN("failed to check whether has hint param", K(ret)); + } else if (has_hint) { + if (OB_FAIL(ctx_.get_global_hint().opt_params_.get_enum_opt_param(ObOptParamHint::CORRELATION_FOR_CARDINALITY_ESTIMATION, type))) { + LOG_WARN("failed to get bool hint param", K(ret)); + } + } else if (OB_FAIL(session.get_sys_variable(share::SYS_VAR_CARDINALITY_ESTIMATION_MODEL, type))) { + LOG_WARN("failed to get sys variable", K(ret)); + } + if (OB_SUCC(ret)) { + if (OB_UNLIKELY(type < 0) || + OB_UNLIKELY(type >= static_cast(ObEstCorrelationType::MAX))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected correlation type", K(type)); + } else { + ctx_.set_correlation_type(static_cast(type)); + } + } + return ret; +} + int ObOptimizer::set_auto_dop_params(const ObSQLSessionInfo &session) { int ret = OB_SUCCESS; diff --git a/src/sql/optimizer/ob_optimizer.h b/src/sql/optimizer/ob_optimizer.h index 078a513df7..24bdcfe941 100644 --- a/src/sql/optimizer/ob_optimizer.h +++ b/src/sql/optimizer/ob_optimizer.h @@ -223,6 +223,7 @@ namespace sql int check_force_default_stat(); int init_system_stat(); int calc_link_stmt_count(const ObDMLStmt &stmt, int64_t &count); + int init_correlation_model(ObDMLStmt &stmt, const ObSQLSessionInfo &session); private: ObOptimizerContext &ctx_; diff --git a/src/sql/optimizer/ob_optimizer_context.h b/src/sql/optimizer/ob_optimizer_context.h index c7ee0c02ef..8e0ed049aa 100644 --- a/src/sql/optimizer/ob_optimizer_context.h +++ b/src/sql/optimizer/ob_optimizer_context.h @@ -41,6 +41,14 @@ namespace sql class ObRawExprFactory; class ObLogPlanFactory; +enum class ObEstCorrelationType +{ + INDEPENDENT, + PARTIAL, + FULL, + MAX +}; + typedef common::ObArray ObPlanNotes; //table location local index id related info //tablet_loc_id and 
ref_table_id_ are used to uniquely determine @@ -235,7 +243,8 @@ ObOptimizerContext(ObSQLSessionInfo *session_info, system_stat_(), storage_estimation_enabled_(false), das_keep_order_enabled_(true), - generate_random_plan_(false) + generate_random_plan_(false), + correlation_type_(ObEstCorrelationType::MAX) { } inline common::ObOptStatManager *get_opt_stat_manager() { return opt_stat_manager_; } inline void set_opt_stat_manager(common::ObOptStatManager *sm) { opt_stat_manager_ = sm; } @@ -610,6 +619,8 @@ ObOptimizerContext(ObSQLSessionInfo *session_info, inline bool generate_random_plan() const { return generate_random_plan_; } inline void set_generate_random_plan(bool rand_plan) { generate_random_plan_ = rand_plan; } + inline void set_correlation_type(ObEstCorrelationType type) { correlation_type_ = type; } + inline ObEstCorrelationType get_correlation_type() const { return correlation_type_; } private: ObSQLSessionInfo *session_info_; ObExecContext *exec_ctx_; @@ -696,6 +707,7 @@ private: bool das_keep_order_enabled_; bool generate_random_plan_; + ObEstCorrelationType correlation_type_; }; } } diff --git a/src/sql/optimizer/ob_optimizer_util.cpp b/src/sql/optimizer/ob_optimizer_util.cpp index f90dc325fe..05b60f3da5 100644 --- a/src/sql/optimizer/ob_optimizer_util.cpp +++ b/src/sql/optimizer/ob_optimizer_util.cpp @@ -3205,6 +3205,18 @@ int ObOptimizerUtil::get_onetime_exprs(ObRawExpr* expr, return ret; } +int ObOptimizerUtil::get_onetime_exprs(ObIArray &exprs, + ObIArray &onetime_exprs) +{ + int ret = OB_SUCCESS; + for (int64_t i = 0; OB_SUCC(ret) && i < exprs.count(); i++) { + if (OB_FAIL(get_onetime_exprs(exprs.at(i), onetime_exprs))) { + LOG_WARN("failed to get onetime exprs", K(ret)); + } + } + return ret; +} + int ObOptimizerUtil::get_query_ref_exprs(ObIArray &exprs, ObIArray &subqueries, ObIArray &nested_subqueries) diff --git a/src/sql/optimizer/ob_optimizer_util.h b/src/sql/optimizer/ob_optimizer_util.h index 9a35e431ef..67b595aac7 100644 --- 
a/src/sql/optimizer/ob_optimizer_util.h +++ b/src/sql/optimizer/ob_optimizer_util.h @@ -611,6 +611,9 @@ public: static int get_onetime_exprs(ObRawExpr* expr, ObIArray &onetime_exprs); + static int get_onetime_exprs(ObIArray &exprs, + ObIArray &onetime_exprs); + static int get_query_ref_exprs(ObIArray &subquery_exprs, ObIArray &subqueries, ObIArray &nested_subqueries); diff --git a/src/sql/optimizer/ob_sel_estimator.cpp b/src/sql/optimizer/ob_sel_estimator.cpp index 293fbbb37e..e41fd2fe8f 100644 --- a/src/sql/optimizer/ob_sel_estimator.cpp +++ b/src/sql/optimizer/ob_sel_estimator.cpp @@ -38,6 +38,189 @@ namespace sql { inline double revise_ndv(double ndv) { return ndv < 1.0 ? 1.0 : ndv; } +void SimpleRange::set_whole_range() +{ + start_.set_min_value(); + end_.set_max_value(); + inclusive_start_ = false; + inclusive_end_ = false; +} + +void SimpleRange::set_false_range() +{ + start_.set_max_value(); + end_.set_min_value(); + inclusive_start_ = false; + inclusive_end_ = false; +} + +int SimpleRange::compare_with_end(const SimpleRange &r) const +{ + int cmp = 0; + if (end_.is_max_value()) { + if (!r.end_.is_max_value()) { + cmp = 1; + } + } else if (r.end_.is_max_value()) { + cmp = -1; + } else { + cmp = end_.compare(r.end_); + if (0 == cmp) { + if (inclusive_end_ && !r.inclusive_end_) { + cmp = 1; + } else if (!inclusive_end_ && r.inclusive_end_) { + cmp = -1; + } + } + } + return cmp; +} + +int SimpleRange::compare_with_start(const SimpleRange &r) const +{ + int cmp = 0; + if (start_.is_min_value()) { + if (!r.start_.is_min_value()) { + cmp = -1; + } + } else if (r.start_.is_min_value()) { + cmp = 1; + } else { + cmp = start_.compare(r.start_); + if (0 == cmp) { + if (inclusive_start_ && !r.inclusive_start_) { + cmp = -1; + } else if (!inclusive_start_ && r.inclusive_start_) { + cmp = 1; + } + } + } + return cmp; +} + +bool SimpleRange::intersect(const SimpleRange &r) +{ + bool bret = false; + if (start_.can_compare(r.start_) && end_.can_compare(r.end_)) { + bret = 
true; + int cmp_start = compare_with_start(r); + if (cmp_start == -1) { + start_ = r.start_; + inclusive_start_ = r.inclusive_start_; + } + int cmp_end = compare_with_end(r); + if (cmp_end == 1) { + end_ = r.end_; + inclusive_end_ = r.inclusive_end_; + } + } + return bret; +} + +void SimpleRange::set_bound(ObItemType item_type, ObObj bound) +{ + if (bound.is_null()) { + if (T_OP_IS == item_type || T_OP_NSEQ == item_type) { + start_.set_null(); + end_.set_null(); + inclusive_start_ = true; + inclusive_end_ = true; + } else if (T_OP_IS_NOT == item_type) { + set_whole_range(); + } else { + set_false_range(); + } + } else if (T_OP_LE == item_type) { + end_ = bound; + inclusive_end_ = true; + } else if (T_OP_LT == item_type) { + end_ = bound; + inclusive_end_ = false; + } else if (T_OP_GE == item_type) { + start_ = bound; + inclusive_start_ = true; + } else if (T_OP_GT == item_type) { + start_ = bound; + inclusive_start_ = false; + } else if (T_OP_EQ == item_type || T_OP_NSEQ == item_type) { + start_ = bound; + end_ = bound; + inclusive_start_ = true; + inclusive_end_ = true; + } +} + +void SimpleRange::set_bound(ObItemType item_type, double bound) +{ + ObObj obj; + obj.set_double(bound); + set_bound(item_type, obj); +} + +bool SimpleRange::is_valid_range() +{ + bool bret = false; + if (!start_.can_compare(end_)) { + bret = false; + } else if (start_.is_null() && end_.is_null()) { + bret = true; + } else if (start_.is_max_value() || end_.is_min_value() || + start_.is_null() || end_.is_null()) { + bret = false; + } else if (start_.is_min_value() || end_.is_max_value()) { + bret = true; + } else { + int cmp = start_.compare(end_); + if (-1 == cmp) { + bret = true; + } else if (1 == cmp) { + bret = false; + } else if (0 == cmp) { + if (inclusive_start_ && inclusive_end_) { + bret = true; + } else { + bret = false; + } + } + } + return bret; +} + +bool SimpleRange::is_superset(const SimpleRange &r) const +{ + bool bret = false; + if (start_.can_compare(r.start_) && 
end_.can_compare(r.end_)) { + int cmp1 = compare_with_start(r); + int cmp2 = compare_with_end(r); + bret = cmp1 <= 0 && cmp2 >= 0; + } + return bret; +} + +void SimpleRange::multiply_double(double coff) +{ + if (start_.is_double()) { + start_.set_double(start_.get_double() * coff); + } + if (end_.is_double()) { + end_.set_double(end_.get_double() * coff); + } + if (coff < 0) { + std::swap(start_, end_); + std::swap(inclusive_start_, inclusive_end_); + if (start_.is_min_value()) { + start_.set_max_value(); + } else if (start_.is_max_value()) { + start_.set_min_value(); + } + if (end_.is_min_value()) { + end_.set_max_value(); + } else if (end_.is_max_value()) { + end_.set_min_value(); + } + } +} + int ObSelEstimator::append_estimators(ObIArray &sel_estimators, ObSelEstimator *new_estimator) { int ret = OB_SUCCESS; @@ -281,46 +464,26 @@ int ObInSelEstimator::get_in_sel(const OptTableMetas &table_metas, return ret; } -int ObIsSelEstimator::get_is_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObRawExpr &qual, - double &selectivity) +int ObIsSelEstimator::get_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + double &selectivity, + ObIArray &all_predicate_sel) { int ret = OB_SUCCESS; selectivity = DEFAULT_SEL; - const ParamStore *params = ctx.get_params(); - const ObDMLStmt *stmt = ctx.get_stmt(); - const ObRawExpr *left_expr = qual.get_param_expr(0); - const ObRawExpr *right_expr = qual.get_param_expr(1); - ObObj result; - bool got_result = false; - if (OB_ISNULL(params) || OB_ISNULL(stmt) || OB_ISNULL(left_expr) || OB_ISNULL(right_expr)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpect null", K(ret), K(params), K(stmt), K(left_expr), K(right_expr)); - } else if (OB_UNLIKELY(!ObOptEstUtils::is_calculable_expr(*right_expr, params->count()))) { - // do nothing - } else if (OB_FAIL(ObSQLUtils::calc_const_or_calculable_expr(ctx.get_opt_ctx().get_exec_ctx(), - right_expr, - result, - got_result, - 
ctx.get_allocator()))) { - LOG_WARN("failed to calculate const or calculable expr", K(ret)); - } else if (!got_result) { - // do nothing - } else if (OB_FAIL(ObOptSelectivity::remove_ignorable_func_for_est_sel(left_expr))) { - LOG_WARN("failed to remove ignorable func", KPC(left_expr)); - } else if (left_expr->is_column_ref_expr()) { - if (OB_FAIL(ObOptSelectivity::check_column_in_current_level_stmt(stmt, *left_expr))) { - LOG_WARN("Failed to check column whether is in current stmt", K(ret)); - } else if (OB_LIKELY(result.is_null())) { - if (OB_FAIL(ObOptSelectivity::get_column_basic_sel(table_metas, ctx, *left_expr, NULL, &selectivity))) { + if (can_calc_sel_) { + if (OB_ISNULL(expr_) || OB_ISNULL(left_expr_) || OB_UNLIKELY(!left_expr_->is_column_ref_expr())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpect error", K(ret), KPC(expr_), KPC(left_expr_)); + } else if (OB_LIKELY(right_const_obj_.is_null())) { + if (OB_FAIL(ObOptSelectivity::get_column_basic_sel(table_metas, ctx, *left_expr_, NULL, &selectivity))) { LOG_WARN("Failed to get var distinct sel", K(ret)); } - } else if (result.is_tinyint() && - !ob_is_string_or_lob_type(left_expr->get_data_type())) { + } else if (right_const_obj_.is_tinyint() && + !ob_is_string_or_lob_type(left_expr_->get_data_type())) { double distinct_sel = 0.0; double null_sel = 0.0; - if (OB_FAIL(ObOptSelectivity::get_column_basic_sel(table_metas, ctx, *left_expr, &distinct_sel, &null_sel))) { + if (OB_FAIL(ObOptSelectivity::get_column_basic_sel(table_metas, ctx, *left_expr_, &distinct_sel, &null_sel))) { LOG_WARN("Failed to get var distinct sel", K(ret)); } else { //distinct_num < 2. That is distinct_num only 1,(As double and statistics not completely accurate, @@ -331,116 +494,51 @@ int ObIsSelEstimator::get_is_sel(const OptTableMetas &table_metas, //But we don't kown whether distinct value is 0. So gess the selectivity: (1 - null_sel)/2.0 distinct_sel = (1- null_sel) / 2.0;//don't kow the value, just get half. 
} - selectivity = (result.is_true()) ? (1 - distinct_sel - null_sel) : distinct_sel; + selectivity = (right_const_obj_.is_true()) ? (1 - distinct_sel - null_sel) : distinct_sel; } - } else { }//default sel - } else { - //TODO func(cnt_column) + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected error", K(ret), KPC(left_expr_), K(right_const_obj_)); + } } - if (T_OP_IS_NOT == qual.get_expr_type()) { + if (OB_SUCC(ret) && T_OP_IS_NOT == expr_->get_expr_type()) { selectivity = 1.0 - selectivity; } return ret; } -int ObCmpSelEstimator::get_range_cmp_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObRawExpr &qual, - double &selectivity) +int ObCmpSelEstimator::get_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + double &selectivity, + ObIArray &all_predicate_sel) { int ret = OB_SUCCESS; selectivity = DEFAULT_INEQ_SEL; - const ObRawExpr *left_expr = qual.get_param_expr(0); - const ObRawExpr *right_expr = qual.get_param_expr(1); - if (OB_ISNULL(left_expr) || OB_ISNULL(right_expr)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get null expr", K(ret), K(left_expr), K(right_expr)); - } else if (OB_FAIL(ObOptimizerUtil::get_expr_without_lossless_cast(left_expr, left_expr)) || - OB_FAIL(ObOptimizerUtil::get_expr_without_lossless_cast(right_expr, right_expr))) { - LOG_WARN("failed to get expr without lossless cast", K(ret)); - } else if ((left_expr->is_column_ref_expr() && right_expr->is_const_expr()) || - (left_expr->is_const_expr() && right_expr->is_column_ref_expr())) { - const ObRawExpr *col_expr = left_expr->is_column_ref_expr() ? 
left_expr : right_expr; - if (OB_FAIL(ObOptSelectivity::get_column_range_sel(table_metas, ctx, - static_cast(*col_expr), - qual, true, selectivity))) { - LOG_WARN("Failed to get column range sel", K(qual), K(ret)); - } - } else if (T_OP_ROW == left_expr->get_expr_type() && T_OP_ROW == right_expr->get_expr_type()) { - //only deal (col1, xx, xx) CMP (const, xx, xx) - if (left_expr->get_param_count() == 1 && OB_NOT_NULL(left_expr->get_param_expr(0)) && - T_OP_ROW == left_expr->get_param_expr(0)->get_expr_type()) { - left_expr = left_expr->get_param_expr(0); - } - if (right_expr->get_param_count() == 1 && OB_NOT_NULL(right_expr->get_param_expr(0)) && - T_OP_ROW == right_expr->get_param_expr(0)->get_expr_type()) { - right_expr = right_expr->get_param_expr(0); - } - if (left_expr->get_param_count() != right_expr->get_param_count()) { + if (can_calc_sel_) { + if (OB_ISNULL(expr_) || OB_ISNULL(col_expr_)) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("param count should be equal", - K(left_expr->get_param_count()), K(right_expr->get_param_count())); - } else if (left_expr->get_param_count() <= 1) { - // do nothing - } else if (OB_ISNULL(left_expr = left_expr->get_param_expr(0)) || - OB_ISNULL(right_expr = right_expr->get_param_expr(0))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null", K(ret), K(left_expr), K(right_expr)); - } else if ((left_expr->is_column_ref_expr() && right_expr->is_const_expr()) || - (left_expr->is_const_expr() && right_expr->is_column_ref_expr())) { - const ObRawExpr *col_expr = (left_expr->is_column_ref_expr()) ? 
(left_expr) : (right_expr); - if (OB_FAIL(ObOptSelectivity::get_column_range_sel(table_metas, ctx, - static_cast(*col_expr), - qual, true, selectivity))) { - LOG_WARN("failed to get column range sel", K(ret)); - } - } else { /* no dothing */ } + LOG_WARN("get null expr", K(ret), KPC(col_expr_), KPC(expr_)); + } else if (OB_FAIL(ObOptSelectivity::get_column_range_sel(table_metas, ctx, *col_expr_, *expr_, true, selectivity))) { + LOG_WARN("Failed to get column range sel", KPC(expr_), KPC(col_expr_), K(ret)); + } else {/*do nothing*/} } return ret; } -int ObBtwSelEstimator::get_btw_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObRawExpr &qual, - double &selectivity) +int ObBtwSelEstimator::get_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + double &selectivity, + ObIArray &all_predicate_sel) { int ret = OB_SUCCESS; selectivity = DEFAULT_SEL; - const ObRawExpr *cmp_expr = NULL; - const ObRawExpr *l_expr = NULL; - const ObRawExpr *r_expr = NULL; - const ObRawExpr *col_expr = NULL; - const ParamStore *params = ctx.get_params(); - if (3 != qual.get_param_count()) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("between expr should have 3 param", K(ret), K(qual)); - } else if (OB_ISNULL(params) || - OB_ISNULL(cmp_expr = qual.get_param_expr(0)) || - OB_ISNULL(l_expr = qual.get_param_expr(1)) || - OB_ISNULL(r_expr = qual.get_param_expr(2))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get null params", K(ret), K(params), K(cmp_expr), K(l_expr), K(r_expr)); - } else if (OB_FAIL(ObOptimizerUtil::get_expr_without_lossless_cast(cmp_expr, cmp_expr))) { - LOG_WARN("failed to get expr without lossless cast", K(ret)); - } else if (cmp_expr->is_column_ref_expr() && - ObOptEstUtils::is_calculable_expr(*l_expr, params->count()) && - ObOptEstUtils::is_calculable_expr(*r_expr, params->count())) { - col_expr = cmp_expr; - } else if (ObOptEstUtils::is_calculable_expr(*cmp_expr, params->count()) && - l_expr->is_column_ref_expr() && - 
ObOptEstUtils::is_calculable_expr(*r_expr, params->count())) { - col_expr = l_expr; - } else if (ObOptEstUtils::is_calculable_expr(*cmp_expr, params->count()) && - ObOptEstUtils::is_calculable_expr(*l_expr, params->count()) && - r_expr->is_column_ref_expr()) { - col_expr = r_expr; - } - if (NULL != col_expr) { - if (OB_FAIL(ObOptSelectivity::get_column_range_sel(table_metas, ctx, - static_cast(*col_expr), - qual, true, selectivity))) { - LOG_WARN("failed to get column range sel", K(ret)); + if (can_calc_sel_) { + if (OB_ISNULL(expr_) || OB_ISNULL(col_expr_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null expr", K(ret), KPC(col_expr_), KPC(expr_)); + } else if (OB_FAIL(ObOptSelectivity::get_column_range_sel(table_metas, ctx, *col_expr_, *expr_, true, selectivity))) { + LOG_WARN("failed to get column range sel", K(ret), KPC(expr_), KPC(col_expr_)); } } return ret; @@ -509,6 +607,7 @@ int ObEqualSelEstimator::get_ne_sel(const OptTableMetas &table_metas, LOG_WARN("get unexpected expr", KPC(l_row), KPC(r_row), K(ret)); } else { int64_t num = l_row->get_param_count(); + ObSEArray selectivities; for (int64_t i = 0; OB_SUCC(ret) && i < num; ++i) { if (OB_ISNULL(l_param = l_row->get_param_expr(i)) || OB_ISNULL(r_param = r_row->get_param_expr(i))) { @@ -517,10 +616,11 @@ int ObEqualSelEstimator::get_ne_sel(const OptTableMetas &table_metas, } else if (OB_FAIL(SMART_CALL(get_ne_sel(table_metas, ctx, *l_param, *r_param, tmp_selectivity)))) { LOG_WARN("failed to get equal selectivity", K(ret)); - } else { - selectivity += tmp_selectivity - selectivity * tmp_selectivity; + } else if (OB_FAIL(selectivities.push_back(1 - tmp_selectivity))) { + LOG_WARN("failed to push back", K(ret)); } } + selectivity = 1 - ctx.get_correlation_model().combine_filters_selectivity(selectivities); } } else if (l_expr.has_flag(CNT_COLUMN) && r_expr.has_flag(CNT_COLUMN)) { if (OB_FAIL(get_cntcol_op_cntcol_sel(table_metas, ctx, l_expr, r_expr, T_OP_NE, selectivity))) { @@ -648,6 +748,7 @@ int 
ObEqualSelEstimator::get_equal_sel(const OptTableMetas &table_metas, LOG_WARN("get unexpected expr", KPC(l_row), KPC(l_row), K(ret)); } else { int64_t num = l_row->get_param_count(); + ObSEArray selectivities; for (int64_t i = 0; OB_SUCC(ret) && i < num; ++i) { if (OB_ISNULL(l_expr = l_row->get_param_expr(i)) || OB_ISNULL(r_expr = r_row->get_param_expr(i))) { @@ -656,10 +757,11 @@ int ObEqualSelEstimator::get_equal_sel(const OptTableMetas &table_metas, } else if (OB_FAIL(SMART_CALL(get_equal_sel(table_metas, ctx, *l_expr, *r_expr, null_safe, tmp_selectivity)))) { LOG_WARN("failed to get equal selectivity", K(ret)); - } else { - selectivity *= tmp_selectivity; + } else if (OB_FAIL(selectivities.push_back(tmp_selectivity))) { + LOG_WARN("failed to push back"); } } + selectivity = ctx.get_correlation_model().combine_filters_selectivity(selectivities); } } else if ((left_expr.has_flag(CNT_COLUMN) && !right_expr.has_flag(CNT_COLUMN)) || (!left_expr.has_flag(CNT_COLUMN) && right_expr.has_flag(CNT_COLUMN))) { @@ -793,6 +895,13 @@ int ObEqualSelEstimator::get_simple_equal_sel(const OptTableMetas &table_metas, return ret; } +/** + * For the equal predicate 'a = b', we calculate the NDV of (a, b), + * and use the maximum number of tuples that might satisfy the equality as the result of predicate filtering. + * Therefore, the selectivity should be 'min(ndv(a), ndv(b)) / ndv(a, b)'. + * In the case of a join, the left and right sides of the equality are always independent, + * so the selectivity can be simplified as '1 / max(ndv(a), ndv(b))'. 
+*/ int ObEqualSelEstimator::get_cntcol_op_cntcol_sel(const OptTableMetas &table_metas, const OptSelectivityCtx &ctx, const ObRawExpr &input_left_expr, @@ -814,6 +923,52 @@ int ObEqualSelEstimator::get_cntcol_op_cntcol_sel(const OptTableMetas &table_met } else if (OB_ISNULL(left_expr) || OB_ISNULL(right_expr)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(ret), K(left_expr), K(right_expr)); + } else if (left_expr->get_relation_ids().equal(right_expr->get_relation_ids()) && + ctx.check_opt_compat_version(COMPAT_VERSION_4_2_4, COMPAT_VERSION_4_3_0, + COMPAT_VERSION_4_3_3)) { + double combine_ndv = 1; + if (left_expr->is_column_ref_expr() && right_expr->is_column_ref_expr()) { + const ObColumnRefRawExpr* left_col = static_cast(left_expr); + const ObColumnRefRawExpr* right_col = static_cast(right_expr); + if (OB_FAIL(ObOptSelectivity::get_column_ndv_and_nns(table_metas, ctx, *left_expr, &left_ndv, &left_nns))) { + LOG_WARN("failed to get column basic sel", K(ret)); + } else if (OB_FAIL(ObOptSelectivity::get_column_ndv_and_nns(table_metas, ctx, *right_expr, &right_ndv, &right_nns))) { + LOG_WARN("failed to get column basic sel", K(ret)); + } else if (left_col->get_column_id() == right_col->get_column_id()) { + // same table same column + if (T_OP_NSEQ == op_type) { + selectivity = 1.0; + } else if (T_OP_EQ == op_type) { + selectivity = left_nns; + } else if (T_OP_NE == op_type) { + selectivity = 0.0; + } + } else { + combine_ndv = ObOptSelectivity::combine_two_ndvs(ctx.get_current_rows(), left_ndv, right_ndv); + combine_ndv = std::max(1.0, combine_ndv); + selectivity = std::min(left_ndv, right_ndv) / combine_ndv; + if (T_OP_NSEQ == op_type) { + selectivity += (1 - left_nns) * (1 - right_nns); + } else if (T_OP_EQ == op_type) { + // do nothing + } else if (T_OP_NE == op_type) { + selectivity = std::max(1 - selectivity, 1 / combine_ndv / 2.0); + } + } + } else { + if (OB_FAIL(ObOptSelectivity::calculate_distinct(table_metas, ctx, *left_expr, 
ctx.get_current_rows(), left_ndv))) { + LOG_WARN("Failed to calculate distinct", K(ret)); + } else if (OB_FAIL(ObOptSelectivity::calculate_distinct(table_metas, ctx, *right_expr, ctx.get_current_rows(), right_ndv))) { + LOG_WARN("Failed to calculate distinct", K(ret)); + } else { + combine_ndv = ObOptSelectivity::combine_two_ndvs(ctx.get_current_rows(), left_ndv, right_ndv); + combine_ndv = std::max(1.0, combine_ndv); + selectivity = std::min(left_ndv, right_ndv) / combine_ndv; + if (T_OP_NE == op_type) { + selectivity = std::max(1 - selectivity, 1 / combine_ndv / 2.0); + } + } + } } else if (left_expr->is_column_ref_expr() && right_expr->is_column_ref_expr()) { const ObColumnRefRawExpr* left_col = NULL; const ObColumnRefRawExpr* right_col = NULL; @@ -1532,11 +1687,11 @@ int ObLikeSelEstimator::create_estimator(ObSelEstimatorFactory &factory, like_estimator->match_all_str_))) { LOG_WARN("failed to check if expr start with percent sign", K(ret)); } else if (like_estimator->match_all_str_) { - like_estimator->can_calc_sel_ = true; + like_estimator->can_calc_sel_by_prefix_ = true; } else if (is_lob_storage(like_estimator->variable_->get_data_type())) { // do nothing } else if (!is_start_with) { - like_estimator->can_calc_sel_ = true; + like_estimator->can_calc_sel_by_prefix_ = true; } } } @@ -1598,14 +1753,14 @@ int ObLikeSelEstimator::get_sel(const OptTableMetas &table_metas, if (OB_ISNULL(expr_) || OB_ISNULL(variable_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected null expr", KPC(this)); - } else if (match_all_str_ && can_calc_sel_) { + } else if (match_all_str_ && can_calc_sel_by_prefix_) { double nns = 0.0; if (OB_FAIL(ObOptSelectivity::get_column_ndv_and_nns(table_metas, ctx, *variable_, NULL, &nns))) { LOG_WARN("failed to get nns"); } else { selectivity = nns; } - } else if (can_calc_sel_) { + } else if (can_calc_sel_by_prefix_) { if (OB_UNLIKELY(!variable_->is_column_ref_expr())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected expr", KPC(variable_)); @@ 
-1616,15 +1771,134 @@ int ObLikeSelEstimator::get_sel(const OptTableMetas &table_metas, } } else if (is_lob_storage(variable_->get_data_type())) { // no statistics for lob type, use default selectivity - selectivity = DEFAULT_CLOB_LIKE_SEL; + selectivity = DEFAULT_LIKE_SEL; + } else if (!ctx.check_opt_compat_version(COMPAT_VERSION_4_2_4, COMPAT_VERSION_4_3_0, + COMPAT_VERSION_4_3_3)) { + selectivity = DEFAULT_INEQ_SEL; + } else if (OB_FAIL(calculate_like_sel_by_substr(table_metas, + ctx, + selectivity))) { + LOG_WARN("failed to calculate like sel", K(ret)); + } + return ret; +} + +int ObLikeSelEstimator::get_wildcard_length(const OptSelectivityCtx &ctx, double &wildcard_length) +{ + int ret = OB_SUCCESS; + ObObj pattern_value; + bool got_result = false; + wildcard_length = 1.0; // default guess value + if (OB_ISNULL(pattern_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else if (!pattern_->is_static_const_expr()) { + ObString percent_str = ObCharsetUtils::get_const_str(pattern_->get_collation_type(), '%'); + wildcard_length = percent_str.length(); + } else if (OB_FAIL(ObSQLUtils::calc_const_or_calculable_expr(ctx.get_opt_ctx().get_exec_ctx(), + pattern_, + pattern_value, + got_result, + ctx.get_allocator()))) { + LOG_WARN("failed to calc const or calculable expr", K(ret)); + } else if (!got_result || !pattern_value.is_string_type() || pattern_value.is_null()) { + // do nothing } else { - //try find the calc sel from dynamic sampling - int64_t idx = -1; - if (ObOptimizerUtil::find_item(all_predicate_sel, ObExprSelPair(&qual, 0), &idx)) { - selectivity = all_predicate_sel.at(idx).sel_; - } else { - selectivity = DEFAULT_INEQ_SEL; + const ObString &expr_str = pattern_value.get_string(); + ObStringScanner scanner(expr_str, pattern_->get_collation_type()); + ObString percent_str = ObCharsetUtils::get_const_str(pattern_->get_collation_type(), '%'); + ObString underline_str = ObCharsetUtils::get_const_str(pattern_->get_collation_type(), '_'); + 
ObString encoding; + int32_t wc = 0; + wildcard_length = 0.0; + while (OB_SUCC(ret) + && scanner.next_character(encoding, wc, ret)) { + if (0 == percent_str.compare(encoding)) { + wildcard_length += percent_str.length(); + } + if (0 == underline_str.compare(encoding)) { + wildcard_length += underline_str.length(); + } } + if (OB_FAIL(ret)) { + ret = OB_SUCCESS; + wildcard_length = percent_str.length(); + } + } + return ret; +} + +/** + * try estimate the like selectivity by substr + * e.g. + * `c1 like '%abc'` <=> `substr(c1, -3) = 'abc'` + * Assumption: + * 1. All strings in the variable and pattern have the same length. + * 2. The positions of non-wildcard characters are fixed. + * 3. If the pattern is not a constant, then it contains exactly one wildcard. + * 4. The pattern will not be null +*/ +int ObLikeSelEstimator::calculate_like_sel_by_substr(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + double &selectivity) +{ + int ret = OB_SUCCESS; + // default strategy, not reliable + double variable_len = 0; + double pattern_len = 0; + double substr_ndv = 1.0; + double pattern_ndv = 1.0; + double substr_nns = 1.0; + double pattern_nns = 1.0; // assume that the pattern is not null + double wildcard_length = 1.0; + selectivity = DEFAULT_LIKE_SEL; + if (OB_ISNULL(variable_) || OB_ISNULL(pattern_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else if (!variable_->get_result_type().is_string_type() || + !pattern_->get_result_type().is_string_type() || + !variable_->is_column_ref_expr()) { + // The length is not reliable, use default selectivity + } else if (OB_FAIL(get_wildcard_length(ctx, wildcard_length))) { + LOG_WARN("failed to get wildcard count", K(ret)); + } else if (OB_FAIL(ObOptSelectivity::calculate_expr_avg_len(table_metas, ctx, pattern_, pattern_len))) { + LOG_WARN("failed to calc expr len", KPC(pattern_), K(ret)); + } else if (pattern_len <= ObOptEstCostModel::DEFAULT_FIXED_OBJ_WIDTH + wildcard_length) { + // do 
nothing + } else if (FALSE_IT(pattern_len -= ObOptEstCostModel::DEFAULT_FIXED_OBJ_WIDTH)) { + } else if (OB_FAIL(ObOptSelectivity::calculate_substrb_info( + table_metas, ctx, variable_, pattern_len - wildcard_length, ctx.get_current_rows(), substr_ndv, substr_nns))) { + LOG_WARN("failed to calculate substrb ndv", KPC_(variable)); + } else if (OB_FAIL(ObOptSelectivity::calculate_distinct(table_metas, ctx, *pattern_, ctx.get_current_rows(), pattern_ndv))) { + LOG_WARN("failed to calcualte distinct", KPC_(pattern)); + } else { + if (NULL == ctx.get_left_rel_ids() || NULL == ctx.get_right_rel_ids()) { + double combine_ndv = ObOptSelectivity::combine_two_ndvs(ctx.get_current_rows(), substr_ndv, pattern_ndv); + selectivity = std::min(substr_ndv, pattern_ndv) / std::max(1.0, combine_ndv); + selectivity *= substr_nns * pattern_nns; + } else { + double left_ndv = substr_ndv; + double right_ndv = pattern_ndv; + double left_nns = substr_nns; + double right_nns = pattern_nns; + if (variable_->get_relation_ids().overlap(*ctx.get_right_rel_ids()) || + pattern_->get_relation_ids().overlap(*ctx.get_left_rel_ids())) { + std::swap(left_ndv, right_ndv); + std::swap(left_nns, right_nns); + } + if (IS_LEFT_SEMI_ANTI_JOIN(ctx.get_join_type())) { + selectivity = (std::min(left_ndv, right_ndv) / left_ndv) * left_nns; + } else if (IS_RIGHT_SEMI_ANTI_JOIN(ctx.get_join_type())) { + selectivity = (std::min(left_ndv, right_ndv) / right_ndv) * right_nns; + } else { + selectivity = left_nns * right_nns / std::max(left_ndv, right_ndv); + } + if (OB_SUCC(ret) && selectivity >= 1.0 && IS_ANTI_JOIN(ctx.get_join_type())) { + selectivity = 1 - DEFAULT_ANTI_JOIN_SEL; + } + } + LOG_WARN("succeed to calculate like selectivity by substr", + K(selectivity), K(substr_ndv), K(substr_nns), K(pattern_ndv), K(pattern_nns), K(wildcard_length)); } return ret; } @@ -1691,7 +1965,7 @@ int ObBoolOpSelEstimator::get_sel(const OptTableMetas &table_metas, const ObRawExpr &qual = *expr_; if (OB_ISNULL(expr_)) { ret = 
OB_ERR_UNEXPECTED; - LOG_WARN("unexpected null expr", KPC(this)); + LOG_WARN("unexpected null param", KPC(this), K(ctx)); } else if (T_OP_NOT == qual.get_expr_type() || T_FUN_SYS_LNNVL == qual.get_expr_type() || T_OP_BOOL == qual.get_expr_type()) { @@ -1752,17 +2026,21 @@ int ObBoolOpSelEstimator::get_sel(const OptTableMetas &table_metas, if (OB_FAIL(ObOptSelectivity::check_mutex_or(qual, is_mutex))) { LOG_WARN("failed to check mutex or", K(ret)); } else if (is_mutex) { - selectivity = ObOptSelectivity::get_filters_selectivity(selectivities, FilterDependencyType::MUTEX_OR); + selectivity = 0.0; + for (int64_t i = 0; i < selectivities.count(); i ++) { + selectivity += selectivities.at(i); + } + selectivity = ObOptSelectivity::revise_between_0_1(selectivity); } else { // sel(p1 or p2 or p3) = sel(!(!p1 and !p2 and !p3)) for (int64_t i = 0; i < selectivities.count(); i ++) { selectivities.at(i) = 1 - selectivities.at(i); } - selectivity = ObOptSelectivity::get_filters_selectivity(selectivities, ctx.get_dependency_type()); + selectivity = ctx.get_correlation_model().combine_filters_selectivity(selectivities); selectivity = 1- selectivity; } } else { - selectivity = ObOptSelectivity::get_filters_selectivity(selectivities, ctx.get_dependency_type()); + selectivity = ctx.get_correlation_model().combine_filters_selectivity(selectivities); } } else { ret = OB_ERR_UNEXPECTED; @@ -1859,6 +2137,7 @@ int ObSimpleJoinSelEstimator::create_estimator(ObSelEstimatorFactory &factory, } else { simple_join_estimator->left_rel_ids_ = left_rel_ids; simple_join_estimator->right_rel_ids_ = right_rel_ids; + simple_join_estimator->join_rel_ids_ = &expr.get_relation_ids(); estimator = simple_join_estimator; } return ret; @@ -1905,11 +2184,14 @@ int ObSimpleJoinSelEstimator::merge(const ObSelEstimator &other, bool &is_succes if (get_type() == other.get_type()) { const ObSimpleJoinSelEstimator &est_other = static_cast(other); if (OB_ISNULL(left_rel_ids_) || OB_ISNULL(right_rel_ids_) || - 
OB_ISNULL(est_other.left_rel_ids_) || OB_ISNULL(est_other.right_rel_ids_)) { + OB_ISNULL(est_other.left_rel_ids_) || OB_ISNULL(est_other.right_rel_ids_) || + OB_ISNULL(join_rel_ids_) || OB_ISNULL(est_other.join_rel_ids_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected NULL", KPC(this), K(est_other)); } else if (*left_rel_ids_ == *est_other.left_rel_ids_ && - *right_rel_ids_ == *est_other.right_rel_ids_) { + *right_rel_ids_ == *est_other.right_rel_ids_ && + *join_rel_ids_ == *est_other.join_rel_ids_ + ) { is_success = true; if (OB_FAIL(append(join_conditions_, est_other.join_conditions_))) { LOG_WARN("failed to append", K(ret)); @@ -1966,21 +2248,8 @@ int ObSimpleJoinSelEstimator::get_multi_equal_sel(const OptTableMetas &table_met LOG_WARN("failed get unexpected null", K(ret), K(ctx)); } else if (OB_FAIL(is_valid_multi_join(quals, is_valid))) { LOG_WARN("failed to check is valid multi join", K(ret)); - } else if (!is_valid) { - // multi join condition related to more than two table. Calculate selectivity for each join - // condition independently. 
- for (int64_t i = 0; OB_SUCC(ret) && i < quals.count(); ++i) { - ObRawExpr *cur_expr = quals.at(i); - double tmp_sel = 1.0; - if (OB_ISNULL(cur_expr)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null", K(ret)); - } else if (OB_FAIL(ObEqualSelEstimator::get_equal_sel(table_metas, ctx, *cur_expr, tmp_sel))) { - LOG_WARN("failed to get equal selectivity", K(ret)); - } else { - selectivity *= tmp_sel; - } - } + } else if (OB_UNLIKELY(!is_valid)) { + ret = OB_ERR_UNEXPECTED; } else if (OB_FAIL(extract_join_exprs(quals, *ctx.get_left_rel_ids(), *ctx.get_right_rel_ids(), left_exprs, right_exprs, null_safes))) { LOG_WARN("failed to extract join exprs", K(ret)); @@ -2073,7 +2342,7 @@ int ObSimpleJoinSelEstimator::get_cntcols_eq_cntcols_sel(const OptTableMetas &ta const ObIArray &left_exprs, const ObIArray &right_exprs, const ObIArray &null_safes, - double &selectivity) + double &selectivity) { int ret = OB_SUCCESS; selectivity = DEFAULT_EQ_SEL; @@ -2161,6 +2430,18 @@ int ObSimpleJoinSelEstimator::get_cntcols_eq_cntcols_sel(const OptTableMetas &ta * ## NULL safe * a) semi: non NULL safe selectivity + `nullsafe(i) && left_not_null_sel(i) < 1.0 ? 
null_sel(i) * selectivity(j) [where j != i]: 0` */ + if (IS_SEMI_ANTI_JOIN(ctx.get_assumption_type())) { + // do nothing + } else if (left_contain_pk == right_contain_pk) { + // 两侧都不是主键或都是主键, 不做修正 + } else if (refine_right_ndv) { + // 一侧有主键时, 认为是主外键连接, 外键上最大的ndv为即为主键的原始ndv + right_ndv = std::min(right_ndv, left_origin_rows); + } else if (refine_left_ndv) { + left_ndv = std::min(left_ndv, right_origin_rows); + } else { + // do nothing + } if (IS_LEFT_SEMI_ANTI_JOIN(ctx.get_join_type())) { selectivity = std::min(left_ndv, right_ndv) / left_ndv; for (int64_t i = 0; i < left_not_null_sels.count(); ++i) { @@ -2291,7 +2572,7 @@ int ObInequalJoinSelEstimator::extract_column_offset(const OptSelectivityCtx &ct } else { is_valid = false; } - } else if (expr->is_static_const_expr()) { + } else if (expr->is_static_scalar_const_expr()) { ObObj const_value; ObObj scalar_value; bool got_result = false; @@ -2344,7 +2625,7 @@ int ObInequalJoinSelEstimator::create_estimator(ObSelEstimatorFactory &factory, LOG_WARN("failed to create estimator ", K(ret)); } else { ineq_join_estimator->term_ = term; - ineq_join_estimator->set_bound(expr.get_expr_type(), -offset); + ineq_join_estimator->range_.set_bound(expr.get_expr_type(), -offset); } } else if (T_OP_BTW == expr.get_expr_type()) { Term term1; @@ -2374,8 +2655,8 @@ int ObInequalJoinSelEstimator::create_estimator(ObSelEstimatorFactory &factory, LOG_WARN("failed to create estimator ", K(ret)); } else { ineq_join_estimator->term_ = term1; - ineq_join_estimator->set_bound(T_OP_GE, -offset1); - ineq_join_estimator->set_bound(T_OP_LE, -offset2); + ineq_join_estimator->range_.set_bound(T_OP_GE, -offset1); + ineq_join_estimator->range_.set_bound(T_OP_LE, -offset2); } } estimator = ineq_join_estimator; @@ -2405,57 +2686,6 @@ void ObInequalJoinSelEstimator::cmp_term(const ObInequalJoinSelEstimator::Term & } } -void ObInequalJoinSelEstimator::set_bound(ObItemType item_type, double bound) -{ - if (T_OP_LE == item_type) { - has_upper_bound_ = true; 
- upper_bound_ = bound; - include_upper_bound_ = true; - } else if (T_OP_LT == item_type) { - has_upper_bound_ = true; - upper_bound_ = bound; - include_upper_bound_ = false; - } else if (T_OP_GE == item_type) { - has_lower_bound_ = true; - lower_bound_ = bound; - include_lower_bound_ = true; - } else if (T_OP_GT == item_type) { - has_lower_bound_ = true; - lower_bound_ = bound; - include_lower_bound_ = false; - } -} - -void ObInequalJoinSelEstimator::reverse() -{ - term_.coefficient1_ = -term_.coefficient1_; - term_.coefficient2_ = -term_.coefficient2_; - std::swap(has_lower_bound_, has_upper_bound_); - std::swap(include_lower_bound_, include_upper_bound_); - std::swap(lower_bound_, upper_bound_); - lower_bound_ = -lower_bound_; - upper_bound_ = -upper_bound_; -} - -void ObInequalJoinSelEstimator::update_lower_bound(double bound, bool include) -{ - if (!has_lower_bound_ || - is_higher_lower_bound(bound, include, lower_bound_, include_lower_bound_)) { - include_lower_bound_ = include; - lower_bound_ = bound; - } - has_lower_bound_ = true; -} - -void ObInequalJoinSelEstimator::update_upper_bound(double bound, bool include) { - if (!has_upper_bound_ || - is_higher_upper_bound(upper_bound_, include_upper_bound_, bound, include)) { - include_upper_bound_ = include; - upper_bound_ = bound; - } - has_upper_bound_= true; -} - int ObInequalJoinSelEstimator::merge(const ObSelEstimator &other_estmator, bool &is_success) { int ret = OB_SUCCESS; @@ -2464,16 +2694,13 @@ int ObInequalJoinSelEstimator::merge(const ObSelEstimator &other_estmator, bool const ObInequalJoinSelEstimator &other = static_cast(other_estmator); bool need_reverse = false; cmp_term(term_, other.term_, is_success, need_reverse); - if (is_success){ + if (is_success) { if (need_reverse) { - reverse(); - } - if (other.has_lower_bound_) { - update_lower_bound(other.lower_bound_, other.include_lower_bound_); - } - if (other.has_upper_bound_) { - update_upper_bound(other.upper_bound_, other.include_upper_bound_); 
+ term_.coefficient1_ = -term_.coefficient1_; + term_.coefficient2_ = -term_.coefficient2_; + range_.multiply_double(-1.0); } + range_.intersect(other.range_); } } return ret; @@ -2591,14 +2818,16 @@ int ObInequalJoinSelEstimator::get_sel(const OptTableMetas &table_metas, selectivity = 1.0; double nns1, nns2, ndv1, ndv2; double min1, min2, max1, max2; - double lower_bound = lower_bound_; - double upper_bound = upper_bound_; - bool is_eq = include_lower_bound_ && include_upper_bound_ && - upper_bound - lower_bound <= OB_DOUBLE_EPSINON && - lower_bound - upper_bound <= OB_DOUBLE_EPSINON; + double lower_bound = range_.start_.get_double(); + bool has_lower_bound = !range_.start_.is_min_value(); + double upper_bound = range_.end_.get_double(); + bool has_upper_bound = !range_.end_.is_max_value(); + bool is_valid = range_.is_valid_range(); + bool is_eq = range_.inclusive_start_ && range_.inclusive_end_ && + !range_.start_.is_min_value() && !range_.end_.is_max_value() && + fabs(range_.end_.get_double() - range_.start_.get_double()) <= OB_DOUBLE_EPSINON; if (OB_ISNULL(term_.col1_) || OB_ISNULL(term_.col2_) || - OB_UNLIKELY(!has_lower_bound_ && !has_upper_bound_) || OB_UNLIKELY(fabs(term_.coefficient1_) != 1.0) || OB_UNLIKELY(fabs(term_.coefficient2_) != 1.0)) { ret = OB_ERR_UNEXPECTED; @@ -2607,8 +2836,7 @@ int ObInequalJoinSelEstimator::get_sel(const OptTableMetas &table_metas, LOG_WARN("failed to get nns"); } else if (OB_FAIL(ObOptSelectivity::get_column_ndv_and_nns(table_metas, ctx, *term_.col2_, &ndv2, &nns2))) { LOG_WARN("failed to get nns"); - } else if (has_lower_bound_ && has_upper_bound_ && - lower_bound >= upper_bound && !is_eq) { + } else if (!range_.is_valid_range()) { // always false // e.g. 
1 < c1 + c2 < 0 selectivity = 0.0; @@ -2616,18 +2844,9 @@ int ObInequalJoinSelEstimator::get_sel(const OptTableMetas &table_metas, term_.col1_->get_column_id() == term_.col2_->get_column_id()) { // same column if (fabs(term_.coefficient1_ + term_.coefficient2_) <= OB_DOUBLE_EPSINON) { - if (has_lower_bound_ && - is_higher_lower_bound(lower_bound, include_lower_bound_, 0, true)) { - // e.g. : c1 - c1 > 1 - selectivity = 0.0; - } else if (has_upper_bound_ && - is_higher_upper_bound(0, true, upper_bound, include_upper_bound_)) { - // e.g. : c1 - c1 < - 1 - selectivity = 0.0; - } else { - // e.g. : c1 - c1 < 1 - selectivity = nns1; - } + // e.g. : c1 - c1 < 1 + // c1 - c1 > 1 + selectivity = get_sel_for_point(0.0) * nns1; } else { // TODO : c1 + c1 < 1 selectivity = DEFAULT_INEQ_JOIN_SEL; @@ -2692,7 +2911,7 @@ int ObInequalJoinSelEstimator::get_sel(const OptTableMetas &table_metas, } else if (fabs(max1 - min1) <= OB_DOUBLE_EPSINON && fabs(max2 - min2) <= OB_DOUBLE_EPSINON) { // Both c1 and c2 have only one value // e.g. c1 in [1,1] and c2 in [2,2] - selectivity = get_sel_for_point(min1, min2); + selectivity = get_sel_for_point(min1 + min2); } else if (is_eq) { // lower bound is the same as the upper bound // e.g : 1 <= c1 + c2 <= 1; @@ -2700,29 +2919,29 @@ int ObInequalJoinSelEstimator::get_sel(const OptTableMetas &table_metas, } else if (is_semi) { // calculate selectivity for semi join // e.g. : 0 <= c1 + c2 < 1 - double sel1 = has_lower_bound_ ? ObInequalJoinSelEstimator::get_any_gt_sel(min1, max1, min2, max2, lower_bound) : 1.0; - double sel2 = has_upper_bound_ ? ObInequalJoinSelEstimator::get_all_gt_sel(min1, max1, min2, max2, upper_bound) : 0.0; + double sel1 = has_lower_bound ? ObInequalJoinSelEstimator::get_any_gt_sel(min1, max1, min2, max2, lower_bound) : 1.0; + double sel2 = has_upper_bound ? 
ObInequalJoinSelEstimator::get_all_gt_sel(min1, max1, min2, max2, upper_bound) : 0.0; // the sel of `any c2 satisfy 'a < c1 + c2 < b'` = // the sel of `any c2 satisfy 'c1 + c2 > a'` minus the sel of `all c2 satisfy 'c1 + c2 > b'` selectivity = sel1 - sel2; - if (include_lower_bound_ && ndv1 > 1) { + if (range_.inclusive_start_ && ndv1 > 1) { selectivity += 1 / ndv1; } - if (include_upper_bound_ && ndv1 > 1) { + if (range_.inclusive_end_ && ndv1 > 1) { selectivity += 1 / ndv1; } } else { // calculate selectivity for inner join // e.g. : 0 <= c1 + c2 < 1 - double sel1 = has_lower_bound_ ? ObInequalJoinSelEstimator::get_gt_sel(min1, max1, min2, max2, lower_bound) : 1.0; - double sel2 = has_upper_bound_ ? ObInequalJoinSelEstimator::get_gt_sel(min1, max1, min2, max2, upper_bound) : 0.0; + double sel1 = has_lower_bound ? ObInequalJoinSelEstimator::get_gt_sel(min1, max1, min2, max2, lower_bound) : 1.0; + double sel2 = has_upper_bound ? ObInequalJoinSelEstimator::get_gt_sel(min1, max1, min2, max2, upper_bound) : 0.0; // the sel of 'a < c1 + c2 < b' = // the sel of 'c1 + c2 > a' minus the sel of 'c1 + c2 > b' selectivity = sel1 - sel2; - if (include_lower_bound_) { + if (range_.inclusive_start_) { selectivity += ObInequalJoinSelEstimator::get_equal_sel(min1, max1, ndv1, min2, max2, ndv2, lower_bound, is_semi); } - if (include_upper_bound_) { + if (range_.inclusive_end_) { selectivity += ObInequalJoinSelEstimator::get_equal_sel(min1, max1, ndv1, min2, max2, ndv2, upper_bound, is_semi); } } @@ -2740,17 +2959,11 @@ int ObInequalJoinSelEstimator::get_sel(const OptTableMetas &table_metas, return ret; } -double ObInequalJoinSelEstimator::get_sel_for_point(double point1, double point2) +double ObInequalJoinSelEstimator::get_sel_for_point(double point) { - bool within_interval = true; - double sum = point1 + point2; - if (has_lower_bound_) { - within_interval &= include_lower_bound_ ? 
sum >= lower_bound_ : sum > lower_bound_; - } - if (has_upper_bound_) { - within_interval &= include_upper_bound_ ? sum <= upper_bound_ : sum < upper_bound_; - } - return within_interval ? 1.0 : 0.0; + SimpleRange point_range; + point_range.set_bound(T_OP_EQ, point); + return range_.is_superset(point_range) ? 1.0 : 0.0; } int ObSelEstimatorFactory::create_estimator(const OptSelectivityCtx &ctx, @@ -2775,6 +2988,7 @@ int ObSelEstimatorFactory::create_estimator(const OptSelectivityCtx &ctx, ObBoolOpSelEstimator::create_estimator, ObInSelEstimator::create_estimator, ObIsSelEstimator::create_estimator, + ObUniformRangeSelEstimator::create_estimator, ObCmpSelEstimator::create_estimator, ObBtwSelEstimator::create_estimator, ObDefaultSelEstimator::create_estimator, @@ -2799,5 +3013,526 @@ int ObSelEstimatorFactory::create_estimator(const OptSelectivityCtx &ctx, return ret; } +int ObSelEstimatorFactory::create_estimators(const OptSelectivityCtx &ctx, + ObIArray &exprs, + ObIArray &estimators) +{ + int ret = OB_SUCCESS; + for (int64_t i = 0; OB_SUCC(ret) && i < exprs.count(); i ++) { + ObSelEstimator *estimator = NULL; + if (OB_FAIL(create_estimator(ctx, exprs.at(i), estimator))) { + LOG_WARN("failed to create estimator", K(ret)); + } else if (OB_FAIL(ObSelEstimator::append_estimators(estimators, estimator))) { + LOG_WARN("failed to append estimators", K(ret)); + } + } + return ret; +} + +int ObEqualSelEstimator::create_estimator(ObSelEstimatorFactory &factory, + const OptSelectivityCtx &ctx, + const ObRawExpr &expr, + ObSelEstimator *&estimator) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(create_simple_estimator(factory, ctx, expr, estimator))) { + LOG_WARN("failed to create simple estimator", K(ret)); + } else if (OB_ISNULL(estimator)) { + //do nothing + } else if (OB_UNLIKELY(expr.get_param_count() != 2) || + OB_ISNULL(expr.get_param_expr(0)) || + OB_ISNULL(expr.get_param_expr(1))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null expr", K(ret), K(expr)); + } else if 
(OB_FAIL(check_can_calc_sel(*expr.get_param_expr(0), + *expr.get_param_expr(1), + static_cast(estimator)->can_calc_sel_))) { + LOG_WARN("failed to check can calc sel", K(ret)); + } else {/*do nothing*/} + return ret; +} + +int ObEqualSelEstimator::check_can_calc_sel(const ObRawExpr &l_expr, + const ObRawExpr &r_expr, + bool &can_calc_sel) +{ + int ret = OB_SUCCESS; + can_calc_sel = true; + if (T_OP_ROW == l_expr.get_expr_type() && T_OP_ROW == r_expr.get_expr_type()) { + //row compare row + const ObRawExpr *l_param = NULL; + const ObRawExpr *r_param = NULL; + const ObRawExpr *l_row = &l_expr; + const ObRawExpr *r_row = &r_expr; + if (l_expr.get_param_count() == 1 && OB_NOT_NULL(l_expr.get_param_expr(0)) && + T_OP_ROW == l_expr.get_param_expr(0)->get_expr_type()) { + l_row = l_expr.get_param_expr(0); + } + if (r_expr.get_param_count() == 1 && OB_NOT_NULL(r_expr.get_param_expr(0)) && + T_OP_ROW == r_expr.get_param_expr(0)->get_expr_type()) { + r_row = r_expr.get_param_expr(0); + } + if (OB_UNLIKELY(l_row->get_param_count() != r_row->get_param_count())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected expr", KPC(l_row), KPC(r_row), K(ret)); + } else { + int64_t num = l_row->get_param_count(); + for (int64_t i = 0; OB_SUCC(ret) && can_calc_sel && i < num; ++i) { + if (OB_ISNULL(l_param = l_row->get_param_expr(i)) || + OB_ISNULL(r_param = r_row->get_param_expr(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null expr", K(ret), K(l_row), K(r_row), K(i)); + } else if (OB_FAIL(SMART_CALL(check_can_calc_sel(*l_param, *r_param, can_calc_sel)))) { + LOG_WARN("failed to check can calc sel", K(ret)); + } + } + } + } else if ((l_expr.has_flag(CNT_COLUMN) && !r_expr.has_flag(CNT_COLUMN)) || + (!l_expr.has_flag(CNT_COLUMN) && r_expr.has_flag(CNT_COLUMN))) { + //column compare const + const ObRawExpr *cnt_col_expr = l_expr.has_flag(CNT_COLUMN) ? 
&l_expr : &r_expr; + ObSEArray column_exprs; + bool only_monotonic_op = true; + if (OB_FAIL(ObOptSelectivity::remove_ignorable_func_for_est_sel(cnt_col_expr))) { + LOG_WARN("failed to remove ignorable function", K(ret)); + } else if (cnt_col_expr->is_column_ref_expr()) { + //do nothing + } else if (OB_FAIL(ObOptEstUtils::extract_column_exprs_with_op_check(cnt_col_expr, + column_exprs, + only_monotonic_op))) { + LOG_WARN("failed to extract column exprs with op check", K(ret)); + } else if (!only_monotonic_op || column_exprs.count() > 1) { + can_calc_sel= false; + } else {/*do nothing*/} + } else if (l_expr.has_flag(CNT_COLUMN) && r_expr.has_flag(CNT_COLUMN)) { + //column compare column + const ObRawExpr* left_expr = &l_expr; + const ObRawExpr* right_expr = &r_expr; + if (left_expr->get_relation_ids() != right_expr->get_relation_ids()) { + //do noting, not same table, dynamic sampling not support join. + } else if (OB_FAIL(ObOptSelectivity::remove_ignorable_func_for_est_sel(left_expr)) || + OB_FAIL(ObOptSelectivity::remove_ignorable_func_for_est_sel(right_expr))) { + LOG_WARN("failed to remove ignorable function", K(ret)); + } else if (OB_ISNULL(left_expr) || OB_ISNULL(right_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret), K(left_expr), K(right_expr)); + } else if (left_expr->is_column_ref_expr() && right_expr->is_column_ref_expr()) { + //do nothing + } else {// func(col) = func(col) or col = func(col) + can_calc_sel = false; + } + } else { + //const compare const + //do nothing + } + return ret; +} + +int ObIsSelEstimator::create_estimator(ObSelEstimatorFactory &factory, + const OptSelectivityCtx &ctx, + const ObRawExpr &expr, + ObSelEstimator *&estimator) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(create_simple_estimator(factory, ctx, expr, estimator))) { + LOG_WARN("failed to create simple estimator", K(ret)); + } else if (OB_ISNULL(estimator)) { + //do nothing + } else if (OB_UNLIKELY(expr.get_param_count() != 2)) { + ret = 
OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected error", K(ret), K(expr)); + } else { + const ParamStore *params = ctx.get_params(); + const ObDMLStmt *stmt = ctx.get_stmt(); + const ObRawExpr *left_expr = expr.get_param_expr(0); + const ObRawExpr *right_expr = expr.get_param_expr(1); + bool got_result = false; + if (OB_ISNULL(params) || OB_ISNULL(stmt) || OB_ISNULL(left_expr) || OB_ISNULL(right_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpect null", K(ret), K(params), K(stmt), K(left_expr), K(right_expr)); + } else if (OB_UNLIKELY(!ObOptEstUtils::is_calculable_expr(*right_expr, params->count()))) { + //do nothing + } else if (OB_FAIL(ObSQLUtils::calc_const_or_calculable_expr(ctx.get_opt_ctx().get_exec_ctx(), + right_expr, + static_cast(estimator)->right_const_obj_, + got_result, + ctx.get_allocator()))) { + LOG_WARN("failed to calculate const or calculable expr", K(ret)); + } else if (!got_result) { + // do nothing + } else if (OB_FAIL(ObOptSelectivity::remove_ignorable_func_for_est_sel(left_expr))) { + LOG_WARN("failed to remove ignorable func", KPC(left_expr)); + } else if (left_expr->is_column_ref_expr()) { + if (OB_FAIL(ObOptSelectivity::check_column_in_current_level_stmt(stmt, *left_expr))) { + LOG_WARN("Failed to check column whether is in current stmt", K(ret)); + } else if (static_cast(estimator)->right_const_obj_.is_null() || + (static_cast(estimator)->right_const_obj_.is_tinyint() && + !ob_is_string_or_lob_type(left_expr->get_data_type()))) { + static_cast(estimator)->can_calc_sel_ = true; + static_cast(estimator)->left_expr_ = left_expr; + } + } + } + return ret; +} + +int ObCmpSelEstimator::create_estimator(ObSelEstimatorFactory &factory, + const OptSelectivityCtx &ctx, + const ObRawExpr &expr, + ObSelEstimator *&estimator) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(create_simple_estimator(factory, ctx, expr, estimator))) { + LOG_WARN("failed to create simple estimator", K(ret)); + } else if (OB_ISNULL(estimator)) { + //do nothing + } else if 
(OB_UNLIKELY(expr.get_param_count() != 2)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected error", K(ret), K(expr)); + } else { + const ObRawExpr *left_expr = expr.get_param_expr(0); + const ObRawExpr *right_expr = expr.get_param_expr(1); + if (OB_ISNULL(left_expr) || OB_ISNULL(right_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null expr", K(ret), K(left_expr), K(right_expr)); + } else if (OB_FAIL(ObOptimizerUtil::get_expr_without_lossless_cast(left_expr, left_expr)) || + OB_FAIL(ObOptimizerUtil::get_expr_without_lossless_cast(right_expr, right_expr))) { + LOG_WARN("failed to get expr without lossless cast", K(ret)); + } else if ((left_expr->is_column_ref_expr() && right_expr->is_const_expr()) || + (left_expr->is_const_expr() && right_expr->is_column_ref_expr())) { + static_cast(estimator)->can_calc_sel_ = true; + static_cast(estimator)->col_expr_ = left_expr->is_column_ref_expr() ? static_cast(left_expr) : + static_cast(right_expr); + } else if (T_OP_ROW == left_expr->get_expr_type() && T_OP_ROW == right_expr->get_expr_type()) { + //only deal (col1, xx, xx) CMP (const, xx, xx) + if (left_expr->get_param_count() == 1 && OB_NOT_NULL(left_expr->get_param_expr(0)) && + T_OP_ROW == left_expr->get_param_expr(0)->get_expr_type()) { + left_expr = left_expr->get_param_expr(0); + } + if (right_expr->get_param_count() == 1 && OB_NOT_NULL(right_expr->get_param_expr(0)) && + T_OP_ROW == right_expr->get_param_expr(0)->get_expr_type()) { + right_expr = right_expr->get_param_expr(0); + } + if (left_expr->get_param_count() != right_expr->get_param_count()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("param count should be equal", + K(left_expr->get_param_count()), K(right_expr->get_param_count())); + } else if (left_expr->get_param_count() <= 1) { + // do nothing + } else if (OB_ISNULL(left_expr = left_expr->get_param_expr(0)) || + OB_ISNULL(right_expr = right_expr->get_param_expr(0))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret), K(left_expr), 
K(right_expr)); + } else if ((left_expr->is_column_ref_expr() && right_expr->is_const_expr()) || + (left_expr->is_const_expr() && right_expr->is_column_ref_expr())) { + static_cast(estimator)->can_calc_sel_ = true; + static_cast(estimator)->col_expr_ = left_expr->is_column_ref_expr() ? static_cast(left_expr) : + static_cast(right_expr); + } else { /* no dothing */ } + } + } + return ret; +} + +int ObBtwSelEstimator::create_estimator(ObSelEstimatorFactory &factory, + const OptSelectivityCtx &ctx, + const ObRawExpr &expr, + ObSelEstimator *&estimator) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(create_simple_estimator(factory, ctx, expr, estimator))) { + LOG_WARN("failed to create simple estimator", K(ret)); + } else if (OB_ISNULL(estimator)) { + //do nothing + } else { + const ObRawExpr *cmp_expr = NULL; + const ObRawExpr *l_expr = NULL; + const ObRawExpr *r_expr = NULL; + const ObRawExpr *col_expr = NULL; + const ParamStore *params = ctx.get_params(); + if (3 != expr.get_param_count()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("between expr should have 3 param", K(ret), K(expr)); + } else if (OB_ISNULL(params) || + OB_ISNULL(cmp_expr = expr.get_param_expr(0)) || + OB_ISNULL(l_expr = expr.get_param_expr(1)) || + OB_ISNULL(r_expr = expr.get_param_expr(2))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null params", K(ret), K(params), K(cmp_expr), K(l_expr), K(r_expr)); + } else if (OB_FAIL(ObOptimizerUtil::get_expr_without_lossless_cast(cmp_expr, cmp_expr))) { + LOG_WARN("failed to get expr without lossless cast", K(ret)); + } else if (cmp_expr->is_column_ref_expr() && + ObOptEstUtils::is_calculable_expr(*l_expr, params->count()) && + ObOptEstUtils::is_calculable_expr(*r_expr, params->count())) { + static_cast(estimator)->can_calc_sel_ = true; + static_cast(estimator)->col_expr_ = static_cast(cmp_expr); + } else if (ObOptEstUtils::is_calculable_expr(*cmp_expr, params->count()) && + l_expr->is_column_ref_expr() && + ObOptEstUtils::is_calculable_expr(*r_expr, 
params->count())) { + static_cast(estimator)->can_calc_sel_ = true; + static_cast(estimator)->col_expr_ = static_cast(l_expr); + } else if (ObOptEstUtils::is_calculable_expr(*cmp_expr, params->count()) && + ObOptEstUtils::is_calculable_expr(*l_expr, params->count()) && + r_expr->is_column_ref_expr()) { + static_cast(estimator)->can_calc_sel_ = true; + static_cast(estimator)->col_expr_ = static_cast(r_expr); + } + } + return ret; +} + +int ObNormalRangeSelEstimator::get_expr_range(const OptSelectivityCtx &ctx, + const ObRawExpr &qual, + const ObRawExpr *&expr, + SimpleRange &range, + bool &is_not_op, + bool &is_valid) +{ + int ret = OB_SUCCESS; + is_valid = false; + is_not_op = false; + expr = NULL; + const ObRawExpr *const_expr1 = NULL; + const ObRawExpr *const_expr2 = NULL; + ObObj const_value1; + ObObj const_value2; + bool got_result = false; + range.set_whole_range(); + ObItemType type = qual.get_expr_type(); + if (OB_FAIL(ObOptEstUtils::extract_var_op_const(&qual, + expr, + const_expr1, + const_expr2, + type, + is_valid))) { + LOG_WARN("failed to extract var and const", K(ret), K(qual)); + } else if (!is_valid) { + // do nothing + } else if (NULL == const_expr1 || !const_expr1->is_static_scalar_const_expr()) { + is_valid = false; + } else if (OB_FAIL(ObSQLUtils::calc_const_or_calculable_expr(ctx.get_opt_ctx().get_exec_ctx(), + const_expr1, + const_value1, + got_result, + ctx.get_allocator()))) { + LOG_WARN("failed to calc const value", K(expr), K(ret)); + } else if (!got_result) { + is_valid = false; + } else if (NULL == const_expr2 || !const_expr2->is_static_scalar_const_expr()) { + // do nothing + } else if (OB_FAIL(ObSQLUtils::calc_const_or_calculable_expr(ctx.get_opt_ctx().get_exec_ctx(), + const_expr2, + const_value2, + got_result, + ctx.get_allocator()))) { + LOG_WARN("failed to calc const value", K(expr), K(ret)); + } else if (!got_result) { + is_valid = false; + } + if (OB_SUCC(ret) && is_valid) { + if (IS_RANGE_CMP_OP(type) || T_OP_EQ == type || 
T_OP_NSEQ == type) { + range.set_bound(type, const_value1); + } else if (T_OP_NE == type) { + range.set_bound(T_OP_EQ, const_value1); + is_not_op = true; + } else if (T_OP_IS_NOT == type || T_OP_IS == type) { + if (const_value1.is_null()) { + range.set_bound(type, const_value1); + } else { + is_valid = false; + } + } else if (T_OP_BTW == type || T_OP_NOT_BTW == type) { + range.set_bound(T_OP_GE, const_value1); + range.set_bound(T_OP_LE, const_value2); + is_not_op = (T_OP_NOT_BTW == type); + } + } + return ret; +} + +int ObNormalRangeSelEstimator::merge(const ObSelEstimator &other_estmator, bool &is_success) +{ + int ret = OB_SUCCESS; + is_success = false; + if (get_type() == other_estmator.get_type() && !is_not_op_) { + const ObNormalRangeSelEstimator &other = static_cast(other_estmator); + if (!other.is_not_op_ && expr_ == other.expr_ && range_.intersect(other.range_)) { + is_success = true; + } + } + return ret; +} + +int ObUniformRangeSelEstimator::create_estimator(ObSelEstimatorFactory &factory, + const OptSelectivityCtx &ctx, + const ObRawExpr &expr, + ObSelEstimator *&estimator) +{ + int ret = OB_SUCCESS; + estimator = NULL; + ObUniformRangeSelEstimator *range_estimator = NULL; + bool is_valid = false; + const ObRawExpr *param_expr = NULL; + SimpleRange range; + bool is_not_op = false; + if (!ctx.check_opt_compat_version(COMPAT_VERSION_4_2_4, COMPAT_VERSION_4_3_0, + COMPAT_VERSION_4_3_3)) { + // do nothing + } else if (OB_FAIL(get_expr_range(ctx, expr, param_expr, range, is_not_op, is_valid))) { + LOG_WARN("failed to get the range form", K(ret), K(expr)); + } else if (!is_valid) { + // do nothing + } else if (OB_FAIL(factory.create_estimator_inner(range_estimator))) { + LOG_WARN("failed to create estimator", K(ret)); + } else { + estimator = range_estimator; + range_estimator->expr_ = param_expr; + range_estimator->range_ = range; + range_estimator->is_not_op_ = is_not_op; + } + return ret; +} + +int ObUniformRangeSelEstimator::get_sel(const OptTableMetas 
&table_metas, + const OptSelectivityCtx &ctx, + double &selectivity, + ObIArray &all_predicate_sel) +{ + int ret = OB_SUCCESS; + selectivity = DEFAULT_INEQ_SEL; + ObObj expr_min; + ObObj expr_max; + ObObj min_scalar; + ObObj max_scalar; + ObObj start_scalar; + ObObj end_scalar; + expr_min.set_min_value(); + expr_max.set_max_value(); + double ndv = 1.0; + double not_null_sel = 1.0; // todo + bool dummy = false; + bool discrete = (expr_->get_type_class() != ObFloatTC) && (expr_->get_type_class() != ObDoubleTC); + ObBorderFlag border_flag; + if (range_.inclusive_start_){ + border_flag.set_inclusive_start(); + } + if (range_.inclusive_end_) { + border_flag.set_inclusive_end(); + } + if (!range_.is_valid_range()) { + selectivity = 0.0; + } else if (OB_FAIL(ObOptSelectivity::calc_expr_min_max(table_metas, ctx, expr_, + expr_min, expr_max))) { + LOG_WARN("failed to get min max", K(ret)); + } else if (expr_min.is_min_value() || expr_min.is_max_value() || expr_min.is_null() || + expr_max.is_min_value() || expr_max.is_max_value() || expr_max.is_null()) { + // do nothing + } else if (OB_UNLIKELY(!expr_min.can_compare(expr_max)) || + OB_UNLIKELY(!expr_min.can_compare(range_.start_)) || + OB_UNLIKELY(!expr_min.can_compare(range_.end_))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("obj type is not consistent", K(expr_min), K(expr_max), KPC(this)); + } else if (OB_FAIL(ObOptEstObjToScalar::convert_objs_to_scalars(&expr_min, &expr_max, + &range_.start_, &range_.end_, + &min_scalar, &max_scalar, + &start_scalar, &end_scalar))) { + LOG_WARN("failed to convert obj to scalars", K(ret)); + } else if (OB_FAIL(ObOptSelectivity::calculate_distinct(table_metas, ctx, *expr_, ctx.get_current_rows(), ndv))) { + LOG_WARN("failed to calculate distinct", K(ret)); + } else if (OB_FAIL(ObOptSelectivity::do_calc_range_selectivity(min_scalar.get_double(), + max_scalar.get_double(), + start_scalar, + end_scalar, + ndv, + discrete, + border_flag, + dummy, + selectivity))) { + LOG_WARN("failed to do calc 
range selectivity", K(ret)); + } else if (!is_not_op_ && + OB_FAIL(refine_out_of_bounds_sel(table_metas, + ctx, + expr_min, + expr_max, + min_scalar.get_double(), + max_scalar.get_double(), + start_scalar.get_double(), + end_scalar.get_double(), + selectivity))) { + LOG_WARN("failed to refine out of bounds sel", K(ret)); + } else { + if (is_not_op_) { + selectivity = 1 - selectivity; + } + selectivity = std::max(selectivity, 1.0 / ndv); + selectivity *= not_null_sel; + } + LOG_DEBUG("succeed to calculate uniform range sel", + K(selectivity), K(discrete), K(expr_min), K(expr_max), K(range_), K(not_null_sel), K(ndv), KPC(expr_)); + return ret; +} + +int ObUniformRangeSelEstimator::refine_out_of_bounds_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObObj &min_val, + const ObObj &max_val, + const double min_scalar, + const double max_scalar, + const double start_scalar, + const double end_scalar, + double &selectivity) +{ + int ret = OB_SUCCESS; + ObSEArray column_exprs; + const OptTableMeta *table_meta = NULL; + double increase_rows_ratio = 0.0; + bool is_half = range_.start_.is_min_value() || range_.end_.is_max_value(); + double out_of_bounds_sel = 0.0; + bool need_calc = true; + if (OB_ISNULL(expr_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", KPC(this)); + } else if (expr_->get_relation_ids().num_members() != 1 || + min_val.is_min_value() || max_val.is_min_value() || + min_val.is_max_value() || max_val.is_max_value() || + (max_scalar - min_scalar < OB_DOUBLE_EPSINON) || + fabs(selectivity - 1.0) < OB_DOUBLE_EPSINON || + !range_.is_valid_range()) { + need_calc = false; + } else if (is_half) { + need_calc = true; + } else if (start_scalar >= min_scalar && end_scalar <= max_scalar) { + need_calc = false; + } else if (start_scalar <= min_scalar && end_scalar >= max_scalar) { + need_calc = false; + selectivity = 1.0; + } else if (start_scalar < min_scalar) { + need_calc = true; + out_of_bounds_sel = (std::min(min_scalar, 
end_scalar) - start_scalar) / (max_scalar - min_scalar); + } else if (end_scalar > max_scalar) { + need_calc = true; + out_of_bounds_sel = (end_scalar - std::max(max_scalar, start_scalar)) / (max_scalar - min_scalar); + } + if (OB_FAIL(ret) || !need_calc) { + } else if (OB_FAIL(ObRawExprUtils::extract_column_exprs(expr_, column_exprs))) { + LOG_WARN("extract_column_exprs error in clause_selectivity", K(ret)); + } else if (OB_UNLIKELY(column_exprs.count() < 1) || OB_ISNULL(column_exprs.at(0))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected column count", KPC(expr_), K(column_exprs)); + } else if (FALSE_IT(table_meta = table_metas.get_table_meta_by_table_id( + static_cast(column_exprs.at(0))->get_table_id()))) { + } else if (NULL == table_meta) { + // do nothing + } else if (OB_FAIL(table_meta->get_increase_rows_ratio(ctx.get_opt_ctx(), increase_rows_ratio))) { + LOG_WARN("failed to get extra rows", K(ret)); + } else if (is_half) { + selectivity = std::max(selectivity, DEFAULT_OUT_OF_BOUNDS_SEL * increase_rows_ratio); + } else { + selectivity += std::min(out_of_bounds_sel, increase_rows_ratio); + } + selectivity = ObOptSelectivity::revise_between_0_1(selectivity); + return ret; +} + }//end of namespace sql }//end of namespace oceanbase diff --git a/src/sql/optimizer/ob_sel_estimator.h b/src/sql/optimizer/ob_sel_estimator.h index b90ba5839a..b394b5618a 100644 --- a/src/sql/optimizer/ob_sel_estimator.h +++ b/src/sql/optimizer/ob_sel_estimator.h @@ -20,6 +20,44 @@ namespace oceanbase namespace sql { +struct SimpleRange +{ +public: + SimpleRange() + { + set_whole_range(); + } + + void set_whole_range(); + + void set_false_range(); + + int compare_with_end(const SimpleRange &r) const; + + int compare_with_start(const SimpleRange &r) const; + + bool intersect(const SimpleRange &r); + + void set_bound(ObItemType item_type, ObObj bound); + + void set_bound(ObItemType item_type, double bound); + + bool is_valid_range(); + + bool is_superset(const SimpleRange &r) const; + 
+ void multiply_double(double coff); + + TO_STRING_KV(K_(start), K_(end), + K_(inclusive_start), + K_(inclusive_end)); + + ObObj start_; + ObObj end_; + bool inclusive_start_; + bool inclusive_end_; +}; + enum class ObSelEstType { INVALID = 0, @@ -34,9 +72,10 @@ enum class ObSelEstType EQUAL, LIKE, BOOL_OP, - RANGE, + COLUMN_RANGE, SIMPLE_JOIN, INEQUAL_JOIN, + UNIFORM_RANGE, }; class ObSelEstimatorFactory; @@ -73,16 +112,21 @@ private: class ObSelEstimatorFactory { public: - explicit ObSelEstimatorFactory(common::ObIAllocator &alloc) - : allocator_(alloc), - estimator_store_(alloc) + explicit ObSelEstimatorFactory() + : allocator_("ObOptSel"), + estimator_store_(allocator_) + {} + + explicit ObSelEstimatorFactory(int64_t tenant_id) + : allocator_("ObOptSel", OB_MALLOC_NORMAL_BLOCK_SIZE, tenant_id), + estimator_store_(allocator_) {} ~ObSelEstimatorFactory() { destory(); } - inline common::ObIAllocator &get_allocator() { return allocator_; } + inline common::ObArenaAllocator &get_allocator() { return allocator_; } inline void destory() { DLIST_FOREACH_NORET(node, estimator_store_.get_obj_list()) { @@ -96,6 +140,9 @@ public: int create_estimator(const OptSelectivityCtx &ctx, const ObRawExpr *expr, ObSelEstimator *&new_estimator); + int create_estimators(const OptSelectivityCtx &ctx, + ObIArray &exprs, + ObIArray &estimators); template inline int create_estimator_inner(EstimatorType *&new_estimator) @@ -121,7 +168,7 @@ public: const ObRawExpr &, ObSelEstimator *&); private: - common::ObIAllocator &allocator_; + common::ObArenaAllocator allocator_; common::ObObjStore estimator_store_; private: DISALLOW_COPY_AND_ASSIGN(ObSelEstimatorFactory); @@ -421,40 +468,31 @@ private: class ObIsSelEstimator : public ObIndependentSelEstimator { public: - ObIsSelEstimator() : ObIndependentSelEstimator(ObSelEstType::IS) {} + ObIsSelEstimator() : + ObIndependentSelEstimator(ObSelEstType::IS), + can_calc_sel_(false), + left_expr_(NULL), + right_const_obj_() + {} virtual 
~ObIsSelEstimator() = default; static int create_estimator(ObSelEstimatorFactory &factory, const OptSelectivityCtx &ctx, const ObRawExpr &expr, - ObSelEstimator *&estimator) - { - return create_simple_estimator(factory, ctx, expr, estimator); - } - virtual bool tend_to_use_ds() override { return false; } + ObSelEstimator *&estimator); + + virtual bool tend_to_use_ds() override { return !can_calc_sel_; } virtual int get_sel(const OptTableMetas &table_metas, const OptSelectivityCtx &ctx, double &selectivity, - ObIArray &all_predicate_sel) override - { - int ret = OB_SUCCESS; - if (OB_ISNULL(expr_)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected null", KPC(this)); - } else { - ret = get_is_sel(table_metas, ctx, *expr_, selectivity); - } - return ret; - } + ObIArray &all_predicate_sel) override; inline static bool check_expr_valid(const ObRawExpr &expr) { return T_OP_IS == expr.get_expr_type() || T_OP_IS_NOT == expr.get_expr_type(); } private: - static int get_is_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObRawExpr &qual, - double &selectivity); -private: + bool can_calc_sel_; + const ObRawExpr *left_expr_; + common::ObObj right_const_obj_; DISABLE_COPY_ASSIGN(ObIsSelEstimator); }; @@ -463,40 +501,28 @@ private: class ObBtwSelEstimator : public ObIndependentSelEstimator { public: - ObBtwSelEstimator() : ObIndependentSelEstimator(ObSelEstType::BTW) {} + ObBtwSelEstimator() : + ObIndependentSelEstimator(ObSelEstType::BTW), + can_calc_sel_(false), + col_expr_(NULL) + {} virtual ~ObBtwSelEstimator() = default; static int create_estimator(ObSelEstimatorFactory &factory, const OptSelectivityCtx &ctx, const ObRawExpr &expr, - ObSelEstimator *&estimator) - { - return create_simple_estimator(factory, ctx, expr, estimator); - } - virtual bool tend_to_use_ds() override { return false; } + ObSelEstimator *&estimator); + virtual bool tend_to_use_ds() override { return !can_calc_sel_; } virtual int get_sel(const OptTableMetas &table_metas, 
const OptSelectivityCtx &ctx, double &selectivity, - ObIArray &all_predicate_sel) override - { - int ret = OB_SUCCESS; - if (OB_ISNULL(expr_)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected null", KPC(this)); - } else { - ret = get_btw_sel(table_metas, ctx, *expr_, selectivity); - } - return ret; - } + ObIArray &all_predicate_sel) override; inline static bool check_expr_valid(const ObRawExpr &expr) { return T_OP_BTW == expr.get_expr_type() || T_OP_NOT_BTW == expr.get_expr_type(); } private: - static int get_btw_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObRawExpr &qual, - double &selectivity); -private: + bool can_calc_sel_; + const ObColumnRefRawExpr *col_expr_; DISABLE_COPY_ASSIGN(ObBtwSelEstimator); }; @@ -506,40 +532,28 @@ private: class ObCmpSelEstimator : public ObIndependentSelEstimator { public: - ObCmpSelEstimator() : ObIndependentSelEstimator(ObSelEstType::CMP) {} + ObCmpSelEstimator() : + ObIndependentSelEstimator(ObSelEstType::CMP), + can_calc_sel_(false), + col_expr_(NULL) + {} virtual ~ObCmpSelEstimator() = default; static int create_estimator(ObSelEstimatorFactory &factory, const OptSelectivityCtx &ctx, const ObRawExpr &expr, - ObSelEstimator *&estimator) - { - return create_simple_estimator(factory, ctx, expr, estimator); - } - virtual bool tend_to_use_ds() override { return false; } + ObSelEstimator *&estimator); + virtual bool tend_to_use_ds() override { return !can_calc_sel_; } virtual int get_sel(const OptTableMetas &table_metas, const OptSelectivityCtx &ctx, double &selectivity, - ObIArray &all_predicate_sel) override - { - int ret = OB_SUCCESS; - if (OB_ISNULL(expr_)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected null", KPC(this)); - } else { - ret = get_range_cmp_sel(table_metas, ctx, *expr_, selectivity); - } - return ret; - } + ObIArray &all_predicate_sel) override; inline static bool check_expr_valid(const ObRawExpr &expr) { return IS_RANGE_CMP_OP(expr.get_expr_type()); } private: - static int 
get_range_cmp_sel(const OptTableMetas &table_metas, - const OptSelectivityCtx &ctx, - const ObRawExpr &qual, - double &selectivity); -private: + bool can_calc_sel_; + const ObColumnRefRawExpr *col_expr_; DISABLE_COPY_ASSIGN(ObCmpSelEstimator); }; @@ -549,17 +563,16 @@ class ObEqualSelEstimator : public ObIndependentSelEstimator { public: ObEqualSelEstimator() : - ObIndependentSelEstimator(ObSelEstType::EQUAL) {} + ObIndependentSelEstimator(ObSelEstType::EQUAL), + can_calc_sel_(false) + {} virtual ~ObEqualSelEstimator() = default; static int create_estimator(ObSelEstimatorFactory &factory, const OptSelectivityCtx &ctx, const ObRawExpr &expr, - ObSelEstimator *&estimator) - { - return create_simple_estimator(factory, ctx, expr, estimator); - } - virtual bool tend_to_use_ds() override { return false; } + ObSelEstimator *&estimator); + virtual bool tend_to_use_ds() override { return !can_calc_sel_; } virtual int get_sel(const OptTableMetas &table_metas, const OptSelectivityCtx &ctx, double &selectivity, @@ -625,7 +638,12 @@ private: const ObRawExpr &input_right_expr, ObItemType op_type, double &selectivity); + + static int check_can_calc_sel(const ObRawExpr &l_expr, + const ObRawExpr &r_expr, + bool &can_calc_sel); private: + bool can_calc_sel_; DISABLE_COPY_ASSIGN(ObEqualSelEstimator); }; @@ -643,7 +661,7 @@ public: variable_(NULL), pattern_(NULL), escape_(NULL), - can_calc_sel_(false), + can_calc_sel_by_prefix_(false), match_all_str_(false) {} virtual ~ObLikeSelEstimator() = default; @@ -651,18 +669,21 @@ public: const OptSelectivityCtx &ctx, const ObRawExpr &expr, ObSelEstimator *&estimator); - virtual bool tend_to_use_ds() override { return !can_calc_sel_; } + virtual bool tend_to_use_ds() override { return !can_calc_sel_by_prefix_; } virtual int get_sel(const OptTableMetas &table_metas, const OptSelectivityCtx &ctx, double &selectivity, ObIArray &all_predicate_sel) override; static int can_calc_like_sel(const OptSelectivityCtx &ctx, const ObRawExpr &expr, bool 
&can_calc_sel); - + int get_wildcard_length(const OptSelectivityCtx &ctx, double &wildcard_length); + int calculate_like_sel_by_substr(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + double &selectivity); private: const ObRawExpr *variable_; const ObRawExpr *pattern_; const ObRawExpr *escape_; - bool can_calc_sel_; + bool can_calc_sel_by_prefix_; bool match_all_str_; private: DISABLE_COPY_ASSIGN(ObLikeSelEstimator); @@ -687,6 +708,7 @@ public: const OptSelectivityCtx &ctx, double &selectivity, ObIArray &all_predicate_sel) override; + VIRTUAL_TO_STRING_KV(K_(type), K_(child_estimators)); private: common::ObSEArray child_estimators_; @@ -701,7 +723,7 @@ private: class ObRangeSelEstimator : public ObSelEstimator { public: - ObRangeSelEstimator() : ObSelEstimator(ObSelEstType::RANGE), column_expr_(NULL) {} + ObRangeSelEstimator() : ObSelEstimator(ObSelEstType::COLUMN_RANGE), column_expr_(NULL) {} virtual ~ObRangeSelEstimator() = default; static int create_estimator(ObSelEstimatorFactory &factory, @@ -792,6 +814,7 @@ private: const ObRelIds *left_rel_ids_; const ObRelIds *right_rel_ids_; + const ObRelIds *join_rel_ids_; common::ObSEArray join_conditions_; private: @@ -820,13 +843,7 @@ public: public: ObInequalJoinSelEstimator() : - ObSelEstimator(ObSelEstType::INEQUAL_JOIN), - has_lower_bound_(false), - has_upper_bound_(false), - include_lower_bound_(false), - include_upper_bound_(false), - lower_bound_(0), - upper_bound_(0) {} + ObSelEstimator(ObSelEstType::INEQUAL_JOIN) {} virtual ~ObInequalJoinSelEstimator() = default; static int create_estimator(ObSelEstimatorFactory &factory, @@ -843,10 +860,7 @@ public: virtual bool tend_to_use_ds() override { return false; } - VIRTUAL_TO_STRING_KV(K_(type), K_(term), - K_(has_lower_bound), K_(has_upper_bound), - K_(include_lower_bound), K_(include_upper_bound), - K_(lower_bound), K_(upper_bound)); + VIRTUAL_TO_STRING_KV(K_(type), K_(term), K_(range)); private: @@ -863,14 +877,6 @@ private: Term &term, double 
&offset); - static bool is_higher_lower_bound(double bound1, bool include1, double bound2, bool include2) - { - return bound1 > bound2 || (bound1 == bound2 && !include1 && include2); - } - static bool is_higher_upper_bound(double bound1, bool include1, double bound2, bool include2) - { - return bound1 > bound2 || (bound1 == bound2 && include1 && !include2); - } // c1 in [min1, max1], c2 in [min2, max2] // calc the sel of `c1 + c2 > offset`; static double get_gt_sel(double min1, @@ -902,25 +908,71 @@ private: double offset, bool is_semi); - double get_sel_for_point(double point1, double point2); - - void reverse(); - void update_lower_bound(double bound, bool include); - void update_upper_bound(double bound, bool include); - void set_bound(ObItemType item_type, double bound); + double get_sel_for_point(double point); Term term_; - bool has_lower_bound_; - bool has_upper_bound_; - bool include_lower_bound_; - bool include_upper_bound_; - double lower_bound_; - double upper_bound_; + SimpleRange range_; private: DISALLOW_COPY_AND_ASSIGN(ObInequalJoinSelEstimator); }; +class ObNormalRangeSelEstimator : public ObSelEstimator +{ +public: + ObNormalRangeSelEstimator(ObSelEstType type) : + ObSelEstimator(type), + expr_(NULL), + is_not_op_(false) {} + virtual ~ObNormalRangeSelEstimator() = default; + static int get_expr_range(const OptSelectivityCtx &ctx, + const ObRawExpr &qual, + const ObRawExpr *&expr, + SimpleRange &range, + bool &is_not_op, + bool &is_valid); + + virtual int merge(const ObSelEstimator &other, bool &is_success) override; + virtual bool is_independent() const override { return is_not_op_; } + void set_is_not_op(bool is_not) { is_not_op_ = is_not; } + + VIRTUAL_TO_STRING_KV(K_(type), KPC_(expr), K_(range), K_(is_not_op)); +protected: + const ObRawExpr *expr_; + SimpleRange range_; + bool is_not_op_; // not between +}; + +class ObUniformRangeSelEstimator : public ObNormalRangeSelEstimator +{ +public: + ObUniformRangeSelEstimator() : + 
ObNormalRangeSelEstimator(ObSelEstType::UNIFORM_RANGE) {} + virtual ~ObUniformRangeSelEstimator() = default; + + static int create_estimator(ObSelEstimatorFactory &factory, + const OptSelectivityCtx &ctx, + const ObRawExpr &expr, + ObSelEstimator *&estimator); + + virtual int get_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + double &selectivity, + ObIArray &all_predicate_sel) override; + + int refine_out_of_bounds_sel(const OptTableMetas &table_metas, + const OptSelectivityCtx &ctx, + const ObObj &min_value, + const ObObj &max_value, + const double min_scalar, + const double max_scalar, + const double start_scalar, + const double end_scalar, + double &selectivity); + + virtual bool tend_to_use_ds() override { return true; } +}; + } } diff --git a/src/sql/optimizer/ob_select_log_plan.cpp b/src/sql/optimizer/ob_select_log_plan.cpp index 1b33c59c86..186ac23bcb 100644 --- a/src/sql/optimizer/ob_select_log_plan.cpp +++ b/src/sql/optimizer/ob_select_log_plan.cpp @@ -3491,8 +3491,7 @@ int ObSelectLogPlan::get_minimal_cost_set_plan(const int64_t in_parallel, bool is_local_order = right_child->get_is_local_order() && !is_fully_partition_wise; ObPQDistributeMethod::Type dist_method = ObOptimizerUtil::get_right_dist_method (*right_child->get_sharding(), set_dist_algo); - right_plan->get_selectivity_ctx().init_op_ctx( - &right_child->get_output_equal_sets(), right_child->get_card()); + right_plan->get_selectivity_ctx().init_op_ctx(right_child); info.reset(); // is single, may allocate exchange above, set need_parallel_ as 1 and compute exchange cost in cost_sort_and_exchange info.need_parallel_ = right_child->is_single() ? 
ObGlobalHint::DEFAULT_PARALLEL : in_parallel; @@ -4845,7 +4844,8 @@ int ObSelectLogPlan::candi_allocate_window_function_with_hint(const ObIArrayget_output_const_exprs(), orig_top->get_card(), orig_top->get_is_at_most_one_row(), - qualify_filters); + qualify_filters, + orig_top->get_ambient_card()); while (OB_SUCC(ret) && !candi_plans.empty() && !remaining_exprs.empty()) { tmp_plans.reuse(); if (OB_FAIL(init_win_func_helper_with_hint(candi_plans, @@ -5100,7 +5100,8 @@ int ObSelectLogPlan::candi_allocate_window_function(const ObIArrayget_output_const_exprs(), orig_top->get_card(), orig_top->get_is_at_most_one_row(), - qualify_filters); + qualify_filters, + orig_top->get_ambient_card()); for (int64_t i = 0; OB_SUCC(ret) && i < candi_plans.count(); ++i) { OPT_TRACE("generate window function for plan:", candi_plans.at(i)); if (OB_FAIL(generate_window_functions_plan(win_func_helper, @@ -5847,6 +5848,7 @@ int ObSelectLogPlan::calc_ndvs_and_pby_oby_prefix(const ObIArray sort_key_exprs; sort_key_ndvs.reuse(); pby_oby_prefixes.reuse(); + get_selectivity_ctx().init_op_ctx(&win_func_helper.equal_sets_, card, &win_func_helper.ambient_card_); // Get NDV for each sort_keys prefix first. 
for (int64_t i = 0; i < sort_keys.count() && OB_SUCC(ret); i++) { @@ -7872,6 +7874,7 @@ int ObSelectLogPlan::init_selectivity_metas_for_set(ObSelectLogPlan *sub_plan, } else if (OB_ISNULL(best_plan)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(ret)); + } else if (FALSE_IT(sub_plan->get_selectivity_ctx().init_op_ctx(best_plan))) { } else if (OB_FAIL(get_basic_table_metas().add_set_child_stmt_meta_info( get_stmt(), sub_stmt, child_offset, sub_plan->get_update_table_metas(), diff --git a/src/sql/optimizer/ob_select_log_plan.h b/src/sql/optimizer/ob_select_log_plan.h index eb000756d0..0ae361ee9e 100644 --- a/src/sql/optimizer/ob_select_log_plan.h +++ b/src/sql/optimizer/ob_select_log_plan.h @@ -485,7 +485,8 @@ private: const ObIArray &const_exprs, const double card, const bool is_at_most_one_row, - const ObIArray &qualify_filters) + const ObIArray &qualify_filters, + const ObIArray &ambient_card) : all_win_func_exprs_(all_win_func_exprs), win_dist_hint_(win_dist_hint), explicit_hint_(explicit_hint), @@ -507,7 +508,8 @@ private: enable_topn_(false), topn_const_(NULL), is_fetch_with_ties_(false), - origin_sort_card_(0.0) + origin_sort_card_(0.0), + ambient_card_(ambient_card) { } virtual ~WinFuncOpHelper() {} @@ -545,6 +547,7 @@ private: ObRawExpr* topn_const_; bool is_fetch_with_ties_; double origin_sort_card_; + const ObIArray &ambient_card_; TO_STRING_KV(K_(win_dist_method), K_(win_op_idx), @@ -557,6 +560,7 @@ private: K_(ordered_win_func_exprs), K_(win_dist_hint), K_(explicit_hint), + K_(ambient_card), K_(enable_topn), K_(topn_const), K_(is_fetch_with_ties), diff --git a/src/sql/resolver/ddl/ob_analyze_stmt_resolver.cpp b/src/sql/resolver/ddl/ob_analyze_stmt_resolver.cpp index 20bfea94d5..966623f13d 100644 --- a/src/sql/resolver/ddl/ob_analyze_stmt_resolver.cpp +++ b/src/sql/resolver/ddl/ob_analyze_stmt_resolver.cpp @@ -462,6 +462,7 @@ int ObAnalyzeStmtResolver::resolve_for_clause_element(const ParseNode *for_claus int ret = OB_SUCCESS; ObSEArray 
all_for_col; ObAnalyzeTableInfo &table_info = analyze_stmt.get_tables().at(0); + bool is_async_gather = false; if (OB_ISNULL(for_clause_node)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("null parse node", K(ret)); @@ -474,6 +475,7 @@ int ObAnalyzeStmtResolver::resolve_for_clause_element(const ParseNode *for_claus bool use_size_auto = false; if (OB_FAIL(pl::ObDbmsStats::parser_for_all_clause(for_clause_node, table_info.get_column_params(), + is_async_gather, use_size_auto))) { LOG_WARN("failed to resolve for all clause", K(ret)); } else { diff --git a/src/sql/resolver/dml/ob_dml_stmt.cpp b/src/sql/resolver/dml/ob_dml_stmt.cpp index a1669c3602..b2b8ff088e 100644 --- a/src/sql/resolver/dml/ob_dml_stmt.cpp +++ b/src/sql/resolver/dml/ob_dml_stmt.cpp @@ -3019,6 +3019,26 @@ int ObDMLStmt::relids_to_table_ids(const ObSqlBitSet<> &table_set, return ret; } +int ObDMLStmt::relids_to_table_ids(const ObRelIds &table_set, + ObIArray &table_ids) const +{ + int ret = OB_SUCCESS; + TableItem *table = NULL; + int64_t idx = OB_INVALID_INDEX; + for (int64_t i = 0; OB_SUCC(ret) && i < table_items_.count(); ++i) { + if (OB_ISNULL(table = table_items_.at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("table item is null", K(ret)); + } else if (OB_UNLIKELY((idx = get_table_bit_index(table->table_id_)) == OB_INVALID_INDEX)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get table item invalid idx", K(idx), K(table->table_id_)); + } else if (table_set.has_member(idx)) { + ret = table_ids.push_back(table->table_id_); + } + } + return ret; +} + int ObDMLStmt::relids_to_table_items(const ObRelIds &table_set, ObIArray &tables) const { diff --git a/src/sql/resolver/dml/ob_dml_stmt.h b/src/sql/resolver/dml/ob_dml_stmt.h index 71a82785ef..9c52250e97 100644 --- a/src/sql/resolver/dml/ob_dml_stmt.h +++ b/src/sql/resolver/dml/ob_dml_stmt.h @@ -959,6 +959,7 @@ public: int relids_to_table_items(const ObRelIds &table_set, ObIArray &tables) const; int relids_to_table_items(const ObSqlBitSet<> &table_set, ObIArray 
&tables) const; + int relids_to_table_ids(const ObRelIds &table_set, ObIArray &table_ids) const; int relids_to_table_ids(const ObSqlBitSet<> &table_set, ObIArray &table_ids) const; int get_table_rel_ids(const TableItem &target, ObSqlBitSet<> &table_set) const; int get_table_rel_ids(const ObIArray &table_ids, ObSqlBitSet<> &table_set) const; diff --git a/src/sql/resolver/dml/ob_hint.cpp b/src/sql/resolver/dml/ob_hint.cpp index d4a395e5f1..b838c60518 100644 --- a/src/sql/resolver/dml/ob_hint.cpp +++ b/src/sql/resolver/dml/ob_hint.cpp @@ -876,6 +876,16 @@ bool ObOptParamHint::is_param_val_valid(const OptParamType param_type, const ObO is_valid = val.is_int() && (0 <= val.get_int() && val.get_int() <= 4); break; } + case CORRELATION_FOR_CARDINALITY_ESTIMATION: { + if (val.is_int()) { + is_valid = 0 <= val.get_int() && val.get_int() < static_cast(ObEstCorrelationType::MAX); + } else if (val.is_varchar()) { + int64_t type = OB_INVALID_ID; + ObSysVarCardinalityEstimationModel sv; + is_valid = (OB_SUCCESS == sv.find_type(val.get_varchar(), type)); + } + break; + } default: LOG_TRACE("invalid opt param val", K(param_type), K(val)); break; @@ -960,6 +970,36 @@ int ObOptParamHint::get_integer_opt_param(const OptParamType param_type, int64_t return ret; } +int ObOptParamHint::get_enum_opt_param(const OptParamType param_type, int64_t &val) const +{ + int ret = OB_SUCCESS; + ObObj obj; + if (OB_FAIL(get_opt_param(param_type, obj))) { + LOG_WARN("fail to get rowsets_enabled opt_param", K(ret)); + } else if (obj.is_nop_value()) { + // do nothing + } else if (obj.is_int()) { + val = obj.get_int(); + } else if (obj.is_varchar()) { + switch (param_type) { + case CORRELATION_FOR_CARDINALITY_ESTIMATION: { + ObSysVarCardinalityEstimationModel sv; + if (OB_FAIL(sv.find_type(obj.get_varchar(), val))) { + LOG_WARN("param obj is invalid", K(ret), K(obj)); + } + break; + } + default: + ret = OB_ERR_UNEXPECTED; + LOG_WARN("enum param is invalid", K(ret), K(obj)); + } + } else { + ret = 
OB_ERR_UNEXPECTED; + LOG_WARN("param obj is invalid", K(ret), K(obj)); + } + return ret; +} + int ObOptParamHint::has_opt_param(const OptParamType param_type, bool &has_hint) const { int ret = OB_SUCCESS; diff --git a/src/sql/resolver/dml/ob_hint.h b/src/sql/resolver/dml/ob_hint.h index 135c0df638..40afd02ca1 100644 --- a/src/sql/resolver/dml/ob_hint.h +++ b/src/sql/resolver/dml/ob_hint.h @@ -160,6 +160,7 @@ struct ObOptParamHint DEF(SPILL_COMPRESSION_CODEC,) \ DEF(INLIST_REWRITE_THRESHOLD,) \ DEF(PUSHDOWN_STORAGE_LEVEL,) \ + DEF(CORRELATION_FOR_CARDINALITY_ESTIMATION,) \ DECLARE_ENUM(OptParamType, opt_param, OPT_PARAM_TYPE_DEF, static); @@ -175,6 +176,7 @@ struct ObOptParamHint // if the corresponding opt_param is specified, the `val` will be overwritten int get_bool_opt_param(const OptParamType param_type, bool &val) const; int get_integer_opt_param(const OptParamType param_type, int64_t &val) const; + int get_enum_opt_param(const OptParamType param_type, int64_t &val) const; int has_opt_param(const OptParamType param_type, bool &has_hint) const; bool empty() const { return param_types_.empty(); } int check_and_get_bool_opt_param(const OptParamType param_type, bool &has_opt_param, bool &val) const; diff --git a/src/sql/resolver/expr/ob_raw_expr_util.cpp b/src/sql/resolver/expr/ob_raw_expr_util.cpp index 25b1ec17b6..bf94aa0a42 100644 --- a/src/sql/resolver/expr/ob_raw_expr_util.cpp +++ b/src/sql/resolver/expr/ob_raw_expr_util.cpp @@ -3681,7 +3681,7 @@ int ObRawExprUtils::extract_contain_exprs(ObRawExpr *raw_expr, } int ObRawExprUtils::extract_column_exprs(ObIArray &exprs, - ObRelIds &rel_ids, + const ObRelIds &rel_ids, ObIArray &column_exprs) { int ret = OB_SUCCESS; @@ -3697,7 +3697,7 @@ int ObRawExprUtils::extract_column_exprs(ObIArray &exprs, } int ObRawExprUtils::extract_column_exprs(ObRawExpr* expr, - ObRelIds &rel_ids, + const ObRelIds &rel_ids, ObIArray &column_exprs) { int ret = OB_SUCCESS; diff --git a/src/sql/resolver/expr/ob_raw_expr_util.h 
b/src/sql/resolver/expr/ob_raw_expr_util.h index 1b126bff5f..cc62bfc11f 100644 --- a/src/sql/resolver/expr/ob_raw_expr_util.h +++ b/src/sql/resolver/expr/ob_raw_expr_util.h @@ -431,10 +431,10 @@ public: static int extract_column_exprs(const ObRawExpr *expr, ObIArray &column_exprs); static int extract_column_exprs(ObRawExpr* expr, - ObRelIds &rel_ids, + const ObRelIds &rel_ids, ObIArray &column_exprs); static int extract_column_exprs(ObIArray &exprs, - ObRelIds &rel_ids, + const ObRelIds &rel_ids, ObIArray &column_exprs); static int extract_contain_exprs(ObRawExpr *raw_expr, const common::ObIArray &src_exprs, diff --git a/src/sql/rewrite/ob_transform_groupby_pullup.cpp b/src/sql/rewrite/ob_transform_groupby_pullup.cpp index 698ec1af6b..caebe79b42 100644 --- a/src/sql/rewrite/ob_transform_groupby_pullup.cpp +++ b/src/sql/rewrite/ob_transform_groupby_pullup.cpp @@ -1193,7 +1193,7 @@ int ObTransformGroupByPullup::calc_group_exprs_ndv(const ObIArray &g LOG_WARN("unexpect null logical operator", K(ret)); } else { card = child_op->get_card(); - plan->get_selectivity_ctx().init_op_ctx(&child_op->get_output_equal_sets(), card); + plan->get_selectivity_ctx().init_op_ctx(child_op); if (group_exprs.empty()) { group_ndv = 1.0; } else if (OB_FAIL(ObOptSelectivity::calculate_distinct(plan->get_update_table_metas(), diff --git a/tools/deploy/mysql_test/r/mysql/view_2.result b/tools/deploy/mysql_test/r/mysql/view_2.result index 145f1ce051..f8040d6477 100644 --- a/tools/deploy/mysql_test/r/mysql/view_2.result +++ b/tools/deploy/mysql_test/r/mysql/view_2.result @@ -481,7 +481,7 @@ Optimization Info: table_dop:1 dop_method:Table DOP avaiable_index_name:[t11] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:1 estimation method:[DYNAMIC SAMPLING FULL] t21: @@ -493,7 +493,7 @@ Optimization Info: table_dop:1 dop_method:Table DOP avaiable_index_name:[t21] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling 
level:1 estimation method:[DYNAMIC SAMPLING FULL] Plan Type: diff --git a/tools/deploy/mysql_test/test_suite/column_store_encoding/r/mysql/basic_cs_encoding.result b/tools/deploy/mysql_test/test_suite/column_store_encoding/r/mysql/basic_cs_encoding.result index 0b120b7ed8..7e0e7c806c 100644 --- a/tools/deploy/mysql_test/test_suite/column_store_encoding/r/mysql/basic_cs_encoding.result +++ b/tools/deploy/mysql_test/test_suite/column_store_encoding/r/mysql/basic_cs_encoding.result @@ -42,8 +42,8 @@ Query Plan ================================================================ |ID|OPERATOR |NAME|EST.ROWS|EST.TIME(us)| ---------------------------------------------------------------- -|0 |SORT | |6 |35 | -|1 |└─NESTED-LOOP JOIN | |6 |35 | +|0 |SORT | |5 |35 | +|1 |└─NESTED-LOOP JOIN | |5 |35 | |2 | ├─COLUMN TABLE FULL SCAN |t4 |2 |3 | |3 | └─DISTRIBUTED TABLE RANGE SCAN|t3 |3 |16 | ================================================================ diff --git a/tools/deploy/mysql_test/test_suite/datatype/r/mysql/div.result b/tools/deploy/mysql_test/test_suite/datatype/r/mysql/div.result index 88eabcee6e..183cbc1c58 100644 --- a/tools/deploy/mysql_test/test_suite/datatype/r/mysql/div.result +++ b/tools/deploy/mysql_test/test_suite/datatype/r/mysql/div.result @@ -198557,7 +198557,7 @@ Query Plan |1 |└─PX COORDINATOR | |1 |6 | |2 | └─EXCHANGE OUT DISTR |:EX10000 |1 |6 | |3 | └─MERGE GROUP BY | |1 |6 | -|4 | └─NESTED-LOOP ANTI JOIN | |14 |5 | +|4 | └─NESTED-LOOP ANTI JOIN | |14 |6 | |5 | ├─PX PARTITION ITERATOR| |14 |5 | |6 | │ └─TABLE FULL SCAN |table1000_key_pk_parts_2|14 |5 | |7 | └─MATERIAL | |1 |1 | diff --git a/tools/deploy/mysql_test/test_suite/executor/r/mysql/basic.result b/tools/deploy/mysql_test/test_suite/executor/r/mysql/basic.result index 5fe19c057c..be99f608b8 100644 --- a/tools/deploy/mysql_test/test_suite/executor/r/mysql/basic.result +++ b/tools/deploy/mysql_test/test_suite/executor/r/mysql/basic.result @@ -351,8 +351,8 @@ Query Plan 
========================================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------------------------- -|0 |PX COORDINATOR MERGE SORT | |1 |41 | -|1 |└─EXCHANGE OUT DISTR |:EX10001 |1 |41 | +|0 |PX COORDINATOR MERGE SORT | |1 |42 | +|1 |└─EXCHANGE OUT DISTR |:EX10001 |1 |42 | |2 | └─MERGE GROUP BY | |1 |41 | |3 | └─EXCHANGE IN MERGE SORT DISTR | |20 |39 | |4 | └─EXCHANGE OUT DISTR (HASH) |:EX10000 |20 |31 | @@ -462,7 +462,7 @@ Query Plan |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------------------------- |0 |PX COORDINATOR MERGE SORT | |1 |44 | -|1 |└─EXCHANGE OUT DISTR |:EX10001 |1 |43 | +|1 |└─EXCHANGE OUT DISTR |:EX10001 |1 |44 | |2 | └─MERGE GROUP BY | |1 |43 | |3 | └─EXCHANGE IN MERGE SORT DISTR | |20 |41 | |4 | └─EXCHANGE OUT DISTR (HASH) |:EX10000 |20 |33 | @@ -571,7 +571,7 @@ Query Plan ============================================================================ |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ---------------------------------------------------------------------------- -|0 |PX COORDINATOR MERGE SORT | |1 |60 | +|0 |PX COORDINATOR MERGE SORT | |1 |61 | |1 |└─EXCHANGE OUT DISTR |:EX10001 |1 |60 | |2 | └─MERGE GROUP BY | |1 |60 | |3 | └─EXCHANGE IN MERGE SORT DISTR | |20 |58 | diff --git a/tools/deploy/mysql_test/test_suite/geometry/r/mysql/geometry_filter_mysql.result b/tools/deploy/mysql_test/test_suite/geometry/r/mysql/geometry_filter_mysql.result index 6480e4d338..2df7cdf8a1 100644 --- a/tools/deploy/mysql_test/test_suite/geometry/r/mysql/geometry_filter_mysql.result +++ b/tools/deploy/mysql_test/test_suite/geometry/r/mysql/geometry_filter_mysql.result @@ -133,7 +133,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -195,7 
+195,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -257,7 +257,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -269,7 +269,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|2 |879 | +|0 |TABLE FULL SCAN|t(idx)|4 |879 | ================================================= Outputs & filters: ------------------------------------- @@ -315,12 +315,12 @@ Optimization Info: physical_range_rows:26 logical_range_rows:26 index_back_rows:6 - output_rows:1 + output_rows:3 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -382,7 +382,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -445,7 +445,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -457,7 +457,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|2 |799 | +|0 |TABLE FULL SCAN|t(idx)|3 |799 | 
================================================= Outputs & filters: ------------------------------------- @@ -503,12 +503,12 @@ Optimization Info: physical_range_rows:24 logical_range_rows:24 index_back_rows:6 - output_rows:1 + output_rows:3 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -570,7 +570,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -691,7 +691,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -703,7 +703,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|1 |157 | +|0 |TABLE FULL SCAN|t(idx)|1 |158 | ================================================= Outputs & filters: ------------------------------------- @@ -753,7 +753,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -1024,7 +1024,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[geom, geo_table2] pruned_index_name:[geo_table2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -1094,7 +1094,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[geom, geo_table2] pruned_index_name:[geo_table2] - 
stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -1180,7 +1180,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[geom, geo_table] pruned_index_name:[geo_table] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -1348,7 +1348,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|2 |999 | +|0 |TABLE FULL SCAN|t(idx)|4 |999 | ================================================= Outputs & filters: ------------------------------------- @@ -1393,12 +1393,12 @@ Optimization Info: physical_range_rows:31 logical_range_rows:31 index_back_rows:7 - output_rows:1 + output_rows:3 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -1410,7 +1410,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|2 |999 | +|0 |TABLE FULL SCAN|t(idx)|4 |999 | ================================================= Outputs & filters: ------------------------------------- @@ -1455,12 +1455,12 @@ Optimization Info: physical_range_rows:31 logical_range_rows:31 index_back_rows:7 - output_rows:1 + output_rows:3 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -1472,7 +1472,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| 
------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|2 |999 | +|0 |TABLE FULL SCAN|t(idx)|4 |999 | ================================================= Outputs & filters: ------------------------------------- @@ -1517,12 +1517,12 @@ Optimization Info: physical_range_rows:31 logical_range_rows:31 index_back_rows:7 - output_rows:1 + output_rows:3 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -1534,7 +1534,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|3 |1128 | +|0 |TABLE FULL SCAN|t(idx)|5 |1128 | ================================================= Outputs & filters: ------------------------------------- @@ -1581,12 +1581,12 @@ Optimization Info: physical_range_rows:35 logical_range_rows:35 index_back_rows:8 - output_rows:2 + output_rows:4 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -1598,7 +1598,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|2 |999 | +|0 |TABLE FULL SCAN|t(idx)|4 |999 | ================================================= Outputs & filters: ------------------------------------- @@ -1643,12 +1643,12 @@ Optimization Info: physical_range_rows:31 logical_range_rows:31 index_back_rows:7 - output_rows:1 + output_rows:3 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 
estimation method:[DEFAULT, STORAGE] Plan Type: @@ -1660,7 +1660,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|2 |1031 | +|0 |TABLE FULL SCAN|t(idx)|4 |1032 | ================================================= Outputs & filters: ------------------------------------- @@ -1706,12 +1706,12 @@ Optimization Info: physical_range_rows:32 logical_range_rows:32 index_back_rows:8 - output_rows:2 + output_rows:4 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -1774,7 +1774,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -1786,7 +1786,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|2 |999 | +|0 |TABLE FULL SCAN|t(idx)|4 |999 | ================================================= Outputs & filters: ------------------------------------- @@ -1831,12 +1831,12 @@ Optimization Info: physical_range_rows:31 logical_range_rows:31 index_back_rows:7 - output_rows:1 + output_rows:3 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -2010,7 +2010,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan 
Type: @@ -2072,7 +2072,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -2134,7 +2134,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -2146,7 +2146,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|4 |1526 | +|0 |TABLE FULL SCAN|t(idx)|7 |1526 | ================================================= Outputs & filters: ------------------------------------- @@ -2192,12 +2192,12 @@ Optimization Info: physical_range_rows:49 logical_range_rows:49 index_back_rows:12 - output_rows:3 + output_rows:6 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -2259,7 +2259,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -2322,7 +2322,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -2334,7 +2334,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|3 |1387 | +|0 |TABLE FULL SCAN|t(idx)|6 
|1388 | ================================================= Outputs & filters: ------------------------------------- @@ -2380,12 +2380,12 @@ Optimization Info: physical_range_rows:45 logical_range_rows:45 index_back_rows:11 - output_rows:2 + output_rows:5 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -2447,7 +2447,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -2459,7 +2459,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|2 |999 | +|0 |TABLE FULL SCAN|t(idx)|4 |999 | ================================================= Outputs & filters: ------------------------------------- @@ -2504,12 +2504,12 @@ Optimization Info: physical_range_rows:31 logical_range_rows:31 index_back_rows:7 - output_rows:1 + output_rows:3 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -2521,7 +2521,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|2 |999 | +|0 |TABLE FULL SCAN|t(idx)|4 |999 | ================================================= Outputs & filters: ------------------------------------- @@ -2566,12 +2566,12 @@ Optimization Info: physical_range_rows:31 logical_range_rows:31 index_back_rows:7 - output_rows:1 + output_rows:3 table_dop:1 dop_method:Table DOP 
avaiable_index_name:[idx, t] pruned_index_name:[t] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -2583,7 +2583,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|2 |999 | +|0 |TABLE FULL SCAN|t(idx)|4 |999 | ================================================= Outputs & filters: ------------------------------------- @@ -2628,12 +2628,12 @@ Optimization Info: physical_range_rows:31 logical_range_rows:31 index_back_rows:7 - output_rows:1 + output_rows:3 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -2645,7 +2645,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|3 |1128 | +|0 |TABLE FULL SCAN|t(idx)|5 |1128 | ================================================= Outputs & filters: ------------------------------------- @@ -2692,12 +2692,12 @@ Optimization Info: physical_range_rows:35 logical_range_rows:35 index_back_rows:8 - output_rows:2 + output_rows:4 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -2709,7 +2709,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|2 |999 | +|0 |TABLE FULL SCAN|t(idx)|4 |999 | ================================================= Outputs & filters: ------------------------------------- @@ 
-2754,12 +2754,12 @@ Optimization Info: physical_range_rows:31 logical_range_rows:31 index_back_rows:7 - output_rows:1 + output_rows:3 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -2771,7 +2771,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|2 |1031 | +|0 |TABLE FULL SCAN|t(idx)|4 |1032 | ================================================= Outputs & filters: ------------------------------------- @@ -2817,12 +2817,12 @@ Optimization Info: physical_range_rows:32 logical_range_rows:32 index_back_rows:8 - output_rows:2 + output_rows:4 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -2885,7 +2885,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -2897,7 +2897,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|2 |999 | +|0 |TABLE FULL SCAN|t(idx)|4 |999 | ================================================= Outputs & filters: ------------------------------------- @@ -2942,12 +2942,12 @@ Optimization Info: physical_range_rows:31 logical_range_rows:31 index_back_rows:7 - output_rows:1 + output_rows:3 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic 
sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -3031,7 +3031,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: diff --git a/tools/deploy/mysql_test/test_suite/geometry/r/mysql/geometry_index2_mysql.result b/tools/deploy/mysql_test/test_suite/geometry/r/mysql/geometry_index2_mysql.result index 088d643c60..5350144437 100644 --- a/tools/deploy/mysql_test/test_suite/geometry/r/mysql/geometry_index2_mysql.result +++ b/tools/deploy/mysql_test/test_suite/geometry/r/mysql/geometry_index2_mysql.result @@ -247,7 +247,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[idx, tt2] pruned_index_name:[idx] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -296,7 +296,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[idx, tt2] pruned_index_name:[idx] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -438,7 +438,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[idx, tt2] unstable_index_name:[tt2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -577,7 +577,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[idx, tt2] pruned_index_name:[tt2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -716,7 +716,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[idx, tt2] unstable_index_name:[tt2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan 
Type: @@ -855,7 +855,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[idx, tt2] pruned_index_name:[tt2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: diff --git a/tools/deploy/mysql_test/test_suite/geometry/r/mysql/geometry_partition_table_mysql.result b/tools/deploy/mysql_test/test_suite/geometry/r/mysql/geometry_partition_table_mysql.result index 47c1a2405e..98b28e096f 100644 --- a/tools/deploy/mysql_test/test_suite/geometry/r/mysql/geometry_partition_table_mysql.result +++ b/tools/deploy/mysql_test/test_suite/geometry/r/mysql/geometry_partition_table_mysql.result @@ -61,10 +61,10 @@ Query Plan ============================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------------- -|0 |PX COORDINATOR | |1 |543 | -|1 |└─EXCHANGE OUT DISTR |:EX10000|1 |542 | -|2 | └─PX PARTITION ITERATOR| |1 |541 | -|3 | └─TABLE FULL SCAN |t1(idx) |1 |541 | +|0 |PX COORDINATOR | |2 |544 | +|1 |└─EXCHANGE OUT DISTR |:EX10000|2 |543 | +|2 | └─PX PARTITION ITERATOR| |2 |541 | +|3 | └─TABLE FULL SCAN |t1(idx) |2 |541 | ============================================================= Outputs & filters: ------------------------------------- diff --git a/tools/deploy/mysql_test/test_suite/geometry/r/mysql/spatial_relation_join_mysql.result b/tools/deploy/mysql_test/test_suite/geometry/r/mysql/spatial_relation_join_mysql.result index 06ce1c9ae7..bc29ba4c5c 100644 --- a/tools/deploy/mysql_test/test_suite/geometry/r/mysql/spatial_relation_join_mysql.result +++ b/tools/deploy/mysql_test/test_suite/geometry/r/mysql/spatial_relation_join_mysql.result @@ -89,7 +89,7 @@ Optimization Info: table_dop:1 dop_method:Table DOP avaiable_index_name:[tgnoindex2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:1 estimation method:[DYNAMIC SAMPLING FULL] tgnoindex1: @@ -101,7 
+101,7 @@ Optimization Info: table_dop:1 dop_method:Table DOP avaiable_index_name:[tgnoindex1] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:1 estimation method:[DYNAMIC SAMPLING FULL] Plan Type: @@ -196,7 +196,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[gidx1, tgeom1] pruned_index_name:[gidx1] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:1 estimation method:[DYNAMIC SAMPLING FULL] tgeom2: @@ -208,7 +208,7 @@ Optimization Info: table_dop:1 dop_method:DAS DOP avaiable_index_name:[gidx2, tgeom2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:1 estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: @@ -312,7 +312,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[pgidx1, ptgeom1] pruned_index_name:[pgidx1] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:1 estimation method:[DYNAMIC SAMPLING FULL] ptgeom2: @@ -324,7 +324,7 @@ Optimization Info: table_dop:1 dop_method:DAS DOP avaiable_index_name:[pgidx2, ptgeom2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:1 estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: diff --git a/tools/deploy/mysql_test/test_suite/join/r/mysql/anti_semi_join.result b/tools/deploy/mysql_test/test_suite/join/r/mysql/anti_semi_join.result index 0a9e0afe8d..944f13961f 100644 --- a/tools/deploy/mysql_test/test_suite/join/r/mysql/anti_semi_join.result +++ b/tools/deploy/mysql_test/test_suite/join/r/mysql/anti_semi_join.result @@ -954,7 +954,7 @@ Query Plan ======================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| -------------------------------------------------------- -|0 |NESTED-LOOP ANTI JOIN | |66 |55 | +|0 |NESTED-LOOP ANTI JOIN | |66 |56 | |1 |├─TABLE FULL SCAN |xy_x_t|74 |6 | |2 |└─MATERIAL | |16 |6 | |3 | └─TABLE FULL SCAN |xy_y_t|16 
|3 | @@ -2352,7 +2352,7 @@ Query Plan ======================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------- -|0 |NESTED-LOOP ANTI JOIN | |1 |14 | +|0 |NESTED-LOOP ANTI JOIN | |1 |13 | |1 |├─TABLE FULL SCAN |xy_t2|8 |3 | |2 |└─MATERIAL | |116 |9 | |3 | └─TABLE FULL SCAN |xy_t1|116 |6 | @@ -2751,8 +2751,8 @@ Query Plan ==================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ---------------------------------------------------- -|0 |SCALAR GROUP BY | |1 |582 | -|1 |└─SUBPLAN FILTER | |29 |581 | +|0 |SCALAR GROUP BY | |1 |583 | +|1 |└─SUBPLAN FILTER | |42 |581 | |2 | ├─TABLE FULL SCAN|xy_t1|116 |8 | |3 | ├─TABLE FULL SCAN|xy_t3|1 |3 | |4 | └─TABLE FULL SCAN|xy_t2|1 |3 | @@ -2829,7 +2829,7 @@ Query Plan |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------------------------- |0 |SCALAR GROUP BY | |1 |4168 | -|1 |└─SUBPLAN FILTER | |29 |4167 | +|1 |└─SUBPLAN FILTER | |42 |4167 | |2 | ├─TABLE FULL SCAN |xy_t1 |116 |8 | |3 | ├─DISTRIBUTED TABLE RANGE SCAN|xy_t3(idx_c2)|1 |18 | |4 | └─DISTRIBUTED TABLE RANGE SCAN|xy_t2(idx_c2)|1 |18 | @@ -4318,7 +4318,7 @@ Query Plan ======================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| -------------------------------------------------------- -|0 |NESTED-LOOP ANTI JOIN | |5 |96 | +|0 |NESTED-LOOP ANTI JOIN | |5 |97 | |1 |├─TABLE FULL SCAN |xy_t2|5 |3 | |2 |└─DISTRIBUTED TABLE GET|xy_t1|1 |18 | ======================================================== diff --git a/tools/deploy/mysql_test/test_suite/join/r/mysql/join_merge.result b/tools/deploy/mysql_test/test_suite/join/r/mysql/join_merge.result index f527899861..0383242282 100644 --- a/tools/deploy/mysql_test/test_suite/join/r/mysql/join_merge.result +++ b/tools/deploy/mysql_test/test_suite/join/r/mysql/join_merge.result @@ -19,7 +19,7 @@ Query Plan 
=================================================== |ID|OPERATOR |NAME|EST.ROWS|EST.TIME(us)| --------------------------------------------------- -|0 |MERGE JOIN | |5 |7 | +|0 |MERGE JOIN | |6 |7 | |1 |├─SORT | |5 |3 | |2 |│ └─TABLE FULL SCAN|bb |5 |3 | |3 |└─SORT | |6 |4 | @@ -52,7 +52,7 @@ Query Plan =================================================== |ID|OPERATOR |NAME|EST.ROWS|EST.TIME(us)| --------------------------------------------------- -|0 |MERGE JOIN | |5 |7 | +|0 |MERGE JOIN | |6 |7 | |1 |├─SORT | |5 |3 | |2 |│ └─TABLE FULL SCAN|bb |5 |3 | |3 |└─SORT | |6 |4 | @@ -85,7 +85,7 @@ Query Plan =================================================== |ID|OPERATOR |NAME|EST.ROWS|EST.TIME(us)| --------------------------------------------------- -|0 |MERGE JOIN | |5 |7 | +|0 |MERGE JOIN | |6 |7 | |1 |├─SORT | |5 |3 | |2 |│ └─TABLE FULL SCAN|bb |5 |3 | |3 |└─SORT | |6 |4 | @@ -118,7 +118,7 @@ Query Plan =================================================== |ID|OPERATOR |NAME|EST.ROWS|EST.TIME(us)| --------------------------------------------------- -|0 |MERGE JOIN | |5 |7 | +|0 |MERGE JOIN | |6 |7 | |1 |├─SORT | |5 |3 | |2 |│ └─TABLE FULL SCAN|bb |5 |3 | |3 |└─SORT | |6 |4 | @@ -155,7 +155,7 @@ Query Plan =================================================== |ID|OPERATOR |NAME|EST.ROWS|EST.TIME(us)| --------------------------------------------------- -|0 |MERGE JOIN | |5 |8 | +|0 |MERGE JOIN | |6 |8 | |1 |├─SORT | |5 |4 | |2 |│ └─TABLE FULL SCAN|bb |5 |3 | |3 |└─SORT | |6 |4 | @@ -188,7 +188,7 @@ Query Plan =================================================== |ID|OPERATOR |NAME|EST.ROWS|EST.TIME(us)| --------------------------------------------------- -|0 |MERGE JOIN | |5 |8 | +|0 |MERGE JOIN | |6 |8 | |1 |├─SORT | |5 |4 | |2 |│ └─TABLE FULL SCAN|bb |5 |3 | |3 |└─SORT | |6 |4 | @@ -221,7 +221,7 @@ Query Plan =================================================== |ID|OPERATOR |NAME|EST.ROWS|EST.TIME(us)| --------------------------------------------------- -|0 |MERGE JOIN | |5 
|8 | +|0 |MERGE JOIN | |6 |8 | |1 |├─SORT | |5 |4 | |2 |│ └─TABLE FULL SCAN|bb |5 |3 | |3 |└─SORT | |6 |4 | @@ -254,7 +254,7 @@ Query Plan =================================================== |ID|OPERATOR |NAME|EST.ROWS|EST.TIME(us)| --------------------------------------------------- -|0 |MERGE JOIN | |5 |8 | +|0 |MERGE JOIN | |6 |8 | |1 |├─SORT | |5 |4 | |2 |│ └─TABLE FULL SCAN|bb |5 |3 | |3 |└─SORT | |6 |4 | @@ -289,7 +289,7 @@ Query Plan |ID|OPERATOR |NAME|EST.ROWS|EST.TIME(us)| ----------------------------------------------------- |0 |MERGE JOIN | |2 |12 | -|1 |├─MERGE JOIN | |5 |8 | +|1 |├─MERGE JOIN | |6 |8 | |2 |│ ├─SORT | |6 |4 | |3 |│ │ └─TABLE FULL SCAN|aa |6 |3 | |4 |│ └─SORT | |5 |4 | @@ -333,7 +333,7 @@ Query Plan |ID|OPERATOR |NAME|EST.ROWS|EST.TIME(us)| ----------------------------------------------------- |0 |MERGE JOIN | |2 |12 | -|1 |├─MERGE JOIN | |5 |8 | +|1 |├─MERGE JOIN | |6 |8 | |2 |│ ├─SORT | |6 |4 | |3 |│ │ └─TABLE FULL SCAN|aa |6 |3 | |4 |│ └─SORT | |5 |4 | @@ -476,7 +476,7 @@ Query Plan ================================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ----------------------------------------------------------------- -|0 |MERGE JOIN | |5 |35 | +|0 |MERGE JOIN | |6 |35 | |1 |├─PX COORDINATOR MERGE SORT | |5 |17 | |2 |│ └─EXCHANGE OUT DISTR |:EX10000|5 |15 | |3 |│ └─SORT | |5 |12 | @@ -527,7 +527,7 @@ Query Plan ================================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ----------------------------------------------------------------- -|0 |MERGE JOIN | |5 |35 | +|0 |MERGE JOIN | |6 |35 | |1 |├─PX COORDINATOR MERGE SORT | |5 |17 | |2 |│ └─EXCHANGE OUT DISTR |:EX10000|5 |15 | |3 |│ └─SORT | |5 |12 | @@ -578,7 +578,7 @@ Query Plan ================================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ----------------------------------------------------------------- -|0 |MERGE JOIN | |5 |35 | +|0 |MERGE JOIN | |6 |35 
| |1 |├─PX COORDINATOR MERGE SORT | |5 |17 | |2 |│ └─EXCHANGE OUT DISTR |:EX10000|5 |15 | |3 |│ └─SORT | |5 |12 | @@ -629,7 +629,7 @@ Query Plan ================================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ----------------------------------------------------------------- -|0 |MERGE JOIN | |5 |35 | +|0 |MERGE JOIN | |6 |35 | |1 |├─PX COORDINATOR MERGE SORT | |5 |17 | |2 |│ └─EXCHANGE OUT DISTR |:EX10000|5 |15 | |3 |│ └─SORT | |5 |12 | @@ -684,7 +684,7 @@ Query Plan ================================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ----------------------------------------------------------------- -|0 |MERGE JOIN | |5 |35 | +|0 |MERGE JOIN | |6 |35 | |1 |├─PX COORDINATOR MERGE SORT | |5 |17 | |2 |│ └─EXCHANGE OUT DISTR |:EX10000|5 |15 | |3 |│ └─SORT | |5 |12 | @@ -735,7 +735,7 @@ Query Plan ================================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ----------------------------------------------------------------- -|0 |MERGE JOIN | |5 |35 | +|0 |MERGE JOIN | |6 |35 | |1 |├─PX COORDINATOR MERGE SORT | |5 |17 | |2 |│ └─EXCHANGE OUT DISTR |:EX10000|5 |15 | |3 |│ └─SORT | |5 |12 | @@ -786,7 +786,7 @@ Query Plan ================================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ----------------------------------------------------------------- -|0 |MERGE JOIN | |5 |35 | +|0 |MERGE JOIN | |6 |35 | |1 |├─PX COORDINATOR MERGE SORT | |5 |17 | |2 |│ └─EXCHANGE OUT DISTR |:EX10000|5 |15 | |3 |│ └─SORT | |5 |12 | @@ -837,7 +837,7 @@ Query Plan ================================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ----------------------------------------------------------------- -|0 |MERGE JOIN | |5 |35 | +|0 |MERGE JOIN | |6 |35 | |1 |├─PX COORDINATOR MERGE SORT | |5 |17 | |2 |│ └─EXCHANGE OUT DISTR |:EX10000|5 |15 | |3 |│ └─SORT | |5 |12 | @@ -889,8 +889,8 @@ Query 
Plan =================================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------------------- -|0 |MERGE JOIN | |2 |50 | -|1 |├─MERGE JOIN | |5 |35 | +|0 |MERGE JOIN | |2 |51 | +|1 |├─MERGE JOIN | |6 |35 | |2 |│ ├─PX COORDINATOR MERGE SORT | |6 |18 | |3 |│ │ └─EXCHANGE OUT DISTR |:EX10000|6 |16 | |4 |│ │ └─SORT | |6 |12 | @@ -960,8 +960,8 @@ Query Plan =================================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------------------- -|0 |MERGE JOIN | |2 |50 | -|1 |├─MERGE JOIN | |5 |35 | +|0 |MERGE JOIN | |2 |51 | +|1 |├─MERGE JOIN | |6 |35 | |2 |│ ├─PX COORDINATOR MERGE SORT | |6 |18 | |3 |│ │ └─EXCHANGE OUT DISTR |:EX10000|6 |16 | |4 |│ │ └─SORT | |6 |12 | @@ -1659,7 +1659,7 @@ Query Plan |ID|OPERATOR |NAME|EST.ROWS|EST.TIME(us)| --------------------------------------------------------- |0 |SORT | |13 |9 | -|1 |└─NESTED-LOOP OUTER JOIN | |13 |5 | +|1 |└─NESTED-LOOP OUTER JOIN | |13 |4 | |2 | ├─TABLE FULL SCAN |t2 |13 |3 | |3 | └─MATERIAL | |5 |4 | |4 | └─TABLE FULL SCAN |t7 |5 |3 | @@ -1700,7 +1700,7 @@ Query Plan ==================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ---------------------------------------------------- -|0 |MERGE JOIN | |7 |6 | +|0 |MERGE JOIN | |8 |6 | |1 |├─TABLE FULL SCAN|t8(idx)|6 |3 | |2 |└─TABLE FULL SCAN|t9(idx)|7 |3 | ==================================================== @@ -1761,7 +1761,7 @@ Query Plan ==================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ---------------------------------------------------- -|0 |MERGE OUTER JOIN | |7 |6 | +|0 |MERGE OUTER JOIN | |8 |6 | |1 |├─TABLE FULL SCAN|t8(idx)|6 |3 | |2 |└─TABLE FULL SCAN|t9(idx)|7 |3 | ==================================================== @@ -1827,7 +1827,7 @@ Query Plan 
========================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ---------------------------------------------------------- -|0 |MERGE RIGHT OUTER JOIN | |7 |6 | +|0 |MERGE RIGHT OUTER JOIN | |8 |6 | |1 |├─TABLE FULL SCAN |t8(idx)|6 |3 | |2 |└─TABLE FULL SCAN |t9(idx)|7 |3 | ========================================================== @@ -1895,7 +1895,7 @@ Query Plan ========================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ---------------------------------------------------------- -|0 |MERGE RIGHT OUTER JOIN | |7 |6 | +|0 |MERGE RIGHT OUTER JOIN | |8 |6 | |1 |├─TABLE FULL SCAN |t8(idx)|6 |3 | |2 |└─TABLE FULL SCAN |t9(idx)|7 |3 | ========================================================== diff --git a/tools/deploy/mysql_test/test_suite/join/r/mysql/nested_loop_join_right_null_joinon_where.result b/tools/deploy/mysql_test/test_suite/join/r/mysql/nested_loop_join_right_null_joinon_where.result index d2e6e9ef82..2d3c1c5223 100644 --- a/tools/deploy/mysql_test/test_suite/join/r/mysql/nested_loop_join_right_null_joinon_where.result +++ b/tools/deploy/mysql_test/test_suite/join/r/mysql/nested_loop_join_right_null_joinon_where.result @@ -52,7 +52,7 @@ Query Plan ================================================== |ID|OPERATOR |NAME|EST.ROWS|EST.TIME(us)| -------------------------------------------------- -|0 |MERGE JOIN | |2 |5 | +|0 |MERGE JOIN | |1 |5 | |1 |├─TABLE RANGE SCAN|t1 |1 |3 | |2 |└─TABLE RANGE SCAN|t2 |2 |3 | ================================================== @@ -82,7 +82,7 @@ Query Plan ================================================== |ID|OPERATOR |NAME|EST.ROWS|EST.TIME(us)| -------------------------------------------------- -|0 |MERGE JOIN | |2 |5 | +|0 |MERGE JOIN | |1 |5 | |1 |├─TABLE RANGE SCAN|a |1 |3 | |2 |└─TABLE RANGE SCAN|b |2 |3 | ================================================== diff --git a/tools/deploy/mysql_test/test_suite/px/r/mysql/agg.result 
b/tools/deploy/mysql_test/test_suite/px/r/mysql/agg.result index c1ead3a1f9..22d904b072 100644 --- a/tools/deploy/mysql_test/test_suite/px/r/mysql/agg.result +++ b/tools/deploy/mysql_test/test_suite/px/r/mysql/agg.result @@ -110,12 +110,12 @@ Query Plan ===================================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| --------------------------------------------------------------------- -|0 |PX COORDINATOR | |9 |66 | -|1 |└─EXCHANGE OUT DISTR |:EX10001|9 |57 | -|2 | └─HASH GROUP BY | |9 |48 | -|3 | └─EXCHANGE IN DISTR | |16 |45 | -|4 | └─EXCHANGE OUT DISTR (HASH)|:EX10000|16 |37 | -|5 | └─HASH GROUP BY | |16 |20 | +|0 |PX COORDINATOR | |12 |79 | +|1 |└─EXCHANGE OUT DISTR |:EX10001|12 |68 | +|2 | └─HASH GROUP BY | |12 |55 | +|3 | └─EXCHANGE IN DISTR | |19 |51 | +|4 | └─EXCHANGE OUT DISTR (HASH)|:EX10000|19 |42 | +|5 | └─HASH GROUP BY | |19 |21 | |6 | └─PX PARTITION ITERATOR| |28 |16 | |7 | └─MERGE JOIN | |28 |16 | |8 | ├─TABLE FULL SCAN |score |28 |8 | diff --git a/tools/deploy/mysql_test/test_suite/px/r/mysql/alloc_material_for_producer_consumer_schedule_mode.result b/tools/deploy/mysql_test/test_suite/px/r/mysql/alloc_material_for_producer_consumer_schedule_mode.result index 4daa7ec65b..0bd6dc8281 100644 --- a/tools/deploy/mysql_test/test_suite/px/r/mysql/alloc_material_for_producer_consumer_schedule_mode.result +++ b/tools/deploy/mysql_test/test_suite/px/r/mysql/alloc_material_for_producer_consumer_schedule_mode.result @@ -8,7 +8,7 @@ Query Plan ==================================================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------------------------------------ -|0 |TEMP TABLE TRANSFORMATION | |1 |4 | +|0 |TEMP TABLE TRANSFORMATION | |1 |6 | |1 |├─PX COORDINATOR | |0 |3 | |2 |│ └─EXCHANGE OUT DISTR |:EX10001 |0 |3 | |3 |│ └─TEMP TABLE INSERT |TEMP1 |0 |3 | @@ -18,10 +18,10 @@ Query Plan |7 |│ └─HASH GROUP BY | |1 |2 | |8 |│ 
└─PX BLOCK ITERATOR | |1 |2 | |9 |│ └─TABLE FULL SCAN |t1 |1 |2 | -|10|└─PX COORDINATOR | |1 |2 | -|11| └─EXCHANGE OUT DISTR |:EX20002 |1 |2 | -|12| └─SHARED HASH JOIN | |1 |1 | -|13| ├─EXCHANGE IN DISTR | |1 |1 | +|10|└─PX COORDINATOR | |1 |3 | +|11| └─EXCHANGE OUT DISTR |:EX20002 |1 |3 | +|12| └─SHARED HASH JOIN | |1 |2 | +|13| ├─EXCHANGE IN DISTR | |1 |2 | |14| │ └─EXCHANGE OUT DISTR (BC2HOST) |:EX20001 |1 |1 | |15| │ └─SHARED HASH JOIN | |1 |1 | |16| │ ├─EXCHANGE IN DISTR | |1 |1 | @@ -73,11 +73,11 @@ Query Plan ===================================================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------------------------------------- -|0 |PX COORDINATOR | |1 |5 | +|0 |PX COORDINATOR | |1 |6 | |1 |└─EXCHANGE OUT DISTR |:EX10003|1 |5 | |2 | └─HASH GROUP BY | |1 |5 | |3 | └─EXCHANGE IN DISTR | |1 |5 | -|4 | └─EXCHANGE OUT DISTR (HASH) |:EX10002|1 |4 | +|4 | └─EXCHANGE OUT DISTR (HASH) |:EX10002|1 |5 | |5 | └─HASH GROUP BY | |1 |4 | |6 | └─SUBPLAN SCAN |VIEW1 |1 |4 | |7 | └─MERGE GROUP BY | |1 |4 | @@ -140,7 +140,7 @@ Query Plan =========================================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| --------------------------------------------------------------------------- -|0 |PX COORDINATOR | |1 |5 | +|0 |PX COORDINATOR | |1 |6 | |1 |└─EXCHANGE OUT DISTR |:EX10002|1 |5 | |2 | └─MERGE GROUP BY | |1 |4 | |3 | └─PARTITION SORT | |1 |4 | diff --git a/tools/deploy/mysql_test/test_suite/skyline/r/mysql/skyline_basic_mysql.result b/tools/deploy/mysql_test/test_suite/skyline/r/mysql/skyline_basic_mysql.result index 0e7fe1fff3..b5f983095f 100644 --- a/tools/deploy/mysql_test/test_suite/skyline/r/mysql/skyline_basic_mysql.result +++ b/tools/deploy/mysql_test/test_suite/skyline/r/mysql/skyline_basic_mysql.result @@ -941,7 +941,7 @@ Query Plan ============================================================= |ID|OPERATOR |NAME 
|EST.ROWS|EST.TIME(us)| ------------------------------------------------------------- -|0 |MERGE UNION DISTINCT| |1 |5 | +|0 |MERGE UNION DISTINCT| |2 |5 | |1 |├─TABLE FULL SCAN |t1(idx_a_b_c)|1 |3 | |2 |└─TABLE FULL SCAN |t2(idx_x_y_z)|1 |3 | ============================================================= @@ -961,7 +961,7 @@ Query Plan ============================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------------- -|0 |MERGE UNION DISTINCT| |1 |5 | +|0 |MERGE UNION DISTINCT| |2 |5 | |1 |├─TABLE FULL SCAN |t1(idx_a_b_c)|1 |3 | |2 |└─TABLE FULL SCAN |t2(idx_x_y_z)|1 |3 | ============================================================= @@ -981,7 +981,7 @@ Query Plan ============================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------------- -|0 |MERGE UNION DISTINCT| |1 |5 | +|0 |MERGE UNION DISTINCT| |2 |5 | |1 |├─TABLE RANGE SCAN |t1(idx_b_c_a)|1 |3 | |2 |└─TABLE RANGE SCAN |t2(idx_x_y_z)|1 |3 | ============================================================= @@ -1003,7 +1003,7 @@ Query Plan ============================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------------- -|0 |MERGE UNION DISTINCT| |1 |5 | +|0 |MERGE UNION DISTINCT| |2 |5 | |1 |├─TABLE RANGE SCAN |t1(idx_b_c_a)|1 |3 | |2 |└─TABLE RANGE SCAN |t2(idx_x_y_z)|1 |3 | ============================================================= @@ -1466,7 +1466,7 @@ Query Plan ================================================================ |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ---------------------------------------------------------------- -|0 |MERGE UNION DISTINCT | |1 |7 | +|0 |MERGE UNION DISTINCT | |2 |7 | |1 |├─TABLE FULL SCAN |t2(idx_x_y_z)|1 |3 | |2 |└─SORT | |1 |5 | |3 | └─MERGE JOIN | |1 |5 | @@ -3080,7 +3080,7 @@ Query Plan 
================================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ----------------------------------------------------------------- -|0 |MERGE UNION DISTINCT| |1 |5 | +|0 |MERGE UNION DISTINCT| |2 |5 | |1 |├─TABLE RANGE SCAN |t6(idx_b_e_d_c_a)|1 |3 | |2 |└─TABLE FULL SCAN |tmp(idx_c1_c2_c3)|1 |3 | ================================================================= @@ -3101,7 +3101,7 @@ Query Plan ================================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ----------------------------------------------------------------- -|0 |MERGE UNION DISTINCT| |1 |5 | +|0 |MERGE UNION DISTINCT| |2 |5 | |1 |├─TABLE FULL SCAN |t6(idx_b_e_d_c_a)|1 |3 | |2 |└─TABLE FULL SCAN |tmp(idx_c1_c2_c3)|1 |3 | ================================================================= @@ -3121,7 +3121,7 @@ Query Plan ================================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ----------------------------------------------------------------- -|0 |MERGE UNION DISTINCT| |1 |10 | +|0 |MERGE UNION DISTINCT| |2 |10 | |1 |├─TABLE FULL SCAN |tmp(idx_c1_c2_c3)|1 |3 | |2 |└─SORT | |1 |8 | |3 | └─TABLE FULL SCAN |t6(idx_b_c) |1 |8 | @@ -3144,7 +3144,7 @@ Query Plan ================================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ----------------------------------------------------------------- -|0 |MERGE UNION DISTINCT| |1 |10 | +|0 |MERGE UNION DISTINCT| |2 |10 | |1 |├─TABLE FULL SCAN |tmp(idx_c1_c2_c3)|1 |3 | |2 |└─SORT | |1 |8 | |3 | └─TABLE FULL SCAN |t6(idx_b_a_c) |1 |8 | @@ -3167,7 +3167,7 @@ Query Plan ================================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ----------------------------------------------------------------- -|0 |MERGE UNION DISTINCT| |1 |5 | +|0 |MERGE UNION DISTINCT| |2 |5 | |1 |├─TABLE FULL SCAN |t6(idx_b_e_d_c_a)|1 |3 | |2 |└─TABLE FULL SCAN 
|tmp(idx_c1_c2_c3)|1 |3 | ================================================================= @@ -3762,7 +3762,7 @@ Optimization Info: avaiable_index_name:[t10i1, t10i2, t10i3, t10] pruned_index_name:[t10i3] unstable_index_name:[t10] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -3812,7 +3812,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[t10i1, t10i2, t10i3, t10] unstable_index_name:[t10] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -3861,7 +3861,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[t10i1, t10i2, t10i3, t10] pruned_index_name:[t10i3] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -3910,7 +3910,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[t10i1, t10i2, t10i3, t10] pruned_index_name:[t10i3] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -3958,7 +3958,7 @@ Optimization Info: table_dop:1 dop_method:Table DOP avaiable_index_name:[t10i1, t10i2, t10i3, t10] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -4009,7 +4009,7 @@ Optimization Info: avaiable_index_name:[t10i1, t10i2, t10i3, t10] pruned_index_name:[t10i1] unstable_index_name:[t10] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -4061,7 +4061,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[t10i1, t10i2, t10i3, t10] pruned_index_name:[t10i1, t10i3] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 
estimation method:[DEFAULT, STORAGE] Plan Type: @@ -4113,7 +4113,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[t10i1, t10i2, t10i3, t10] pruned_index_name:[t10i2, t10i3] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -4165,7 +4165,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[t10i1, t10i2, t10i3, t10] pruned_index_name:[t10i1, t10i3] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -4217,7 +4217,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[t10i1, t10i2, t10i3, t10] pruned_index_name:[t10i1, t10i2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -4266,7 +4266,7 @@ Optimization Info: table_dop:1 dop_method:Table DOP avaiable_index_name:[t11i1, t11] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -4315,7 +4315,7 @@ Optimization Info: table_dop:1 dop_method:Table DOP avaiable_index_name:[t11i1, t11] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -4365,7 +4365,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[t11i1, t11] pruned_index_name:[t11i1] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -4415,7 +4415,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[t11i1, t11] pruned_index_name:[t11i1] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -4467,7 +4467,7 @@ Optimization Info: table_dop:1 
dop_method:Table DOP avaiable_index_name:[t11i1, t11] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -4518,7 +4518,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[t11i1, t11] pruned_index_name:[t11i1] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -4569,7 +4569,7 @@ Optimization Info: table_dop:1 dop_method:Table DOP avaiable_index_name:[t11i1, t11] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -4618,7 +4618,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[t11i1, t11] pruned_index_name:[t11] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -4668,7 +4668,7 @@ Optimization Info: table_dop:1 dop_method:Table DOP avaiable_index_name:[t11i1, t11] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -4723,7 +4723,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[t11i1, t11] pruned_index_name:[t11i1] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -4772,7 +4772,7 @@ Optimization Info: table_dop:1 dop_method:Table DOP avaiable_index_name:[t12i1, t12i2, t12] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -4821,7 +4821,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[t12i1, t12i2, t12] pruned_index_name:[t12i1, t12i2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation 
method:[DEFAULT, STORAGE] Plan Type: @@ -4870,7 +4870,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[t12i1, t12i2, t12] pruned_index_name:[t12i1] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -4919,7 +4919,7 @@ Optimization Info: dop_method:Table DOP avaiable_index_name:[t12i1, t12i2, t12] pruned_index_name:[t12i1, t12i2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: diff --git a/tools/deploy/mysql_test/test_suite/skyline/r/mysql/skyline_complicate_mysql.result b/tools/deploy/mysql_test/test_suite/skyline/r/mysql/skyline_complicate_mysql.result index c3d5f81a3c..e8f75dccad 100644 --- a/tools/deploy/mysql_test/test_suite/skyline/r/mysql/skyline_complicate_mysql.result +++ b/tools/deploy/mysql_test/test_suite/skyline/r/mysql/skyline_complicate_mysql.result @@ -725,33 +725,30 @@ Outputs & filters: range_key([other.c1]), range(MIN ; MAX)always true explain select max(v1), tenant_id, v6 from skyline_int join other on v3 = c1 group by v5, v4, v3 order by v3, v4, v5; Query Plan -================================================================================ -|ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| --------------------------------------------------------------------------------- -|0 |SORT | |1 |5 | -|1 |└─HASH GROUP BY | |1 |5 | -|2 | └─MERGE JOIN | |1 |5 | -|3 | ├─TABLE FULL SCAN|other |1 |3 | -|4 | └─TABLE FULL SCAN|skyline_int(idx_v3_v4_v5_v6_v2)|1 |3 | -================================================================================ +============================================================================== +|ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| +------------------------------------------------------------------------------ +|0 |MERGE GROUP BY | |1 |5 | +|1 |└─MERGE JOIN | |1 |5 | +|2 | ├─TABLE FULL SCAN|skyline_int(idx_v3_v4_v5_v6_v2)|1 |3 | 
+|3 | └─TABLE FULL SCAN|other |1 |3 | +============================================================================== Outputs & filters: ------------------------------------- 0 - output([T_FUN_MAX(skyline_int.v1)], [skyline_int.tenant_id], [skyline_int.v6]), filter(nil), rowset=16 - sort_keys([skyline_int.v3, ASC], [skyline_int.v4, ASC], [skyline_int.v5, ASC]) - 1 - output([skyline_int.v3], [skyline_int.v4], [skyline_int.v5], [T_FUN_MAX(skyline_int.v1)], [skyline_int.tenant_id], [skyline_int.v6]), filter(nil), rowset=16 - group([skyline_int.v5], [skyline_int.v4], [skyline_int.v3]), agg_func([T_FUN_MAX(skyline_int.v1)]) - 2 - output([skyline_int.v3], [skyline_int.v4], [skyline_int.v5], [skyline_int.tenant_id], [skyline_int.v6], [skyline_int.v1]), filter(nil), rowset=16 + group([skyline_int.v3], [skyline_int.v4], [skyline_int.v5]), agg_func([T_FUN_MAX(skyline_int.v1)]) + 1 - output([skyline_int.v3], [skyline_int.v4], [skyline_int.v5], [skyline_int.tenant_id], [skyline_int.v6], [skyline_int.v1]), filter(nil), rowset=16 equal_conds([skyline_int.v3 = other.c1]), other_conds(nil) merge_directions([ASC]) - 3 - output([other.c1]), filter(nil), rowset=16 - access([other.c1]), partitions(p0) - is_index_back=false, is_global_index=false, - range_key([other.c1]), range(MIN ; MAX)always true - 4 - output([skyline_int.v1], [skyline_int.tenant_id], [skyline_int.v3], [skyline_int.v6], [skyline_int.v5], [skyline_int.v4]), filter(nil), rowset=16 + 2 - output([skyline_int.v1], [skyline_int.tenant_id], [skyline_int.v3], [skyline_int.v6], [skyline_int.v5], [skyline_int.v4]), filter(nil), rowset=16 access([skyline_int.v1], [skyline_int.tenant_id], [skyline_int.v3], [skyline_int.v6], [skyline_int.v5], [skyline_int.v4]), partitions(p0) is_index_back=false, is_global_index=false, range_key([skyline_int.v3], [skyline_int.v4], [skyline_int.v5], [skyline_int.v6], [skyline_int.v2], [skyline_int.v1], [skyline_int.tenant_id]), range(MIN, MIN,MIN,MIN,MIN,MIN,MIN ; 
MAX,MAX,MAX,MAX,MAX,MAX,MAX)always true + 3 - output([other.c1]), filter(nil), rowset=16 + access([other.c1]), partitions(p0) + is_index_back=false, is_global_index=false, + range_key([other.c1]), range(MIN ; MAX)always true explain select distinct(v3) from skyline_int join other on v3 = c1 order by v3, v4, v5; Query Plan =========================================================================== @@ -928,7 +925,7 @@ Query Plan ============================================================================ |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ---------------------------------------------------------------------------- -|0 |MERGE UNION DISTINCT| |1 |5 | +|0 |MERGE UNION DISTINCT| |2 |5 | |1 |├─TABLE FULL SCAN |skyline_int(idx_v3_v4_v5_v2)|1 |3 | |2 |└─TABLE FULL SCAN |other |1 |3 | ============================================================================ @@ -971,7 +968,7 @@ Query Plan =============================================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------------------------------- -|0 |MERGE UNION DISTINCT| |1 |5 | +|0 |MERGE UNION DISTINCT| |2 |5 | |1 |├─TABLE FULL SCAN |skyline_int(idx_v4_v5_v6_v2_v3)|1 |3 | |2 |└─TABLE FULL SCAN |skyline_int(idx_v2_v3_v4) |1 |3 | =============================================================================== @@ -1350,7 +1347,7 @@ Query Plan =============================================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------------------------------- -|0 |MERGE UNION DISTINCT| |1 |7 | +|0 |MERGE UNION DISTINCT| |2 |7 | |1 |├─MERGE JOIN | |1 |5 | |2 |│ ├─TABLE FULL SCAN |skyline_int(idx_v4_v5_v6_v2_v3)|1 |3 | |3 |│ └─TABLE FULL SCAN |other |1 |3 | @@ -1414,7 +1411,7 @@ Query Plan ===================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ----------------------------------------------------- -|0 
|MERGE UNION DISTINCT| |1 |5 | +|0 |MERGE UNION DISTINCT| |2 |5 | |1 |├─TABLE FULL SCAN |other|1 |3 | |2 |└─TABLE FULL SCAN |other|1 |3 | ===================================================== diff --git a/tools/deploy/mysql_test/test_suite/static_engine/r/mysql/hash_distinct.result b/tools/deploy/mysql_test/test_suite/static_engine/r/mysql/hash_distinct.result index 4263758870..b7a5a08231 100644 --- a/tools/deploy/mysql_test/test_suite/static_engine/r/mysql/hash_distinct.result +++ b/tools/deploy/mysql_test/test_suite/static_engine/r/mysql/hash_distinct.result @@ -5964,7 +5964,8 @@ Outputs & filters: ------------------------------------- 0 - output([T_FUN_COUNT(*)]), filter(nil), rowset=256 group(nil), agg_func([T_FUN_COUNT(*)]) - 1 - output(nil), filter([result.t2c0 = 1489403758], [concat(result.t1c0, '') = 'q6h]zjLt)|[?S*C'], [concat(result.t0c0, '') IS NULL], [result.t0c1 = 398204275]), rowset=256 + 1 - output(nil), filter([result.t0c1 = 398204275], [result.t2c0 = 1489403758], [concat(result.t1c0, '') = 'q6h]zjLt)|[?S*C'], [concat(result.t0c0, '') + IS NULL]), rowset=256 access([result.t0c0], [result.t0c1], [result.t2c0], [result.t1c0]) 2 - output([t0.c0], [t0.c1], [t2.c0], [t1.c0]), filter(nil), rowset=256 limit(2147483647), offset(0) @@ -6020,7 +6021,8 @@ Outputs & filters: ------------------------------------- 0 - output([T_FUN_COUNT(*)]), filter(nil), rowset=256 group(nil), agg_func([T_FUN_COUNT(*)]) - 1 - output(nil), filter([result.t2c0 = 1489403758], [concat(result.t1c0, '') = 'q6h]zjLt)|[?S*C'], [concat(result.t0c0, '') IS NULL], [result.t0c1 = 398204275]), rowset=256 + 1 - output(nil), filter([result.t0c1 = 398204275], [result.t2c0 = 1489403758], [concat(result.t1c0, '') = 'q6h]zjLt)|[?S*C'], [concat(result.t0c0, '') + IS NULL]), rowset=256 access([result.t0c0], [result.t0c1], [result.t2c0], [result.t1c0]) 2 - output([t0.c0], [t0.c1], [t2.c0], [t1.c0]), filter(nil), rowset=256 limit(2147483647), offset(0) diff --git 
a/tools/deploy/mysql_test/test_suite/static_engine/r/mysql/subplan_filter.result b/tools/deploy/mysql_test/test_suite/static_engine/r/mysql/subplan_filter.result index f494696f0a..746a15a970 100644 --- a/tools/deploy/mysql_test/test_suite/static_engine/r/mysql/subplan_filter.result +++ b/tools/deploy/mysql_test/test_suite/static_engine/r/mysql/subplan_filter.result @@ -1251,8 +1251,8 @@ Query Plan ============================================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------------------------------ -|0 |SORT | |1 |6064 | -|1 |└─SUBPLAN FILTER | |1 |6064 | +|0 |SORT | |2 |6064 | +|1 |└─SUBPLAN FILTER | |2 |6064 | |2 | ├─PX COORDINATOR | |5 |9 | |3 | │ └─EXCHANGE OUT DISTR |:EX10000|5 |8 | |4 | │ └─PX BLOCK ITERATOR | |5 |6 | @@ -1775,7 +1775,7 @@ Optimization Info: table_dop:1 dop_method:Table DOP avaiable_index_name:[t1] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:1 estimation method:[DYNAMIC SAMPLING FULL] t2: @@ -1787,7 +1787,7 @@ Optimization Info: table_dop:1 dop_method:DAS DOP avaiable_index_name:[t2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -1879,7 +1879,7 @@ Optimization Info: table_dop:1 dop_method:Table DOP avaiable_index_name:[t1] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:1 estimation method:[DYNAMIC SAMPLING FULL] t2: @@ -1887,11 +1887,11 @@ Optimization Info: physical_range_rows:7 logical_range_rows:7 index_back_rows:0 - output_rows:2 + output_rows:4 table_dop:1 dop_method:DAS DOP avaiable_index_name:[t2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -1983,7 +1983,7 @@ Optimization Info: table_dop:1 dop_method:Table DOP avaiable_index_name:[t1] - stats 
version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:1 estimation method:[DYNAMIC SAMPLING FULL] t2: @@ -1995,7 +1995,7 @@ Optimization Info: table_dop:1 dop_method:DAS DOP avaiable_index_name:[t2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -2095,7 +2095,7 @@ Optimization Info: table_dop:1 dop_method:Table DOP avaiable_index_name:[t1] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:1 estimation method:[DYNAMIC SAMPLING FULL] t2: @@ -2107,7 +2107,7 @@ Optimization Info: table_dop:1 dop_method:DAS DOP avaiable_index_name:[t2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] t2: @@ -2119,7 +2119,7 @@ Optimization Info: table_dop:1 dop_method:Table DOP avaiable_index_name:[t2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -2219,7 +2219,7 @@ Optimization Info: table_dop:1 dop_method:Table DOP avaiable_index_name:[t1] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:1 estimation method:[DYNAMIC SAMPLING FULL] t2: @@ -2231,7 +2231,7 @@ Optimization Info: table_dop:1 dop_method:DAS DOP avaiable_index_name:[t2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] t2: @@ -2243,7 +2243,7 @@ Optimization Info: table_dop:1 dop_method:Table DOP avaiable_index_name:[t2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -2343,7 +2343,7 @@ Optimization Info: table_dop:1 dop_method:Table DOP avaiable_index_name:[t1] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:1 
estimation method:[DYNAMIC SAMPLING FULL] t2: @@ -2355,7 +2355,7 @@ Optimization Info: table_dop:1 dop_method:DAS DOP avaiable_index_name:[t2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] t2: @@ -2367,7 +2367,7 @@ Optimization Info: table_dop:1 dop_method:Table DOP avaiable_index_name:[t2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -2467,7 +2467,7 @@ Optimization Info: table_dop:1 dop_method:Table DOP avaiable_index_name:[t1] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:1 estimation method:[DYNAMIC SAMPLING FULL] t2: @@ -2479,7 +2479,7 @@ Optimization Info: table_dop:1 dop_method:DAS DOP avaiable_index_name:[t2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] t2: @@ -2491,7 +2491,7 @@ Optimization Info: table_dop:1 dop_method:Table DOP avaiable_index_name:[t2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -2600,7 +2600,7 @@ Optimization Info: table_dop:1 dop_method:Table DOP avaiable_index_name:[t1] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:1 estimation method:[DYNAMIC SAMPLING FULL] t2: @@ -2612,7 +2612,7 @@ Optimization Info: table_dop:1 dop_method:DAS DOP avaiable_index_name:[t2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] t2: @@ -2624,7 +2624,7 @@ Optimization Info: table_dop:1 dop_method:DAS DOP avaiable_index_name:[t2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] t2: @@ -2636,7 +2636,7 @@ Optimization Info: table_dop:1 
dop_method:Table DOP avaiable_index_name:[t2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -2787,7 +2787,7 @@ Optimization Info: table_dop:1 dop_method:Table DOP avaiable_index_name:[t1] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:1 estimation method:[DYNAMIC SAMPLING FULL] t2: @@ -2799,7 +2799,7 @@ Optimization Info: table_dop:1 dop_method:DAS DOP avaiable_index_name:[t2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] t2: @@ -2811,7 +2811,7 @@ Optimization Info: table_dop:1 dop_method:DAS DOP avaiable_index_name:[t2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] t2: @@ -2823,7 +2823,7 @@ Optimization Info: table_dop:1 dop_method:DAS DOP avaiable_index_name:[t2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] t2: @@ -2835,7 +2835,7 @@ Optimization Info: table_dop:1 dop_method:DAS DOP avaiable_index_name:[t2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] t2: @@ -2847,7 +2847,7 @@ Optimization Info: table_dop:1 dop_method:DAS DOP avaiable_index_name:[t2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] t2: @@ -2859,7 +2859,7 @@ Optimization Info: table_dop:1 dop_method:DAS DOP avaiable_index_name:[t2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] t2: @@ -2871,7 +2871,7 @@ Optimization Info: table_dop:1 dop_method:DAS DOP avaiable_index_name:[t2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] 
dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -3022,7 +3022,7 @@ Optimization Info: table_dop:1 dop_method:Table DOP avaiable_index_name:[t1] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:1 estimation method:[DYNAMIC SAMPLING FULL] t2: @@ -3034,7 +3034,7 @@ Optimization Info: table_dop:1 dop_method:DAS DOP avaiable_index_name:[t2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] t2: @@ -3046,7 +3046,7 @@ Optimization Info: table_dop:1 dop_method:DAS DOP avaiable_index_name:[t2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] t2: @@ -3058,7 +3058,7 @@ Optimization Info: table_dop:1 dop_method:DAS DOP avaiable_index_name:[t2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] t2: @@ -3070,7 +3070,7 @@ Optimization Info: table_dop:1 dop_method:DAS DOP avaiable_index_name:[t2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] t2: @@ -3082,7 +3082,7 @@ Optimization Info: table_dop:1 dop_method:DAS DOP avaiable_index_name:[t2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] t2: @@ -3094,7 +3094,7 @@ Optimization Info: table_dop:1 dop_method:DAS DOP avaiable_index_name:[t2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] t2: @@ -3106,7 +3106,7 @@ Optimization Info: table_dop:1 dop_method:DAS DOP avaiable_index_name:[t2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: @@ -3257,7 +3257,7 @@ Optimization Info: 
table_dop:1 dop_method:Table DOP avaiable_index_name:[t1] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:1 estimation method:[DYNAMIC SAMPLING FULL] t2: @@ -3269,7 +3269,7 @@ Optimization Info: table_dop:1 dop_method:DAS DOP avaiable_index_name:[t2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] t2: @@ -3281,7 +3281,7 @@ Optimization Info: table_dop:1 dop_method:DAS DOP avaiable_index_name:[t2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] t2: @@ -3293,7 +3293,7 @@ Optimization Info: table_dop:1 dop_method:DAS DOP avaiable_index_name:[t2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] t2: @@ -3305,7 +3305,7 @@ Optimization Info: table_dop:1 dop_method:DAS DOP avaiable_index_name:[t2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] t2: @@ -3317,7 +3317,7 @@ Optimization Info: table_dop:1 dop_method:DAS DOP avaiable_index_name:[t2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] t2: @@ -3329,7 +3329,7 @@ Optimization Info: table_dop:1 dop_method:DAS DOP avaiable_index_name:[t2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] t2: @@ -3341,7 +3341,7 @@ Optimization Info: table_dop:1 dop_method:DAS DOP avaiable_index_name:[t2] - stats version:0 + stats info:[version=0, is_locked=0, is_expired=0] dynamic sampling level:0 estimation method:[DEFAULT, STORAGE] Plan Type: diff --git a/tools/deploy/mysql_test/test_suite/static_engine/r/mysql/table_scan.result 
b/tools/deploy/mysql_test/test_suite/static_engine/r/mysql/table_scan.result index a53d1fa0a8..59f56fd18d 100644 --- a/tools/deploy/mysql_test/test_suite/static_engine/r/mysql/table_scan.result +++ b/tools/deploy/mysql_test/test_suite/static_engine/r/mysql/table_scan.result @@ -183,7 +183,7 @@ Query Plan ================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| -------------------------------------------------- -|0 |TABLE RANGE SCAN|t1(i1)|1 |7 | +|0 |TABLE RANGE SCAN|t1(i1)|1 |8 | ================================================== Outputs & filters: ------------------------------------- @@ -251,7 +251,7 @@ Query Plan =================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| --------------------------------------------------- -|0 |TABLE RANGE SCAN|t1(idx)|1 |7 | +|0 |TABLE RANGE SCAN|t1(idx)|2 |10 | =================================================== Outputs & filters: ------------------------------------- @@ -275,7 +275,7 @@ Query Plan =================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| --------------------------------------------------- -|0 |TABLE RANGE SCAN|t1(idx)|1 |19 | +|0 |TABLE RANGE SCAN|t1(idx)|2 |20 | =================================================== Outputs & filters: ------------------------------------- diff --git a/tools/deploy/mysql_test/test_suite/subquery/r/mysql/spf_bug13044302.result b/tools/deploy/mysql_test/test_suite/subquery/r/mysql/spf_bug13044302.result index e01834e798..eedf962cd4 100644 --- a/tools/deploy/mysql_test/test_suite/subquery/r/mysql/spf_bug13044302.result +++ b/tools/deploy/mysql_test/test_suite/subquery/r/mysql/spf_bug13044302.result @@ -151,8 +151,8 @@ Query Plan ============================================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ----------------------------------------------------------------------------- -|0 |PX COORDINATOR | |2 |41 | -|1 |└─EXCHANGE OUT 
DISTR |:EX10003|2 |40 | +|0 |PX COORDINATOR | |2 |43 | +|1 |└─EXCHANGE OUT DISTR |:EX10003|2 |41 | |2 | └─HASH UNION DISTINCT | |2 |38 | |3 | ├─HASH JOIN | |1 |18 | |4 | │ ├─PX PARTITION ITERATOR | |1 |11 |