From 4182ed5513eedaa82ff8d5bdfdce9b52b98e548f Mon Sep 17 00:00:00 2001 From: ChangerR Date: Sat, 12 Oct 2024 13:47:01 +0000 Subject: [PATCH] [CP] [CP] function index gather table stats --- src/pl/sys_package/ob_dbms_stats.cpp | 121 +++++- src/pl/sys_package/ob_dbms_stats.h | 10 + src/share/stat/ob_dbms_stats_executor.cpp | 102 ++++- src/share/stat/ob_dbms_stats_gather.cpp | 18 +- src/share/stat/ob_dbms_stats_gather.h | 4 +- src/share/stat/ob_dbms_stats_utils.cpp | 390 +++++++++++++++++- src/share/stat/ob_dbms_stats_utils.h | 47 +++ .../stat/ob_incremental_stat_estimator.cpp | 69 ++++ .../stat/ob_incremental_stat_estimator.h | 12 + src/share/stat/ob_index_stats_estimator.cpp | 10 +- src/share/stat/ob_opt_stat_manager.cpp | 2 +- .../stat/ob_opt_stat_monitor_manager.cpp | 1 + src/share/stat/ob_stat_define.cpp | 8 +- src/share/stat/ob_stat_define.h | 52 ++- .../ob_optimizer_stats_gathering_op.cpp | 2 +- src/sql/optimizer/ob_join_order.cpp | 72 +++- src/sql/optimizer/ob_join_order.h | 3 +- src/sql/optimizer/ob_opt_selectivity.cpp | 40 +- src/sql/optimizer/ob_opt_selectivity.h | 6 +- .../ob_direct_load_insert_table_ctx.cpp | 2 +- 20 files changed, 911 insertions(+), 60 deletions(-) diff --git a/src/pl/sys_package/ob_dbms_stats.cpp b/src/pl/sys_package/ob_dbms_stats.cpp index ce41c0b1f5..2d97fb00c5 100644 --- a/src/pl/sys_package/ob_dbms_stats.cpp +++ b/src/pl/sys_package/ob_dbms_stats.cpp @@ -34,6 +34,7 @@ #include "sql/privilege_check/ob_ora_priv_check.h" #include "sql/ob_result_set.h" #include "share/stat/ob_dbms_stats_maintenance_window.h" +#include "sql/optimizer/ob_optimizer_util.h" namespace oceanbase { @@ -321,6 +322,7 @@ int ObDbmsStats::gather_index_stats(ObExecContext &ctx, ParamStore ¶ms, ObOb empty_method_opt.set_null(); ObObjParam empty_cascade; empty_cascade.set_null(); + ObSEArray dummy_column_ids; if (OB_FAIL(check_statistic_table_writeable(ctx))) { LOG_WARN("failed to check tenant is restore", K(ret)); } else if (OB_FAIL(ObDbmsStatsUtils::implicit_commit_before_gather_stats(ctx))) { @@ -363,6 +365,8 @@ int ObDbmsStats::gather_index_stats(ObExecContext &ctx, ParamStore ¶ms, ObOb } else if (!ind_stat_param.force_ && OB_FAIL(ObDbmsStatsLockUnlock::check_stat_locked(ctx, ind_stat_param))) { LOG_WARN("failed check stat locked", K(ret)); + } else if (OB_FAIL(adjust_index_column_params(ctx, ind_stat_param, dummy_column_ids))) { + LOG_WARN("failed adjust index column params", K(ret)); } else if (OB_FAIL(ObDbmsStatsExecutor::gather_index_stats(ctx, ind_stat_param))) { LOG_WARN("failed to gather table stats", K(ret)); } else if (OB_FAIL(update_stat_cache(ctx.get_my_session()->get_rpc_tenant_id(), ind_stat_param))) { @@ -380,6 +384,10 @@ int ObDbmsStats::gather_table_index_stats(ObExecContext &ctx, int ret = OB_SUCCESS; share::schema::ObSchemaGetterGuard *schema_guard = ctx.get_virtual_table_ctx().schema_guard_; int64_t start_time = ObTimeUtility::current_time(); + ObSEArray no_deduce_column_ids; + if (OB_FAIL(get_no_deduce_basic_stats_column_ids(data_param, no_deduce_column_ids))) { + LOG_WARN("failed to get no deduce basic stats column ids", K(ret)); + } for (int64_t i = 0; OB_SUCC(ret) && i < no_gather_index_ids.count(); ++i) { StatTable stat_table; stat_table.database_id_ = data_param.db_id_; @@ -408,6 +416,7 @@ int ObDbmsStats::gather_table_index_stats(ObExecContext &ctx, index_param.part_stat_param_.assign_without_part_type(data_param.part_stat_param_); index_param.subpart_stat_param_.assign_without_part_type(data_param.subpart_stat_param_); index_param.data_table_name_ = data_param.tab_name_; + index_param.data_table_id_ = data_param.table_id_; if (index_param.force_ && OB_FAIL(ObDbmsStatsLockUnlock::fill_stat_locked(ctx, index_param))) { LOG_WARN("failed fill stat locked", K(ret)); @@ -419,6 +428,8 @@ int ObDbmsStats::gather_table_index_stats(ObExecContext &ctx, data_param.duration_time_, index_param.duration_time_))) { LOG_WARN("failed to get valid duration time", K(ret)); + } else if (OB_FAIL(adjust_index_column_params(ctx, index_param, no_deduce_column_ids))) { + LOG_WARN("failed to adjust index column params", K(ret)); } else if (OB_FAIL(ObDbmsStatsExecutor::gather_index_stats(ctx, index_param))) { LOG_WARN("failed to gather table stats", K(ret)); } else if (OB_FAIL(update_stat_cache(ctx.get_my_session()->get_rpc_tenant_id(), index_param))) { @@ -1456,9 +1467,9 @@ int ObDbmsStats::export_schema_stats(ObExecContext &ctx, ParamStore ¶ms, ObO { int ret = OB_SUCCESS; UNUSED(result); - SMART_VAR(ObTableStatParam, global_param) { + SMART_VARS_2((ObTableStatParam, global_param), + (ObTableStatParam, stat_table_param)) { global_param.allocator_ = &ctx.get_allocator(); - ObTableStatParam stat_table_param; stat_table_param.allocator_ = &ctx.get_allocator(); const share::schema::ObTableSchema *table_schema = NULL; ObSEArray table_ids; @@ -1821,9 +1832,9 @@ int ObDbmsStats::import_schema_stats(ObExecContext &ctx, ParamStore ¶ms, ObO { int ret = OB_SUCCESS; UNUSED(result); - SMART_VAR(ObTableStatParam, global_param) { + SMART_VARS_2((ObTableStatParam, global_param), + (ObTableStatParam, stat_table_param)) { global_param.allocator_ = &ctx.get_allocator(); - ObTableStatParam stat_table_param; stat_table_param.allocator_ = &ctx.get_allocator(); const share::schema::ObTableSchema *table_schema = NULL; ObSEArray table_ids; @@ -3457,6 +3468,8 @@ int ObDbmsStats::parse_table_part_info(ObExecContext &ctx, } else if (need_parse_col_group && OB_FAIL(init_column_group_stat_param(*table_schema, param.column_group_params_))) { LOG_WARN("failed to init column group stat param", K(ret)); + } else if (OB_FAIL(adjust_text_column_basic_stats(ctx, *table_schema, param))) { + LOG_WARN("failed to adjust text column basic stats", K(ret)); } } return ret; @@ -3493,6 +3506,8 @@ int ObDbmsStats::parse_table_part_info(ObExecContext &ctx, } else if (need_parse_col_group && OB_FAIL(init_column_group_stat_param(*table_schema, param.column_group_params_))) { LOG_WARN("failed to init column group stat param", K(ret)); + } else if (OB_FAIL(adjust_text_column_basic_stats(ctx, *table_schema, param))) { + LOG_WARN("failed to adjust text column basic stats", K(ret)); } else { param.table_id_ = table_schema->get_table_id(); param.ref_table_type_ = table_schema->get_table_type(); @@ -3620,6 +3635,7 @@ int ObDbmsStats::init_column_stat_params(ObIAllocator &allocator, col_param.set_size_manual(); col_param.bucket_num_ = -1; col_param.column_attribute_ = 0; + col_param.column_usage_flag_ = 0; if (lib::is_oracle_mode() && col->get_meta_type().is_varbinary_or_binary()) { //oracle don't have this type. but agent table will have this type, such as "SYS"."ALL_VIRTUAL_COLUMN_REAL_AGENT" } else { @@ -3648,6 +3664,10 @@ int ObDbmsStats::init_column_stat_params(ObIAllocator &allocator, if (!col->is_nullable()) { col_param.set_is_not_null_column(); } + if (lib::is_mysql_mode() && + col->get_meta_type().get_type_class() == ColumnTypeClass::ObTextTC) { + col_param.set_is_text_column(); + } if (OB_SUCC(ret) && OB_FAIL(column_params.push_back(col_param))) { LOG_WARN("failed to push back column param", K(ret)); } @@ -3711,7 +3731,7 @@ int ObDbmsStats::set_default_column_params(ObIArray &column_p int ret = OB_SUCCESS; for (int64_t i = 0; OB_SUCC(ret) && i < column_params.count(); ++i) { ObColumnStatParam ¶m = column_params.at(i); - if (param.is_valid_opt_col()) { + if (param.is_valid_opt_col() && !param.is_text_column()) { param.set_need_basic_stat(); param.set_size_auto(); param.column_usage_flag_ = 0; @@ -4725,7 +4745,7 @@ int ObDbmsStats::parser_for_all_clause(const ParseNode *for_all_node, ObColumnStatParam &col_param = column_params.at(i); if (!is_match_column_option(col_param, for_all_conf)) { // do nothing - } else if (!col_param.is_valid_opt_col()) { + } else if (!col_param.is_valid_opt_col() || col_param.is_text_column()) { // do nothing } else if (OB_FAIL(compute_bucket_num(column_params.at(i), size_conf))) { LOG_WARN("failed to compute histogram size", K(ret)); @@ -5599,6 +5619,7 @@ int ObDbmsStats::get_all_table_ids_in_database(ObExecContext &ctx, } else if (OB_FAIL(ObDbmsStatsUtils::check_is_stat_table(*schema_guard, stat_param.tenant_id_, table_schemas.at(i)->get_table_id(), + false, is_valid))) { LOG_WARN("failed to check is stat table", K(ret)); } else if (!is_valid) { @@ -5756,7 +5777,7 @@ int ObDbmsStats::do_gather_table_stats(sql::ObExecContext &ctx, if (OB_ISNULL(schema_guard)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected error", K(ret), K(schema_guard)); - } else if (OB_FAIL(ObDbmsStatsUtils::check_is_stat_table(*schema_guard, tenant_id, table_id, is_valid))) { + } else if (OB_FAIL(ObDbmsStatsUtils::check_is_stat_table(*schema_guard, tenant_id, table_id, false, is_valid))) { LOG_WARN("failed to check sy table validity", K(ret)); } else if (!is_valid) { // only gather statistics for following tables: @@ -7160,6 +7181,47 @@ int ObDbmsStats::async_gather_table_stats(sql::ObExecContext &ctx, } return ret; } +int ObDbmsStats::adjust_index_column_params(ObExecContext &ctx, + ObTableStatParam &index_param, + ObIArray &filter_column_ids) +{ + int ret = OB_SUCCESS; + ObSEArray function_column_ids; + for (int64_t i = 0; i < index_param.column_params_.count(); ++i) { + if (index_param.column_params_.at(i).is_hidden_column()) { + index_param.column_params_.at(i).set_need_basic_stat(); + if (OB_FAIL(function_column_ids.push_back(index_param.column_params_.at(i).column_id_))) { + LOG_WARN("failed to push back column id", K(ret)); + } + } else { + index_param.column_params_.at(i).unset_need_basic_stat(); + } + } + if (OB_SUCC(ret) && lib::is_mysql_mode() && !function_column_ids.empty()) { + if (OB_FAIL(ObDbmsStatsUtils::get_prefix_index_text_pairs(ctx.get_virtual_table_ctx().schema_guard_, + index_param.tenant_id_, + index_param.data_table_id_, + function_column_ids, + filter_column_ids, + index_param.prefix_column_pairs_))) { + LOG_WARN("failed to get prefix index text pairs", K(ret)); + } + } + return ret; +} + +int ObDbmsStats::get_no_deduce_basic_stats_column_ids(const ObTableStatParam ¶m, ObIArray &column_ids) +{ + int ret = OB_SUCCESS; + for (int64_t i = 0; i < param.column_params_.count(); ++i) { + if (param.column_params_.at(i).need_basic_stat()) { + if (OB_FAIL(column_ids.push_back(param.column_params_.at(i).column_id_))) { + LOG_WARN("failed to push back column ids", K(ret)); + } + } + } + return ret; +} int ObDbmsStats::do_async_gather_table_stats(sql::ObExecContext &ctx, const uint64_t tenant_id, @@ -7176,7 +7238,7 @@ int ObDbmsStats::do_async_gather_table_stats(sql::ObExecContext &ctx, ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected error", K(ret), K(schema_guard)); } else if (OB_FAIL(ObDbmsStatsUtils::check_is_stat_table(*schema_guard, tenant_id, - async_table.table_id_, is_valid))) { + async_table.table_id_, false, is_valid))) { LOG_WARN("failed to check sy table validity", K(ret)); } else if (!is_valid) { // only gather statistics for following tables: @@ -7322,6 +7384,49 @@ int ObDbmsStats::adjust_async_gather_stat_option(ObExecContext &ctx, LOG_TRACE("succeed to adjust auto gather stat option", K(async_partition_ids), K(param)); return ret; } +int ObDbmsStats::adjust_text_column_basic_stats(ObExecContext &ctx, + const share::schema::ObTableSchema &schema, + ObTableStatParam ¶m) +{ + int ret = OB_SUCCESS; + ObSEArray pairs; + ObSEArray text_column_ids; + ObSEArray auto_columns; + for (int64_t i = 0; OB_SUCC(ret) && i < param.column_params_.count(); ++i) { + if (param.column_params_.at(i).is_text_column()) { + if (OB_FAIL(auto_columns.push_back(¶m.column_params_.at(i)))) { + LOG_WARN("failed to push back auto text columns", K(ret)); + } + } + } + if (OB_SUCC(ret) && !auto_columns.empty()) { + if (OB_FAIL(ObDbmsStatsUtils::get_all_prefix_index_text_pairs(schema, + pairs))) { + LOG_WARN("failed to get all prefix index text pairs", K(ret)); + } else if (OB_FAIL(ObOptStatMonitorManager::flush_database_monitoring_info(ctx, true, false))) { + LOG_WARN("failed to do flush database monitoring info", K(ret)); + } else if (OB_FAIL(ObOptStatMonitorManager::get_column_usage_from_table( + ctx, auto_columns, param.tenant_id_, param.table_id_))) { + LOG_WARN("failed to get column usage from table", K(ret)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < pairs.count(); ++i) { + if (OB_FAIL(text_column_ids.push_back(pairs.at(i).related_column_id_))) { + LOG_WARN("failed to push back index", K(ret)); + } + } + } + for (int64_t i = 0; OB_SUCC(ret) && i < auto_columns.count(); ++i) { + ObColumnStatParam *col_stat = auto_columns.at(i); + if (ObOptimizerUtil::find_item(text_column_ids, + col_stat->column_id_)) { + // do nothing + } else if (col_stat->column_usage_flag_ > 0) { + col_stat->unset_text_column(); + } + } + } + return ret; +} } } diff --git a/src/pl/sys_package/ob_dbms_stats.h b/src/pl/sys_package/ob_dbms_stats.h index 82d5801a40..0a6f64e398 100644 --- a/src/pl/sys_package/ob_dbms_stats.h +++ b/src/pl/sys_package/ob_dbms_stats.h @@ -657,6 +657,16 @@ private: static int adjust_async_gather_stat_option(ObExecContext &ctx, const ObIArray &async_partition_ids, ObTableStatParam ¶m); + static int adjust_index_column_params(ObExecContext &ctx, + ObTableStatParam &index_param, + ObIArray &filter_column_ids); + + static int get_no_deduce_basic_stats_column_ids(const ObTableStatParam ¶m, ObIArray &column_ids); + + static int adjust_text_column_basic_stats(ObExecContext &ctx, + const share::schema::ObTableSchema &schema, + ObTableStatParam ¶m); + }; } diff --git a/src/share/stat/ob_dbms_stats_executor.cpp b/src/share/stat/ob_dbms_stats_executor.cpp index 7273e6c864..026318e17d 100644 --- a/src/share/stat/ob_dbms_stats_executor.cpp +++ b/src/share/stat/ob_dbms_stats_executor.cpp @@ -1230,11 +1230,16 @@ int ObDbmsStatsExecutor::gather_index_stats(ObExecContext &ctx, int ret = OB_SUCCESS; ObArray all_index_stats; ObArray part_index_stats; - ObArray empty_cstats; + ObArray all_column_stats; + ObSEArray subpart_opt_stats; + ObSEArray part_opt_stats; + ObSEArray global_opt_stats; + ObOptStat global_opt_stat; ObOptStatGatherParam gather_param; PartitionIdBlockMap partition_id_block_map; bool use_column_store = false; bool use_split_part = false; + ObSEArray copy_stats; LOG_TRACE("begin gather index stats", K(param)); if (OB_FAIL(partition_id_block_map.create(10000, ObModIds::OB_HASH_BUCKET_TABLE_STATISTICS, @@ -1260,7 +1265,7 @@ int ObDbmsStatsExecutor::gather_index_stats(ObExecContext &ctx, /*do nothing*/ } else if (OB_FAIL(gather_param.partition_infos_.assign(param.subpart_infos_))) { LOG_WARN("failed to assign", K(ret)); - } else if (OB_FAIL(ObDbmsStatsGather::gather_index_stats(ctx, gather_param, all_index_stats))) { + } else if (OB_FAIL(ObDbmsStatsGather::gather_index_stats(ctx, gather_param, subpart_opt_stats, all_index_stats, all_column_stats))) { LOG_WARN("failed to gather subpart index stats", K(ret)); } else {/*do nothing*/} } @@ -1272,12 +1277,26 @@ int ObDbmsStatsExecutor::gather_index_stats(ObExecContext &ctx, if (OB_UNLIKELY(all_index_stats.empty())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected error", K(ret), K(param.approx_part_infos_), K(all_index_stats)); - } else if (OB_FAIL(ObIncrementalStatEstimator::derive_part_index_stat_by_subpart_index_stats(param, - all_index_stats, - part_index_stats))) { - LOG_WARN("failed to derive part index stat by subpart index stats", K(ret)); - } else if (OB_FAIL(append(all_index_stats, part_index_stats))) { - LOG_WARN("failed to append", K(ret)); + } else if (all_column_stats.empty()) { + if (OB_FAIL(ObIncrementalStatEstimator::derive_part_index_stat_by_subpart_index_stats(param, + all_index_stats, + part_index_stats))) { + LOG_WARN("failed to derive part index stat by subpart index stats", K(ret)); + } else if (OB_FAIL(append(all_index_stats, part_index_stats))) { + LOG_WARN("failed to append", K(ret)); + } + } else { + if (OB_FAIL(ObIncrementalStatEstimator::derive_part_index_column_stat_by_subpart_index(ctx, + *param.allocator_, + param, + subpart_opt_stats, + part_opt_stats))) { + LOG_WARN("failed to derived part"); + } else if (OB_FAIL(ObDbmsStatsUtils::calssify_opt_stat(part_opt_stats, + all_index_stats, + all_column_stats))) { + LOG_WARN("failed to classify opt stat", K(ret)); + } } } else if (OB_UNLIKELY(!param.approx_part_infos_.empty())) { ret = OB_ERR_UNEXPECTED; @@ -1286,7 +1305,7 @@ int ObDbmsStatsExecutor::gather_index_stats(ObExecContext &ctx, if (OB_SUCC(ret) && !param.part_infos_.empty()) { if (OB_FAIL(gather_param.partition_infos_.assign(param.part_infos_))) { LOG_WARN("failed to assign", K(ret)); - } else if (OB_FAIL(ObDbmsStatsGather::gather_index_stats(ctx, gather_param, part_index_stats))) { + } else if (OB_FAIL(ObDbmsStatsGather::gather_index_stats(ctx, gather_param, part_opt_stats, part_index_stats, all_column_stats))) { LOG_WARN("failed to gather part index stats", K(ret)); } else if (OB_FAIL(append(all_index_stats, part_index_stats))) { LOG_WARN("failed to append", K(ret)); @@ -1297,17 +1316,70 @@ int ObDbmsStatsExecutor::gather_index_stats(ObExecContext &ctx, gather_param.stat_level_ = TABLE_LEVEL; gather_param.partition_infos_.reset(); if (param.global_stat_param_.gather_approx_ && !part_index_stats.empty()) {//approx global stats base on part stats - if (OB_FAIL(ObIncrementalStatEstimator::derive_global_index_stat_by_part_index_stats(param, - part_index_stats, - all_index_stats))) { - LOG_WARN("failed to derive global index stat by part index stats", K(ret)); + if (all_column_stats.empty()) { + if (OB_FAIL(ObIncrementalStatEstimator::derive_global_index_stat_by_part_index_stats(param, + part_index_stats, + all_index_stats))) { + LOG_WARN("failed to derive global index stat by part index stats", K(ret)); + } + } else { + if (OB_FAIL(ObIncrementalStatEstimator::derive_global_index_column_stat_by_part_index(ctx, + *param.allocator_, + param, + part_opt_stats, + global_opt_stat))) { + LOG_WARN("failed to derive global index column stat by part index", K(ret)); + } else if (OB_FAIL(global_opt_stats.push_back(global_opt_stat))) { + LOG_WARN("failed to push back opt stats", K(ret)); + }else if (OB_FAIL(ObDbmsStatsUtils::calssify_opt_stat(global_opt_stats, + all_index_stats, + all_column_stats))) { + LOG_WARN("failed to classify opt stat", K(ret)); + } } - } else if (OB_FAIL(ObDbmsStatsGather::gather_index_stats(ctx, gather_param, all_index_stats))) { + } else if (OB_FAIL(ObDbmsStatsGather::gather_index_stats(ctx, gather_param, global_opt_stats, all_index_stats, all_column_stats))) { LOG_WARN("failed to gather index stats", K(ret)); } } + + if (OB_FAIL(ret)) { + } else if (all_column_stats.empty()) { + // do nothing + } else if (param.is_global_index_ ) { + if (OB_FAIL(ObDbmsStatsUtils::copy_global_index_prefix_stats_to_text( + ctx.get_virtual_table_ctx().schema_guard_, + *param.allocator_, + all_column_stats, + param.prefix_column_pairs_, + param.tenant_id_, + param.data_table_id_, + copy_stats))) { + LOG_WARN("failed to copy global index prefix stats to text", K(ret)); + } else if (OB_FAIL(append(all_column_stats, copy_stats))) { + LOG_WARN("failed to append copy stats", K(ret)); + } + } else if (OB_FAIL(ObDbmsStatsUtils::deduce_index_column_stat_to_table( + ctx.get_virtual_table_ctx().schema_guard_, + param.tenant_id_, + param.table_id_, + param.data_table_id_, + param.part_level_, + all_column_stats))) { + LOG_WARN("failed to trans index column stat to table", K(ret)); + } else if (param.prefix_column_pairs_.empty()) { + // do nothing + } else if (OB_FAIL(ObDbmsStatsUtils::copy_local_index_prefix_stats_to_text( + *param.allocator_, + all_column_stats, + param.prefix_column_pairs_, + copy_stats))) { + LOG_WARN("failed to copy local index prefix stats to text", K(ret)); + } else if (OB_FAIL(append(all_column_stats, copy_stats))) { + LOG_WARN("failed to append copy stats", K(ret)); + } + if (OB_SUCC(ret)) { - if (OB_FAIL(ObDbmsStatsUtils::split_batch_write(ctx, all_index_stats, empty_cstats, true))) { + if (OB_FAIL(ObDbmsStatsUtils::split_batch_write(ctx, all_index_stats, all_column_stats, true))) { LOG_WARN("failed to split batch write", K(ret)); } else {/*do nothing*/} } diff --git a/src/share/stat/ob_dbms_stats_gather.cpp b/src/share/stat/ob_dbms_stats_gather.cpp index bbef2a5b7d..34883a4003 100644 --- a/src/share/stat/ob_dbms_stats_gather.cpp +++ b/src/share/stat/ob_dbms_stats_gather.cpp @@ -222,10 +222,11 @@ int ObDbmsStatsGather::init_opt_stat(ObIAllocator &allocator, int ObDbmsStatsGather::gather_index_stats(ObExecContext &ctx, const ObOptStatGatherParam ¶m, - ObIArray &all_index_stats) + ObIArray &opt_stats, + ObIArray &all_index_stats, + ObIArray &all_column_stats) { int ret = OB_SUCCESS; - ObSEArray opt_stats; LOG_TRACE("begin to gather index stats", K(param)); if (OB_ISNULL(param.allocator_)) { ret = OB_ERR_UNEXPECTED; @@ -238,15 +239,10 @@ int ObDbmsStatsGather::gather_index_stats(ObExecContext &ctx, ObIndexStatsEstimator index_est(ctx, *param.allocator_); if (OB_FAIL(index_est.estimate(param, opt_stats))) { LOG_WARN("failed to estimate basic statistics", K(ret)); - } else { - for (int64_t i = 0; OB_SUCC(ret) && i < opt_stats.count(); ++i) { - if (OB_ISNULL(opt_stats.at(i).table_stat_)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null", K(ret), K(opt_stats.at(i).table_stat_)); - } else if (OB_FAIL(all_index_stats.push_back(opt_stats.at(i).table_stat_))) { - LOG_WARN("failed to append", K(ret)); - } else {/*do nothing*/} - } + } else if (OB_FAIL(ObDbmsStatsUtils::calssify_opt_stat(opt_stats, + all_index_stats, + all_column_stats))) { + LOG_WARN("failed to classify opt stat", K(ret)); } } return ret; diff --git a/src/share/stat/ob_dbms_stats_gather.h b/src/share/stat/ob_dbms_stats_gather.h index 9ee73747ea..c66c2c2f5b 100644 --- a/src/share/stat/ob_dbms_stats_gather.h +++ b/src/share/stat/ob_dbms_stats_gather.h @@ -32,7 +32,9 @@ public: static int gather_index_stats(ObExecContext &ctx, const ObOptStatGatherParam ¶m, - ObIArray &all_index_stats); + ObIArray &opt_stats, + ObIArray &all_index_stats, + ObIArray &all_column_stats); private: static int init_opt_stats(ObIAllocator &allocator, diff --git a/src/share/stat/ob_dbms_stats_utils.cpp b/src/share/stat/ob_dbms_stats_utils.cpp index 25763824c5..0a132f2284 100644 --- a/src/share/stat/ob_dbms_stats_utils.cpp +++ b/src/share/stat/ob_dbms_stats_utils.cpp @@ -28,6 +28,8 @@ #include "share/stat/ob_dbms_stats_preferences.h" #include "observer/ob_sql_client_decorator.h" #include "share/stat/ob_dbms_stats_executor.h" +#include "lib/utility/ob_fast_convert.h" +#include "sql/optimizer/ob_optimizer_util.h" #ifdef OB_BUILD_ORACLE_PL #include "pl/sys_package/ob_json_pl_utils.h" @@ -167,6 +169,7 @@ int ObDbmsStatsUtils::check_table_read_write_valid(const uint64_t tenant_id, boo int ObDbmsStatsUtils::check_is_stat_table(share::schema::ObSchemaGetterGuard &schema_guard, const uint64_t tenant_id, const int64_t table_id, + bool need_index_table, bool &is_valid) { int ret = OB_SUCCESS; @@ -185,7 +188,8 @@ int ObDbmsStatsUtils::check_is_stat_table(share::schema::ObSchemaGetterGuard &sc } else {//check user table is_valid = table_schema->is_user_table() || table_schema->is_external_table() - || table_schema->is_mlog_table(); + || table_schema->is_mlog_table() + || (need_index_table && table_schema->is_index_table()); } return ret; } @@ -1108,6 +1112,9 @@ int ObDbmsStatsUtils::prepare_gather_stat_param(const ObTableStatParam ¶m, gather_param.hist_sample_info_.is_block_sample_ = param.hist_sample_info_.is_block_sample_; gather_param.hist_sample_info_.sample_type_ = param.hist_sample_info_.sample_type_; gather_param.hist_sample_info_.sample_value_ = param.hist_sample_info_.sample_value_; + gather_param.is_global_index_ = param.is_global_index_; + gather_param.data_table_id_ = param.data_table_id_; + gather_param.part_level_ = param.part_level_; return gather_param.column_group_params_.assign(param.column_group_params_); } @@ -1453,6 +1460,138 @@ int ObDbmsStatsUtils::check_can_async_gather_stats(sql::ObExecContext &ctx) } return ret; } +int ObDbmsStatsUtils::build_index_part_to_table_part_maps(share::schema::ObSchemaGetterGuard *schema_guard, + uint64_t tenant_id, + uint64_t index_table_id, + common::hash::ObHashMap &part_id_map) +{ + int ret = OB_SUCCESS; + const ObTableSchema *table_schema = nullptr; + const ObTableSchema *index_schema = nullptr; + if (OB_ISNULL(schema_guard)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret)); + } else if (OB_FAIL(schema_guard->get_table_schema(tenant_id, + index_table_id, + index_schema))) { + LOG_WARN("failed to get simple table schema", K(ret)); + } else if (OB_FAIL(schema_guard->get_table_schema(tenant_id, + index_schema->get_data_table_id(), + table_schema))) { + LOG_WARN("failed to get simple table schema", K(ret)); + } else if (OB_UNLIKELY(index_schema->is_global_index_table())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("global index cannot build part maps", K(ret), K(index_table_id)); + } else if (!table_schema->is_partitioned_table()) { + // do nothing + } else { + const bool is_twopart = (table_schema->get_part_level() == share::schema::PARTITION_LEVEL_TWO); + ObCheckPartitionMode check_partition_mode = CHECK_PARTITION_MODE_NORMAL; + ObPartIterator table_itr(*table_schema, check_partition_mode); + ObPartIterator index_itr(*index_schema, check_partition_mode); + const ObPartition *table_part = NULL; + const ObPartition *index_part = NULL; + while (OB_SUCC(ret) && OB_SUCC(table_itr.next(table_part))) { + if (OB_FAIL(index_itr.next(index_part))) { + LOG_WARN("get unexpect end", K(ret)); + } else if (OB_FAIL(part_id_map.set_refactored(index_part->get_part_id(), + table_part->get_part_id(), + 1))) { + LOG_WARN("failed to set refactored", K(ret)); + } else if (is_twopart && + OB_FAIL(build_sub_part_maps(table_schema, + index_schema, + index_part, + table_part, + check_partition_mode, + part_id_map))) { + LOG_WARN("failed to build sub part maps", K(ret)); + } + } + ret = (ret == OB_ITER_END ? OB_SUCCESS : ret); + } + return ret; +} + +int ObDbmsStatsUtils::build_sub_part_maps(const ObTableSchema* table_schema, + const ObTableSchema* index_schema, + const ObPartition *index_part, + const ObPartition *table_part, + ObCheckPartitionMode mode, + common::hash::ObHashMap &part_id_map) +{ + int ret = OB_SUCCESS; + ObSubPartIterator table_itr(*table_schema, *table_part, mode); + ObSubPartIterator index_itr(*index_schema, *index_part, mode); + const ObSubPartition *index_sub_part = NULL; + const ObSubPartition *table_sub_part = NULL; + while (OB_SUCC(ret) && OB_SUCC(table_itr.next(table_sub_part))) { + if (OB_FAIL(index_itr.next(index_sub_part))) { + LOG_WARN("get unexpected end", K(ret)); + } else if (OB_FAIL(part_id_map.set_refactored(index_sub_part->get_sub_part_id(), + table_sub_part->get_sub_part_id(), + 1))) { + LOG_WARN("failed to set refactored", K(ret), K(index_sub_part->get_sub_part_id()), K(table_sub_part->get_sub_part_id())); + } + } + ret = (ret == OB_ITER_END ? OB_SUCCESS : ret); + return ret; +} + +int ObDbmsStatsUtils::deduce_index_column_stat_to_table(share::schema::ObSchemaGetterGuard *schema_guard, + uint64_t tenant_id, + uint64_t index_table_id, + uint64_t data_table_id, + ObPartitionLevel part_level, + ObIArray &all_column_stats) +{ + int ret = OB_SUCCESS; + if (part_level == schema::ObPartitionLevel::PARTITION_LEVEL_ZERO) { + for (int64_t i = 0; OB_SUCC(ret) && i < all_column_stats.count(); ++i) { + if (OB_ISNULL(all_column_stats.at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret)); + } else if (all_column_stats.at(i)->get_table_id() == index_table_id) { + all_column_stats.at(i)->set_table_id(data_table_id); + all_column_stats.at(i)->set_partition_id(data_table_id); + } + } + } else { + common::hash::ObHashMap part_ids; + if (OB_FAIL(part_ids.create(128, "DbmsStatsParts"))) { + LOG_WARN("failed to create map", K(ret)); + } else if (OB_FAIL(build_index_part_to_table_part_maps(schema_guard, + tenant_id, + index_table_id, + part_ids))) { + LOG_WARN("failed to build index part to table maps", K(ret)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < all_column_stats.count(); ++i) { + ObObjectID part_id; + if (OB_ISNULL(all_column_stats.at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret)); + } else if (all_column_stats.at(i)->get_table_id() != index_table_id) { + // do nothing + } else if (all_column_stats.at(i)->get_partition_id() == -1) { + all_column_stats.at(i)->set_table_id(data_table_id); + } else if (OB_FAIL(part_ids.get_refactored(all_column_stats.at(i)->get_partition_id(), + part_id))) { + if (OB_HASH_NOT_EXIST == ret) { + LOG_WARN("cannot trans column part ids", K(ret), K(all_column_stats.at(i)->get_partition_id())); + ret = OB_SUCCESS; + } else { + LOG_WARN("cannot find part ids", K(ret), K(all_column_stats.at(i)->get_partition_id())); + } + } else { + all_column_stats.at(i)->set_table_id(data_table_id); + all_column_stats.at(i)->set_partition_id(part_id); + } + } + } + } + return ret; +} int ObDbmsStatsUtils::cancel_async_gather_stats(sql::ObExecContext &ctx) { @@ -1480,6 +1619,70 @@ int ObDbmsStatsUtils::cancel_async_gather_stats(sql::ObExecContext &ctx) } return ret; } +int ObDbmsStatsUtils::get_prefix_index_substr_length(const share::schema::ObColumnSchemaV2 &col, + int64_t &length) +{ + int ret = OB_SUCCESS; + bool valid = true; + if (col.is_prefix_column()) { + const ObString &column_name = col.get_column_name_str(); + int64_t index = 8; + while (index < column_name.length() && *(column_name.ptr() + index) != '_') { + ++index; + } + length = ObFastAtoi::atoi(column_name.ptr() + 8, column_name.ptr() + index, valid); + if (!valid) { + length = 0; + } + } + return ret; +} + +int ObDbmsStatsUtils::get_prefix_index_text_pairs(share::schema::ObSchemaGetterGuard *schema_guard, + uint64_t tenant_id, + uint64_t data_table_id, + ObIArray &func_idxs, + ObIArray &ignore_cols, + ObIArray &pairs) +{ + int ret = OB_SUCCESS; + const share::schema::ObTableSchema *table_schema = NULL; + ObSEArray all_text_pairs; + if (OB_ISNULL(schema_guard)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret)); + } else if (OB_FAIL(ObOptimizerUtil::remove_item(func_idxs, ignore_cols))) { + LOG_WARN("failed to remove item", K(ret)); + } else if (func_idxs.empty()) { + // do nothing + } else if (OB_FAIL(schema_guard->get_table_schema(tenant_id, + data_table_id, + table_schema))) { + LOG_WARN("failed to get table schema", K(ret)); + } else if (OB_ISNULL(table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret)); + } else if (OB_FAIL(get_all_prefix_index_text_pairs(*table_schema, + all_text_pairs))) { + LOG_WARN("failed to get all prefix index text pairs", K(ret)); + } else if (all_text_pairs.empty()) { + // do nothing + } else { + bool find = false; + for (int64_t i = 0; OB_SUCC(ret) && i < func_idxs.count(); ++i) { + find = false; + for (int64_t j = 0; OB_SUCC(ret) && !find && j < all_text_pairs.count(); ++j) { + if (all_text_pairs.at(j).prefix_column_id_ == func_idxs.at(i)) { + find = true; + if (OB_FAIL(pairs.push_back(all_text_pairs.at(j)))) { + LOG_WARN("failed to push back pairs", K(ret)); + } + } + } + } + } + return ret; +} int ObDbmsStatsUtils::fetch_need_cancel_async_gather_stats_task(ObIAllocator &allocator, sql::ObExecContext &ctx, @@ -1535,6 +1738,191 @@ int ObDbmsStatsUtils::fetch_need_cancel_async_gather_stats_task(ObIAllocator &al } return ret; } +int ObDbmsStatsUtils::get_all_prefix_index_text_pairs(const share::schema::ObTableSchema &table_schema, + ObIArray &pairs) +{ + int ret = OB_SUCCESS; + ObSEArray ref_column_ids; + int64_t prefix_length = 0; + common::hash::ObHashMap prefix_columns; + if (OB_FAIL(prefix_columns.create(64, "DbmsStatsPrefix"))) { + LOG_WARN("failed to create map", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < table_schema.get_column_count(); ++i) { + const share::schema::ObColumnSchemaV2 *col = table_schema.get_column_schema_by_idx(i); + const share::schema::ObColumnSchemaV2 *ref_col = NULL; + int64_t pair_index = 0; + if (OB_ISNULL(col)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret)); + } else if (col->is_prefix_column()) { + ref_column_ids.reuse(); + if (OB_FAIL(col->get_cascaded_column_ids(ref_column_ids))) { + LOG_WARN("failed to get cascaded column ids", K(ret)); + } else if (ref_column_ids.count() != 1) { + // do nothing + } else if (OB_ISNULL(ref_col = table_schema.get_column_schema(ref_column_ids.at(0)))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret)); + } else if (OB_FAIL(ObDbmsStatsUtils::get_prefix_index_substr_length(*col, prefix_length))) { + LOG_WARN("failed to get prefix index substr", K(ret)); + } else if (OB_FAIL(prefix_columns.get_refactored(ref_col->get_column_id(), pair_index))) { + if (OB_HASH_NOT_EXIST == ret) { + ret = OB_SUCCESS; + PrefixColumnPair new_pair(col->get_column_id(), ref_col->get_column_id(), prefix_length); + new_pair.related_column_meta_ = ref_col->get_meta_type(); + if (OB_FAIL(pairs.push_back(new_pair))) { + LOG_WARN("failed to push back pairs", K(ret)); + } else if (OB_FAIL(prefix_columns.set_refactored(ref_col->get_column_id(), pairs.count() - 1))) { + LOG_WARN("failed to set refacotred", K(ret)); + } + } + } else if (pairs.at(pair_index).prefix_length_ >= prefix_length) { + // do nothing + } else { + pairs.at(pair_index).prefix_column_id_ = col->get_column_id(); + pairs.at(pair_index).related_column_id_ = ref_col->get_column_id(); + pairs.at(pair_index).prefix_length_ = prefix_length; + pairs.at(pair_index).related_column_meta_ = ref_col->get_meta_type(); + } + } + } + return ret; +} + +int ObDbmsStatsUtils::copy_local_index_prefix_stats_to_text(ObIAllocator &allocator, + const ObIArray &column_stats, + const ObIArray &pairs, + ObIArray ©_stats) +{ + int ret = OB_SUCCESS; + for (int64_t i = 0; OB_SUCC(ret) && i < pairs.count(); ++i) { + const PrefixColumnPair &pair = pairs.at(i); + for (int64_t j = 0; OB_SUCC(ret) && j < column_stats.count(); ++j) { + const ObOptColumnStat *col_stat = column_stats.at(j); + if (OB_ISNULL(col_stat)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unpexcted null", K(ret)); + } else if (col_stat->get_column_id() == pair.prefix_column_id_) { + ObOptColumnStat *text_col_stat = NULL; + void *ptr = NULL; + if (OB_FAIL(copy_prefix_column_stat_to_text(allocator, + *col_stat, + pair.related_column_meta_, + text_col_stat))) { + LOG_WARN("failed to copy prefix column stat to text", K(ret)); + } else if (OB_ISNULL(text_col_stat)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret)); + } else if (OB_FAIL(copy_stats.push_back(text_col_stat))) { + LOG_WARN("failed to push back copy stats", K(ret)); + } else { + text_col_stat->set_column_id(pair.related_column_id_); + } + } + } + } + return ret; +} + +int ObDbmsStatsUtils::copy_global_index_prefix_stats_to_text(share::schema::ObSchemaGetterGuard *schema_guard, + ObIAllocator &allocator, + const ObIArray &column_stats, + const ObIArray &pairs, + uint64_t tenant_id, + uint64_t data_table_id, + ObIArray ©_stats) +{ + int ret = OB_SUCCESS; + const ObTableSchema *table_schema = nullptr; + ObPartitionLevel part_level = schema::ObPartitionLevel::PARTITION_LEVEL_ZERO; + if (OB_ISNULL(schema_guard)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret)); + } else if (OB_FAIL(schema_guard->get_table_schema(tenant_id, + data_table_id, + table_schema))) { + LOG_WARN("failed to get table schema", K(ret)); + } else if (OB_ISNULL(table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected table schema", K(ret)); + } else { + part_level = table_schema->get_part_level(); + } + for (int64_t i = 0; OB_SUCC(ret) && i < pairs.count(); ++i) { + const PrefixColumnPair &pair = pairs.at(i); + for (int64_t j = 0; OB_SUCC(ret) && j < column_stats.count(); ++j) { + const ObOptColumnStat *col_stat = column_stats.at(j); + if (OB_ISNULL(col_stat)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unpexcted null", K(ret)); + } else if (col_stat->get_column_id() == pair.prefix_column_id_ && + (col_stat->get_partition_id() == -1 || + col_stat->get_partition_id() == col_stat->get_table_id())) { + ObOptColumnStat *text_col_stat = NULL; + void *ptr = NULL; + if (OB_FAIL(copy_prefix_column_stat_to_text(allocator, + *col_stat, + pair.related_column_meta_, + text_col_stat))) { + LOG_WARN("failed to copy prefix column stat to text", K(ret)); + } else if (OB_ISNULL(text_col_stat)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret)); + } else if (OB_FAIL(copy_stats.push_back(text_col_stat))) { + LOG_WARN("failed to push back copy stats", K(ret)); + } else { + text_col_stat->set_column_id(pair.related_column_id_); + text_col_stat->set_table_id(data_table_id); + text_col_stat->set_partition_id(part_level == schema::ObPartitionLevel::PARTITION_LEVEL_ZERO ? + data_table_id : -1); + } + } + } + } + return ret; +} + +int ObDbmsStatsUtils::copy_prefix_column_stat_to_text(ObIAllocator &allocator, + const ObOptColumnStat &col_stat, + const ObObjMeta &text_col_meta, + ObOptColumnStat *&text_col_stat) +{ + int ret = OB_SUCCESS; + void *ptr = NULL; + ObString min_value; + ObString max_value; + if (OB_ISNULL(ptr = allocator.alloc(sizeof(ObOptColumnStat)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc opt column stat", K(ret)); + } else if (OB_FALSE_IT(text_col_stat = new (ptr) ObOptColumnStat(allocator))) { + } else if (OB_FAIL(text_col_stat->assign(col_stat))) { + LOG_WARN("failed to deep copy text col stat", K(ret)); + } else if (OB_FAIL(text_col_stat->deep_copy_histogram(col_stat.get_histogram()))) { + LOG_WARN("failed to deep copy histogram", K(ret)); + } else if (OB_FAIL(text_col_stat->deep_copy_llc_bitmap(col_stat.get_llc_bitmap(), + col_stat.get_llc_bitmap_size()))) { + LOG_WARN("failed to deep copy llc bitmap", K(ret)); + } else if (OB_FAIL(col_stat.get_max_value().get_string(max_value))) { + LOG_WARN("failed to get max value", K(ret)); + } else if (OB_FAIL(col_stat.get_min_value().get_string(min_value))) { + LOG_WARN("failed to get min value", K(ret)); + } else if (OB_FAIL(sql::ObTextStringHelper::str_to_lob_storage_obj(allocator, + max_value, + text_col_stat->get_max_value()))) { + LOG_WARN("failed to convert str to lob", K(ret)); + } else if (OB_FAIL(sql::ObTextStringHelper::str_to_lob_storage_obj(allocator, + min_value, + text_col_stat->get_min_value()))) { + LOG_WARN("failed to convert str to lob", K(ret)); + } else { + ObObjMeta meta = text_col_meta; + meta.set_has_lob_header(); + text_col_stat->get_max_value().set_meta_type(meta); + text_col_stat->get_min_value().set_meta_type(meta); + } + return ret; +} } } diff --git a/src/share/stat/ob_dbms_stats_utils.h b/src/share/stat/ob_dbms_stats_utils.h index 8ab24a738e..fa574f7541 100644 --- a/src/share/stat/ob_dbms_stats_utils.h +++ b/src/share/stat/ob_dbms_stats_utils.h @@ -71,6 +71,7 @@ public: static int check_is_stat_table(share::schema::ObSchemaGetterGuard &schema_guard, const uint64_t tenant_id, const int64_t table_id, + bool need_index_table, bool &is_valid); static int check_is_sys_table(share::schema::ObSchemaGetterGuard &schema_guard, @@ -199,6 +200,46 @@ public: static int cancel_async_gather_stats(sql::ObExecContext &ctx); + static int build_index_part_to_table_part_maps(share::schema::ObSchemaGetterGuard *schema_guard, + uint64_t tenant_id, + uint64_t index_table_id, + common::hash::ObHashMap &part_id_map); + + static int deduce_index_column_stat_to_table(share::schema::ObSchemaGetterGuard *schema_guard, + uint64_t tenant_id, + uint64_t index_table_id, + uint64_t data_table_id, + ObPartitionLevel part_level, + ObIArray &all_column_stats); + + static int get_prefix_index_substr_length(const share::schema::ObColumnSchemaV2 &col, + int64_t &length); + + static int get_prefix_index_text_pairs(share::schema::ObSchemaGetterGuard *schema_guard, + uint64_t tenant_id, + uint64_t data_table_id, + ObIArray &func_idxs, + ObIArray &ignore_cols, + ObIArray &pairs); + static int get_all_prefix_index_text_pairs(const share::schema::ObTableSchema &table_schema, + ObIArray &filter_pairs); + + static int copy_local_index_prefix_stats_to_text(ObIAllocator &allocator, + const ObIArray &column_stats, + const ObIArray &pairs, + ObIArray ©_stats); + static int copy_global_index_prefix_stats_to_text(share::schema::ObSchemaGetterGuard *schema_guard, + ObIAllocator &allocator, + const ObIArray &column_stats, + const ObIArray &pairs, + uint64_t tenant_id, + uint64_t data_table_id, + ObIArray &all_column_stats); + static int copy_prefix_column_stat_to_text(ObIAllocator &allocator, + const ObOptColumnStat &col_stat, + const ObObjMeta &text_col_meta, + ObOptColumnStat *&text_column_stat); + private: static int batch_write(share::schema::ObSchemaGetterGuard *schema_guard, const uint64_t tenant_id, @@ -213,6 +254,12 @@ private: static int fetch_need_cancel_async_gather_stats_task(ObIAllocator &allocator, sql::ObExecContext &ctx, ObIArray &task_ids); + static int build_sub_part_maps(const ObTableSchema* table_schema, + const ObTableSchema* index_schema, + const ObPartition *index_part, + const ObPartition *table_part, + ObCheckPartitionMode mode, + common::hash::ObHashMap &part_id_map); }; diff --git a/src/share/stat/ob_incremental_stat_estimator.cpp b/src/share/stat/ob_incremental_stat_estimator.cpp index a7b25baa1f..b818f5d758 100644 --- a/src/share/stat/ob_incremental_stat_estimator.cpp +++ b/src/share/stat/ob_incremental_stat_estimator.cpp @@ -1169,5 +1169,74 @@ int ObIncrementalStatEstimator::prepare_get_opt_stats_param(const ObTableStatPar return ret; } +int ObIncrementalStatEstimator::derive_part_index_column_stat_by_subpart_index(ObExecContext &ctx, + ObIAllocator &alloc, + const ObTableStatParam ¶m, + const ObIArray &part_index_stats, + ObIArray &approx_part_opt_stats) +{ + int ret = OB_SUCCESS; + int64_t cur_part_id = OB_INVALID_ID; + for (int64_t i = 0; OB_SUCC(ret) && i < param.approx_part_infos_.count(); ++i) { + ObOptStat opt_part_stat; + ObSEArray subpart_opt_stats; + for (int64_t j = 0; OB_SUCC(ret) && j < part_index_stats.count(); ++j) { + if (OB_ISNULL(part_index_stats.at(j).table_stat_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected error", K(ret), K(part_index_stats.at(j).table_stat_)); + } else if (ObDbmsStatsUtils::is_subpart_id(param.all_subpart_infos_, + part_index_stats.at(j).table_stat_->get_partition_id(), + cur_part_id)) { + if (param.approx_part_infos_.at(i).part_id_ == cur_part_id) { + if (OB_FAIL(subpart_opt_stats.push_back(part_index_stats.at(j)))) { + LOG_WARN("failed to push back", K(ret)); + } else {/*do nothing*/} + } else {/*do nothing*/} + } else {/*do nothing*/} + } + //derive part stat from subpart stats + if (OB_SUCC(ret)) { + if (OB_UNLIKELY(subpart_opt_stats.count() != param.approx_part_infos_.at(i).subpart_cnt_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected error", K(ret), K(subpart_opt_stats.count()), + K(param.approx_part_infos_.at(i))); + } else if (OB_FAIL(do_derive_global_stat(ctx, alloc, param, subpart_opt_stats, false, + PARTITION_LEVEL, param.approx_part_infos_.at(i).part_id_, + opt_part_stat))) { + LOG_WARN("Failed to derive global stat from part stat", K(ret)); + } else if (OB_FAIL(approx_part_opt_stats.push_back(opt_part_stat))) { + LOG_WARN("faield to push back", K(ret)); + } else {/*do nothing*/} + } + } + return ret; +} + +int ObIncrementalStatEstimator::derive_global_index_column_stat_by_part_index(ObExecContext &ctx, + ObIAllocator &alloc, + const ObTableStatParam ¶m, + const ObIArray &part_index_stats, + ObOptStat &global_opt_stat) +{ + int ret = OB_SUCCESS; + ObSEArray tmp_opt_stats; + bool need_derive_hist = true; + if (part_index_stats.empty()) { + /*do nothing*/ + } else if (!param.part_name_.empty()) { + /*do nothing*/ + } else if (OB_FAIL(tmp_opt_stats.assign(part_index_stats))) { + LOG_WARN("failed to assign", K(ret)); + } else if (OB_FAIL(do_derive_global_stat(ctx, alloc, param, tmp_opt_stats, false, + TABLE_LEVEL,param.global_part_id_, + global_opt_stat))) { + LOG_WARN("Failed to derive global stat from part stat", K(ret)); + } else { + LOG_TRACE("Succeed to derive global stat from part stats", K(global_opt_stat), K(param)); + } + return ret; +} + + } // namespace common } // namespace oceanbase diff --git a/src/share/stat/ob_incremental_stat_estimator.h b/src/share/stat/ob_incremental_stat_estimator.h index 5ac975c449..b6812d90b3 100644 --- a/src/share/stat/ob_incremental_stat_estimator.h +++ b/src/share/stat/ob_incremental_stat_estimator.h @@ -52,6 +52,18 @@ public: static int derive_global_index_stat_by_part_index_stats(const ObTableStatParam ¶m, const ObIArray &part_index_stats, ObIArray &all_index_stats); + + static int derive_part_index_column_stat_by_subpart_index(ObExecContext &ctx, + ObIAllocator &alloc, + const ObTableStatParam ¶m, + const ObIArray &part_index_stats, + ObIArray &approx_part_opt_stats); + + static int derive_global_index_column_stat_by_part_index(ObExecContext &ctx, + ObIAllocator &alloc, + const ObTableStatParam ¶m, + const ObIArray &part_index_stats, + ObOptStat &global_opt_stat); private: static int do_derive_part_stats_from_subpart_stats( ObExecContext &ctx, diff --git a/src/share/stat/ob_index_stats_estimator.cpp b/src/share/stat/ob_index_stats_estimator.cpp index a2d24537ce..951ad584bc 100644 --- a/src/share/stat/ob_index_stats_estimator.cpp +++ b/src/share/stat/ob_index_stats_estimator.cpp @@ -91,7 +91,15 @@ int ObIndexStatsEstimator::estimate(const ObOptStatGatherParam ¶m, const ObColumnStatParam *col_param = &column_params.at(i); if (OB_FAIL(add_stat_item(ObStatAvgLen(col_param, src_col_stats.at(i))))) { LOG_WARN("failed to add statistic item", K(ret)); - } else {/*do nothing*/} + } else if (!col_param->need_basic_stat()) { + // do nothing + } else if (OB_FAIL(add_stat_item(ObStatMaxValue(col_param, src_col_stats.at(i)))) || + OB_FAIL(add_stat_item(ObStatMinValue(col_param, src_col_stats.at(i)))) || + OB_FAIL(add_stat_item(ObStatNumNull(col_param, src_tab_stat, src_col_stats.at(i)))) || + OB_FAIL(add_stat_item(ObStatNumDistinct(col_param, src_col_stats.at(i), param.need_approx_ndv_))) || + OB_FAIL(add_stat_item(ObStatLlcBitmap(col_param, src_col_stats.at(i))))) { + LOG_WARN("failed to add statistic item", K(ret)); + } } if (OB_SUCC(ret)) { if (OB_FAIL(add_stat_item(ObStatAvgRowLen(src_tab_stat, src_col_stats)))) { diff --git a/src/share/stat/ob_opt_stat_manager.cpp b/src/share/stat/ob_opt_stat_manager.cpp index 8afbe48eb9..e96fcb310e 100644 --- a/src/share/stat/ob_opt_stat_manager.cpp +++ b/src/share/stat/ob_opt_stat_manager.cpp @@ -556,7 +556,7 @@ int ObOptStatManager::check_opt_stat_validity(sql::ObExecContext &ctx, } else if (!is_valid) { //do nothing } else if (OB_FAIL(ObDbmsStatsUtils::check_is_stat_table(*ctx.get_virtual_table_ctx().schema_guard_, - tenant_id, table_ref_id, is_valid))) { + tenant_id, table_ref_id, true, is_valid))) { LOG_WARN("failed to check is stat table", K(ret)); } else if (!is_valid) { //do nothing diff --git a/src/share/stat/ob_opt_stat_monitor_manager.cpp b/src/share/stat/ob_opt_stat_monitor_manager.cpp index 02f5b53397..556f66e521 100644 --- a/src/share/stat/ob_opt_stat_monitor_manager.cpp +++ b/src/share/stat/ob_opt_stat_monitor_manager.cpp @@ -934,6 +934,7 @@ int ObOptStatMonitorManager::gen_tablet_list(const ObIArray &dml_s if (!is_valid && OB_FAIL(ObDbmsStatsUtils::check_is_stat_table(schema_guard, dml_stats.at(i).tenant_id_, dml_stats.at(i).table_id_, + false, is_valid))) { LOG_WARN("failed to check is stat table", K(ret)); } else if (is_valid) { diff --git a/src/share/stat/ob_stat_define.cpp b/src/share/stat/ob_stat_define.cpp index 342fb79728..d98bcee350 100644 --- a/src/share/stat/ob_stat_define.cpp +++ b/src/share/stat/ob_stat_define.cpp @@ -33,7 +33,7 @@ void ObAnalyzeSampleInfo::set_rows(double row_num) sample_value_ = row_num; } -bool ObColumnStatParam::is_valid_opt_col_type(const ObObjType type) +bool ObColumnStatParam::is_valid_opt_col_type(const ObObjType type, bool is_online_stat) { bool ret = false; // currently, we only support the following type to collect histogram @@ -54,7 +54,8 @@ bool ObColumnStatParam::is_valid_opt_col_type(const ObObjType type) type_class == ColumnTypeClass::ObEnumSetTC || type_class == ColumnTypeClass::ObIntervalTC || type_class == ColumnTypeClass::ObDecimalIntTC || - (lib::is_mysql_mode() && type_class == ColumnTypeClass::ObTextTC)) { + (lib::is_mysql_mode() && type_class == ColumnTypeClass::ObTextTC) || + (!is_online_stat && lib::is_mysql_mode() && type_class == ColumnTypeClass::ObTextTC)) { ret = true; } return ret; @@ -270,6 +271,9 @@ int ObOptStatGatherParam::assign(const ObOptStatGatherParam &other) hist_sample_info_.is_block_sample_ = other.hist_sample_info_.is_block_sample_; hist_sample_info_.sample_type_ = other.hist_sample_info_.sample_type_; hist_sample_info_.sample_value_ = other.hist_sample_info_.sample_value_; + data_table_id_ = other.data_table_id_; + is_global_index_ = other.is_global_index_; + part_level_ = other.part_level_; if (OB_FAIL(partition_infos_.assign(other.partition_infos_))) { LOG_WARN("failed to assign", K(ret)); } else if (OB_FAIL(column_params_.assign(other.column_params_))) { diff --git a/src/share/stat/ob_stat_define.h b/src/share/stat/ob_stat_define.h index a1fc259c89..73e003a82e 100644 --- a/src/share/stat/ob_stat_define.h +++ b/src/share/stat/ob_stat_define.h @@ -124,7 +124,8 @@ enum ColumnAttrFlag IS_INDEX_COL = 1, IS_HIDDEN_COL = 1 << 1, IS_UNIQUE_COL = 1 << 2, - IS_NOT_NULL_COL = 1 << 3 + IS_NOT_NULL_COL = 1 << 3, + IS_TEXT_COL = 1 << 4 }; enum ColumnGatherFlag @@ -414,10 +415,13 @@ struct ObColumnStatParam { inline void set_is_hidden_column() { column_attribute_ |= ColumnAttrFlag::IS_HIDDEN_COL; } inline void set_is_unique_column() { column_attribute_ |= ColumnAttrFlag::IS_UNIQUE_COL; } inline void set_is_not_null_column() { column_attribute_ |= ColumnAttrFlag::IS_NOT_NULL_COL; } + inline void set_is_text_column() { column_attribute_ |= ColumnAttrFlag::IS_TEXT_COL; } inline bool is_index_column() const { return column_attribute_ & ColumnAttrFlag::IS_INDEX_COL; } inline bool is_hidden_column() const { return column_attribute_ & ColumnAttrFlag::IS_HIDDEN_COL; } inline bool is_unique_column() const { return column_attribute_ & ColumnAttrFlag::IS_UNIQUE_COL; } inline bool is_not_null_column() const { return column_attribute_ & ColumnAttrFlag::IS_NOT_NULL_COL; } + inline bool is_text_column() const { return column_attribute_ & ColumnAttrFlag::IS_TEXT_COL; } + inline void unset_text_column() { column_attribute_ &= ~ColumnAttrFlag::IS_TEXT_COL; } inline void set_valid_opt_col() { gather_flag_ |= ColumnGatherFlag::VALID_OPT_COL; } inline void set_need_basic_stat() { gather_flag_ |= ColumnGatherFlag::NEED_BASIC_STAT; } inline void set_need_avg_len() { gather_flag_ |= ColumnGatherFlag::NEED_AVG_LEN; } @@ -425,6 +429,7 @@ struct ObColumnStatParam { inline bool need_basic_stat() const { return gather_flag_ & ColumnGatherFlag::NEED_BASIC_STAT; } inline bool need_avg_len() const { return gather_flag_ & ColumnGatherFlag::NEED_AVG_LEN; } inline bool need_col_stat() const { return gather_flag_ != ColumnGatherFlag::NO_NEED_STAT; } + inline void unset_need_basic_stat() { gather_flag_ &= ~ColumnGatherFlag::NEED_BASIC_STAT; } ObString column_name_; uint64_t column_id_; @@ -436,7 +441,7 @@ struct ObColumnStatParam { int64_t column_usage_flag_; int64_t gather_flag_; - static bool is_valid_opt_col_type(const ObObjType type); + static bool is_valid_opt_col_type(const ObObjType type, bool is_online_stat = false); static bool is_valid_avglen_type(const ObObjType type); static const int64_t DEFAULT_HISTOGRAM_BUCKET_NUM; @@ -457,6 +462,33 @@ struct ObColumnGroupStatParam { TO_STRING_KV(K(column_group_id_), K(column_id_arr_)); }; +struct PrefixColumnPair { + PrefixColumnPair() : PrefixColumnPair(OB_INVALID_ID, + OB_INVALID_ID, + 0) {} + PrefixColumnPair(uint64_t p, uint64_t r, int64_t l) + : prefix_column_id_(p), related_column_id_(r), prefix_length_(l) {} + + PrefixColumnPair(const PrefixColumnPair &other) { + *this = other; + } + + void operator = (const PrefixColumnPair &other) { + prefix_column_id_ = other.prefix_column_id_; + related_column_id_ = other.related_column_id_; + prefix_length_ = other.prefix_length_; + related_column_meta_ = other.related_column_meta_; + } + + TO_STRING_KV(K(prefix_column_id_), + K(related_column_id_), + K(prefix_length_)); + uint64_t prefix_column_id_; + uint64_t related_column_id_; + int64_t prefix_length_; + ObObjMeta related_column_meta_; +}; + struct ObTableStatParam { static const int64_t INVALID_GLOBAL_PART_ID = -2; static const int64_t DEFAULT_DATA_PART_ID = -1; @@ -599,6 +631,7 @@ struct ObTableStatParam { int64_t async_full_table_size_; const ObIArray *async_partition_ids_; ObAnalyzeSampleInfo hist_sample_info_; + ObSEArray prefix_column_pairs_; TO_STRING_KV(K(tenant_id_), K(db_name_), @@ -648,7 +681,8 @@ struct ObTableStatParam { K(async_gather_sample_size_), K(async_full_table_size_), KPC(async_partition_ids_), - K(hist_sample_info_)); + K(hist_sample_info_), + K(prefix_column_pairs_)); }; struct ObOptStatGatherParam { @@ -680,7 +714,10 @@ struct ObOptStatGatherParam { is_async_gather_(false), async_gather_sample_size_(DEFAULT_ASYNC_SAMPLE_SIZE), async_full_table_size_(DEFAULT_ASYNC_FULL_TABLE_SIZE), - hist_sample_info_() + hist_sample_info_(), + data_table_id_(OB_INVALID_ID), + is_global_index_(false), + part_level_(share::schema::ObPartitionLevel::PARTITION_LEVEL_ZERO) {} int assign(const ObOptStatGatherParam &other); int64_t get_need_gather_column() const; @@ -712,6 +749,9 @@ struct ObOptStatGatherParam { int64_t async_gather_sample_size_; int64_t async_full_table_size_; ObAnalyzeSampleInfo hist_sample_info_; + uint64_t data_table_id_; + bool is_global_index_; + share::schema::ObPartitionLevel part_level_; TO_STRING_KV(K(tenant_id_), K(db_name_), @@ -738,7 +778,9 @@ struct ObOptStatGatherParam { K(is_async_gather_), K(async_gather_sample_size_), K(async_full_table_size_), - K(hist_sample_info_)); + K(hist_sample_info_), + K(data_table_id_), + K(is_global_index_)); }; struct ObOptStat diff --git a/src/sql/engine/opt_statistics/ob_optimizer_stats_gathering_op.cpp b/src/sql/engine/opt_statistics/ob_optimizer_stats_gathering_op.cpp index c4aac18a3c..48e0b2cd3d 100644 --- a/src/sql/engine/opt_statistics/ob_optimizer_stats_gathering_op.cpp +++ b/src/sql/engine/opt_statistics/ob_optimizer_stats_gathering_op.cpp @@ -358,7 +358,7 @@ int ObOptimizerStatsGatheringOp::calc_column_stats(ObExpr *expr, uint64_t column } else if (OB_ISNULL(global_col_stat)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(ret)); - } else if (!ObColumnStatParam::is_valid_opt_col_type(expr->obj_meta_.get_type())) { + } else if (!ObColumnStatParam::is_valid_opt_col_type(expr->obj_meta_.get_type(), true)) { // do nothing yet, should use the plain stats. } else if (OB_FAIL(expr->eval(eval_ctx_, datum))) { LOG_WARN("failed to eval expr", K(*expr)); diff --git a/src/sql/optimizer/ob_join_order.cpp b/src/sql/optimizer/ob_join_order.cpp index 1d5142e23f..fd9001580d 100644 --- a/src/sql/optimizer/ob_join_order.cpp +++ b/src/sql/optimizer/ob_join_order.cpp @@ -13402,7 +13402,8 @@ int ObJoinOrder::fill_path_index_meta_info(const uint64_t table_id, } else if (OB_ISNULL(table_schema)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("index schema should not be null", K(ret), K(index_id)); - } else if (OB_FAIL(init_est_info_for_index(index_id, + } else if (OB_FAIL(init_est_info_for_index(table_id, + index_id, index_meta_info, ap->table_partition_info_, ap->is_global_index_ ? *index_schema : *table_schema, @@ -13730,7 +13731,8 @@ int ObJoinOrder::init_est_sel_info_for_access_path(const uint64_t table_id, return ret; } -int ObJoinOrder::init_est_info_for_index(const uint64_t index_id, +int ObJoinOrder::init_est_info_for_index(const uint64_t table_id, + const uint64_t index_id, ObIndexMetaInfo &index_meta_info, ObTablePartitionInfo *table_partition_info, const share::schema::ObTableSchema &index_schema, @@ -13740,6 +13742,7 @@ int ObJoinOrder::init_est_info_for_index(const uint64_t index_id, has_opt_stat = false; ObSQLSessionInfo *session_info = NULL; ObSchemaGetterGuard *schema_guard = NULL; + if (OB_UNLIKELY(OB_INVALID_ID == index_id) || OB_ISNULL(table_partition_info) || OB_ISNULL(session_info = OPT_CTX.get_session_info()) || @@ -13765,6 +13768,7 @@ int ObJoinOrder::init_est_info_for_index(const uint64_t index_id, bool use_global = false; ObSEArray global_part_ids; double scale_ratio = 1.0; + double index_rows = 0.; if (OPT_CTX.use_default_stat()) { // do nothing } else if (OB_ISNULL(OPT_CTX.get_opt_stat_manager())) { @@ -13800,7 +13804,6 @@ int ObJoinOrder::init_est_info_for_index(const uint64_t index_id, } LOG_TRACE("statistics (0: default, 1: user-gathered, 2: user_gathered_global_stat)", K(stat_type), K(index_id), K(all_used_part_id)); - if (OB_SUCC(ret) && has_opt_stat) { ObGlobalTableStat stat; if (OB_FAIL(OPT_CTX.get_opt_stat_manager()->get_table_stat(session_info->get_effective_tenant_id(), @@ -13815,9 +13818,72 @@ int ObJoinOrder::init_est_info_for_index(const uint64_t index_id, static_cast(stat.get_avg_data_size() * all_used_part_id.count()) / origin_part_cnt; index_meta_info.index_micro_block_count_ = stat.get_micro_block_count(); + index_rows = stat.get_row_count(); LOG_TRACE("index table, use statistics", K(index_meta_info), K(stat)); } } + if (OB_SUCC(ret) && has_opt_stat && index_schema.is_global_index_table()) { + OptTableMeta* table_meta = NULL; + ObSEArray column_exprs; + ObSEArray column_ids; + if (OB_FAIL(get_plan()->get_column_exprs(table_id, column_exprs))) { + LOG_WARN("failed to get column exprs", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < column_exprs.count(); ++i) { + ObColumnRefRawExpr *col_expr = column_exprs.at(i); + if (OB_ISNULL(col_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null col expr", K(ret)); + } else if (!col_expr->is_hidden_column()) { + // do nothing + } else if (OB_FAIL(column_ids.push_back(col_expr->get_column_id()))) { + LOG_WARN("failed to push back column id", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (column_ids.empty()) { + // do nothing + } else if (OB_ISNULL(table_meta = get_plan()->get_basic_table_metas() + .get_table_meta_by_table_id(table_id))) { + // do nothing + } else { + double rows = table_meta->get_rows(); + double function_index_scale_ratio = 1.0; + if (rows < index_rows) { + function_index_scale_ratio = rows / index_rows; + } + for (int64_t i = 0; OB_SUCC(ret) && i < column_ids.count(); ++i) { + int64_t global_ndv = 0; + int64_t num_null = 0; + ObGlobalColumnStat stat; + OptColumnMeta *col_meta = table_meta->get_column_meta(column_ids.at(i)); + if (col_meta != NULL) { + if (OB_FAIL(OPT_CTX.get_opt_stat_manager()->get_column_stat(OPT_CTX.get_session_info()->get_effective_tenant_id(), + index_id, + all_used_part_id, + column_ids.at(i), + global_part_ids, + index_rows, + function_index_scale_ratio, + stat, + &OPT_CTX.get_allocator()))) { + LOG_WARN("failed to get column stats", K(ret)); + } else if (OB_FAIL(OptTableMeta::refine_column_stat(stat, rows, *col_meta))) { + LOG_WARN("failed to refine column stat", K(ret)); + } else { + global_ndv = col_meta->get_ndv(); + num_null = col_meta->get_num_null(); + col_meta->set_ndv(rows < global_ndv ? rows : global_ndv); + col_meta->set_num_null(rows < num_null ? rows : num_null); + col_meta->set_avg_len(stat.avglen_val_); + col_meta->set_min_value(stat.min_val_); + col_meta->set_max_value(stat.max_val_); + col_meta->set_min_max_inited(true); + } + } + } + } + } } } return ret; diff --git a/src/sql/optimizer/ob_join_order.h b/src/sql/optimizer/ob_join_order.h index 05331c0804..7c34c3a0c8 100644 --- a/src/sql/optimizer/ob_join_order.h +++ b/src/sql/optimizer/ob_join_order.h @@ -1534,7 +1534,8 @@ struct NullAwareAntiJoinInfo { const uint64_t ref_table_id, const share::schema::ObTableSchema &table_schema); - int init_est_info_for_index(const uint64_t index_id, + int init_est_info_for_index(const uint64_t table_id, + const uint64_t index_id, ObIndexMetaInfo &meta_info, ObTablePartitionInfo *table_partition_info, const share::schema::ObTableSchema &index_schema, diff --git a/src/sql/optimizer/ob_opt_selectivity.cpp b/src/sql/optimizer/ob_opt_selectivity.cpp index 4819b917c7..85272b0509 100644 --- a/src/sql/optimizer/ob_opt_selectivity.cpp +++ b/src/sql/optimizer/ob_opt_selectivity.cpp @@ -270,6 +270,8 @@ int OptTableMeta::init_column_meta(const OptSelectivityCtx &ctx, int ret = OB_SUCCESS; ObGlobalColumnStat stat; bool is_single_pkey = (1 == pk_ids_.count() && pk_ids_.at(0) == column_id); + int64_t global_ndv = 0; + int64_t num_null = 0; if (is_single_pkey) { col_meta.set_ndv(rows_); col_meta.set_num_null(0); @@ -288,14 +290,8 @@ int OptTableMeta::init_column_meta(const OptSelectivityCtx &ctx, scale_ratio_, stat))) { LOG_WARN("failed to get column stats", K(ret)); - } else if (0 == stat.ndv_val_ && 0 == stat.null_val_) { - col_meta.set_default_meta(rows_); - } else if (0 == stat.ndv_val_ && stat.null_val_ > 0) { - col_meta.set_ndv(1); - col_meta.set_num_null(stat.null_val_); - } else { - col_meta.set_ndv(stat.ndv_val_); - col_meta.set_num_null(stat.null_val_); + } else if (OB_FAIL(refine_column_stat(stat, rows_, col_meta))) { + LOG_WARN("failed to refine column stat", K(ret)); } if (OB_SUCC(ret)) { @@ -369,6 +365,34 @@ const OptColumnMeta* OptTableMeta::get_column_meta(const uint64_t column_id) con return column_meta; } +OptColumnMeta* OptTableMeta::get_column_meta(const uint64_t column_id) +{ + OptColumnMeta* column_meta = NULL; + for (int64_t i = 0; NULL == column_meta && i < column_metas_.count(); ++i) { + if (column_metas_.at(i).get_column_id() == column_id) { + column_meta = &column_metas_.at(i); + } + } + return column_meta; +} + +int OptTableMeta::refine_column_stat(const ObGlobalColumnStat &stat, + double rows, + OptColumnMeta &col_meta) +{ + int ret = OB_SUCCESS; + if (0 == stat.ndv_val_ && 0 == stat.null_val_) { + col_meta.set_default_meta(rows); + } else if (0 == stat.ndv_val_ && stat.null_val_ > 0) { + col_meta.set_ndv(1); + col_meta.set_num_null(stat.null_val_); + } else { + col_meta.set_ndv(stat.ndv_val_); + col_meta.set_num_null(stat.null_val_); + } + return ret; +} + int OptTableMetas::copy_table_meta_info(const OptTableMeta &src_meta, OptTableMeta *&dst_meta) { int ret = OB_SUCCESS; diff --git a/src/sql/optimizer/ob_opt_selectivity.h b/src/sql/optimizer/ob_opt_selectivity.h index 1599dea759..df04ee04b3 100644 --- a/src/sql/optimizer/ob_opt_selectivity.h +++ b/src/sql/optimizer/ob_opt_selectivity.h @@ -440,7 +440,7 @@ public: int add_column_meta_no_dup(const uint64_t column_id, const OptSelectivityCtx &ctx); const OptColumnMeta* get_column_meta(const uint64_t column_id) const; - + OptColumnMeta* get_column_meta(const uint64_t column_id); uint64_t get_table_id() const { return table_id_; } void set_table_id(const uint64_t &table_id) { table_id_ = table_id; } uint64_t get_ref_table_id() const { return ref_table_id_; } @@ -486,6 +486,10 @@ public: real_rows_ = -1.0; } + static int refine_column_stat(const ObGlobalColumnStat &stat, + double rows, + OptColumnMeta &col_meta); + TO_STRING_KV(K_(table_id), K_(ref_table_id), K_(table_type), K_(rows), K_(stat_type), K_(ds_level), K_(all_used_parts), K_(all_used_tablets), K_(pk_ids), K_(column_metas), K_(all_used_global_parts), K_(scale_ratio), K_(stat_locked), K_(distinct_rows), K_(real_rows)); diff --git a/src/storage/direct_load/ob_direct_load_insert_table_ctx.cpp b/src/storage/direct_load/ob_direct_load_insert_table_ctx.cpp index 6cb9539ac3..249f5657f3 100644 --- a/src/storage/direct_load/ob_direct_load_insert_table_ctx.cpp +++ b/src/storage/direct_load/ob_direct_load_insert_table_ctx.cpp @@ -958,7 +958,7 @@ int ObDirectLoadInsertTableContext::update_sql_statistics(ObTableLoadSqlStatisti const ObStorageDatum &datum = datum_row.storage_datums_[datum_idx]; const ObCmpFunc &cmp_func = param_.cmp_funcs_->at(i).get_cmp_func(); const ObColDesc &col_desc = param_.col_descs_->at(i); - const bool is_valid = ObColumnStatParam::is_valid_opt_col_type(col_desc.col_type_.get_type()); + const bool is_valid = ObColumnStatParam::is_valid_opt_col_type(col_desc.col_type_.get_type(), true); if (is_valid) { if (OB_FAIL(sql_statistics.get_col_stat(col_stat_idx, col_stat))) { LOG_WARN("fail to get col stat", KR(ret), K(col_stat_idx));