diff --git a/src/pl/sys_package/ob_dbms_stats.cpp b/src/pl/sys_package/ob_dbms_stats.cpp index de8ea221b..ab220a86c 100644 --- a/src/pl/sys_package/ob_dbms_stats.cpp +++ b/src/pl/sys_package/ob_dbms_stats.cpp @@ -244,8 +244,8 @@ int ObDbmsStats::gather_index_stats(ObExecContext &ctx, ParamStore ¶ms, ObOb LOG_WARN("failed to get common rpc", K(ret)); } else if (lib::is_oracle_mode() && !params.at(11).is_null()) { ret = OB_ERR_DBMS_STATS_PL; - LOG_WARN("table name not should be specified in gather index stats", K(ret)); - LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL,"table name not should be specified in gather index stats"); + LOG_WARN("table name shouldn't be specified in gather index stats", K(ret)); + LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL,"table name shouldn't be specified in gather index stats"); } else if (lib::is_mysql_mode() && params.at(11).is_null()) { ret = OB_ERR_DBMS_STATS_PL; LOG_WARN("table name should be specified in gather index stats", K(ret)); @@ -639,8 +639,8 @@ int ObDbmsStats::set_index_stats(ObExecContext &ctx, ParamStore ¶ms, ObObj & LOG_WARN("failed to get common rpc", K(ret)); } else if (lib::is_oracle_mode() && !params.at(22).is_null()) { ret = OB_ERR_DBMS_STATS_PL; - LOG_WARN("table name not should be specified in gather index stats", K(ret)); - LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL,"table name not should be specified in gather index stats"); + LOG_WARN("table name shouldn't be specified in gather index stats", K(ret)); + LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL,"table name shouldn't be specified in gather index stats"); } else if (lib::is_mysql_mode() && params.at(22).is_null()) { ret = OB_ERR_DBMS_STATS_PL; LOG_WARN("table name should be specified in gather index stats", K(ret)); @@ -1001,8 +1001,8 @@ int ObDbmsStats::delete_index_stats(ObExecContext &ctx, ParamStore ¶ms, ObOb LOG_WARN("failed to get common rpc", K(ret)); } else if (lib::is_oracle_mode() && !params.at(10).is_null()) { ret = OB_ERR_DBMS_STATS_PL; - LOG_WARN("table name not should be 
specified in gather index stats", K(ret)); - LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL,"table name not should be specified in gather index stats"); + LOG_WARN("table name shouldn't be specified in gather index stats", K(ret)); + LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL,"table name shouldn't be specified in gather index stats"); } else if (lib::is_mysql_mode() && params.at(10).is_null()) { ret = OB_ERR_DBMS_STATS_PL; LOG_WARN("table name should be specified in gather index stats", K(ret)); @@ -1245,10 +1245,6 @@ int ObDbmsStats::export_table_stats(ObExecContext &ctx, ParamStore ¶ms, ObOb K(stat_table_param.tab_name_)); LOG_USER_ERROR(OB_TABLE_NOT_EXIST, to_cstring(stat_table_param.db_name_), to_cstring(stat_table_param.tab_name_)); - } else if (table_schema->is_tmp_table()) { - ret = OB_NOT_SUPPORTED; - LOG_WARN("dbms_stats with temp table not support", K(ret)); - LOG_USER_ERROR(OB_NOT_SUPPORTED, "dbms_stats with temp table"); } else if (!params.at(4).is_null() && OB_FAIL(params.at(4).get_varchar(stat_param.stat_id_))) { LOG_WARN("failed to get stat id", K(ret)); } else if (!params.at(5).is_null() && OB_FAIL(params.at(5).get_bool(stat_param.cascade_))) { @@ -1322,10 +1318,6 @@ int ObDbmsStats::export_column_stats(sql::ObExecContext &ctx, K(stat_table_param.db_name_), K(stat_table_param.tab_name_)); LOG_USER_ERROR(OB_TABLE_NOT_EXIST, to_cstring(stat_table_param.db_name_), to_cstring(stat_table_param.tab_name_)); - } else if (table_schema->is_tmp_table()) { - ret = OB_NOT_SUPPORTED; - LOG_WARN("dbms_stats with temp table not support", K(ret)); - LOG_USER_ERROR(OB_NOT_SUPPORTED, "dbms_stats with temp table"); } else if (!params.at(5).is_null() && OB_FAIL(params.at(5).get_varchar((stat_param.stat_id_)))) { LOG_WARN("failed to get stat id ", K(ret)); } else { @@ -1378,10 +1370,6 @@ int ObDbmsStats::export_schema_stats(ObExecContext &ctx, ParamStore ¶ms, ObO K(stat_table_param.db_name_), K(stat_table_param.tab_name_)); LOG_USER_ERROR(OB_TABLE_NOT_EXIST, 
to_cstring(stat_table_param.db_name_), to_cstring(stat_table_param.tab_name_)); - } else if (table_schema->is_tmp_table()) { - ret = OB_NOT_SUPPORTED; - LOG_WARN("dbms_stats with temp table not support", K(ret)); - LOG_USER_ERROR(OB_NOT_SUPPORTED, "dbms_stats with temp table"); } else if (!params.at(2).is_null() && OB_FAIL(params.at(5).get_varchar((stat_table_param.stat_id_)))) { LOG_WARN("failed to get stat id ", K(ret)); } else { @@ -1437,8 +1425,8 @@ int ObDbmsStats::export_index_stats(ObExecContext &ctx, ParamStore ¶ms, ObOb LOG_WARN("failed to check tenant is restore", K(ret)); } else if (lib::is_oracle_mode() && !params.at(6).is_null()) { ret = OB_ERR_DBMS_STATS_PL; - LOG_WARN("table name not should be specified in gather index stats", K(ret)); - LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL,"table name not should be specified in gather index stats"); + LOG_WARN("table name shouldn't be specified in gather index stats", K(ret)); + LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL,"table name shouldn't be specified in gather index stats"); } else if (lib::is_mysql_mode() && params.at(6).is_null()) { ret = OB_ERR_DBMS_STATS_PL; LOG_WARN("table name should be specified in gather index stats", K(ret)); @@ -1463,10 +1451,6 @@ int ObDbmsStats::export_index_stats(ObExecContext &ctx, ParamStore ¶ms, ObOb K(stat_table_param.tab_name_)); LOG_USER_ERROR(OB_TABLE_NOT_EXIST, to_cstring(stat_table_param.db_name_), to_cstring(stat_table_param.tab_name_)); - } else if (table_schema->is_tmp_table()) { - ret = OB_NOT_SUPPORTED; - LOG_WARN("dbms_stats with temp table not support", K(ret)); - LOG_USER_ERROR(OB_NOT_SUPPORTED, "dbms_stats with temp table"); } else if (!params.at(4).is_null() && OB_FAIL(params.at(4).get_varchar(index_stat_param.stat_id_))) { LOG_WARN("failed to get stat id", K(ret)); } else { @@ -1563,10 +1547,6 @@ int ObDbmsStats::import_table_stats(ObExecContext &ctx, ParamStore ¶ms, ObOb K(stat_table_param.tab_name_)); LOG_USER_ERROR(OB_TABLE_NOT_EXIST, 
to_cstring(stat_table_param.db_name_), to_cstring(stat_table_param.tab_name_)); - } else if (table_schema->is_tmp_table()) { - ret = OB_NOT_SUPPORTED; - LOG_WARN("dbms_stats with temp table not support", K(ret)); - LOG_USER_ERROR(OB_NOT_SUPPORTED, "dbms_stats with temp table"); } else if (!params.at(4).is_null() && OB_FAIL(params.at(4).get_varchar(stat_param.stat_id_))) { LOG_WARN("failed to get stat id ", K(ret)); } else if (!params.at(5).is_null() && OB_FAIL(params.at(5).get_bool(stat_param.cascade_))) { @@ -1679,10 +1659,6 @@ int ObDbmsStats::import_column_stats(sql::ObExecContext &ctx, K(stat_table_param.db_name_), K(stat_table_param.tab_name_)); LOG_USER_ERROR(OB_TABLE_NOT_EXIST, to_cstring(stat_table_param.db_name_), to_cstring(stat_table_param.tab_name_)); - } else if (table_schema->is_tmp_table()) { - ret = OB_NOT_SUPPORTED; - LOG_WARN("dbms_stats with temp table not support", K(ret)); - LOG_USER_ERROR(OB_NOT_SUPPORTED, "dbms_stats with temp table"); } else if (!params.at(5).is_null() && OB_FAIL(params.at(5).get_varchar(stat_param.stat_id_))) { LOG_WARN("failed to get stat id ", K(ret)); } else if (!params.at(7).is_null() && OB_FAIL(params.at(7).get_bool(stat_param.no_invalidate_))) { @@ -1767,10 +1743,6 @@ int ObDbmsStats::import_schema_stats(ObExecContext &ctx, ParamStore ¶ms, ObO K(stat_table_param.tab_name_)); LOG_USER_ERROR(OB_TABLE_NOT_EXIST, to_cstring(stat_table_param.db_name_), to_cstring(stat_table_param.tab_name_)); - } else if (table_schema->is_tmp_table()) { - ret = OB_NOT_SUPPORTED; - LOG_WARN("dbms_stats with temp table not support", K(ret)); - LOG_USER_ERROR(OB_NOT_SUPPORTED, "dbms_stats with temp table"); } else if (!params.at(2).is_null() && OB_FAIL(params.at(4).get_varchar(stat_table_param.stat_id_))) { LOG_WARN("failed to get stat id ", K(ret)); } else { @@ -1852,8 +1824,8 @@ int ObDbmsStats::import_index_stats(ObExecContext &ctx, ParamStore ¶ms, ObOb LOG_WARN("failed to get common rpc", K(ret)); } else if (lib::is_oracle_mode() && 
!params.at(8).is_null()) { ret = OB_ERR_DBMS_STATS_PL; - LOG_WARN("table name not should be specified in gather index stats", K(ret)); - LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL,"table name not should be specified in gather index stats"); + LOG_WARN("table name shouldn't be specified in gather index stats", K(ret)); + LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL,"table name shouldn't be specified in gather index stats"); } else if (lib::is_mysql_mode() && params.at(8).is_null()) { ret = OB_ERR_DBMS_STATS_PL; LOG_WARN("table name should be specified in gather index stats", K(ret)); @@ -1878,10 +1850,6 @@ int ObDbmsStats::import_index_stats(ObExecContext &ctx, ParamStore ¶ms, ObOb K(stat_table_param.tab_name_)); LOG_USER_ERROR(OB_TABLE_NOT_EXIST, to_cstring(stat_table_param.db_name_), to_cstring(stat_table_param.tab_name_)); - } else if (table_schema->is_tmp_table()) { - ret = OB_NOT_SUPPORTED; - LOG_WARN("dbms_stats with temp table not support", K(ret)); - LOG_USER_ERROR(OB_NOT_SUPPORTED, "dbms_stats with temp table"); } else if (!params.at(4).is_null() && OB_FAIL(params.at(4).get_varchar(index_stat_param.stat_id_))) { LOG_WARN("failed to get stat id ", K(ret)); } else if (!params.at(6).is_null() && OB_FAIL(params.at(6).get_bool(index_stat_param.no_invalidate_))) { @@ -2384,6 +2352,7 @@ int ObDbmsStats::restore_table_stats(sql::ObExecContext &ctx, LOG_WARN("failed to get force", K(ret)); } else if (!params.at(5).is_null() && OB_FAIL(params.at(5).get_bool(stat_param.no_invalidate_))) { LOG_WARN("failed to get no_invalidate", K(ret)); + } else if (stat_param.is_temp_table_) {//do nothing // oracle don't do this, compatible oracle temporarily // } else if (stat_param.force_ && // OB_FAIL(ObDbmsStatsLockUnlock::fill_stat_locked(ctx, stat_param))) { @@ -3020,10 +2989,6 @@ int ObDbmsStats::parse_table_part_info(ObExecContext &ctx, ret = OB_TABLE_NOT_EXIST; LOG_WARN("table schema is null", K(ret), K(table_schema), K(param.db_name_), K(param.tab_name_)); 
LOG_USER_ERROR(OB_TABLE_NOT_EXIST, to_cstring(param.db_name_), to_cstring(param.tab_name_)); - } else if (table_schema->is_tmp_table()) { - ret = OB_NOT_SUPPORTED; - LOG_WARN("dbms_stats with temp table not support", K(ret)); - LOG_USER_ERROR(OB_NOT_SUPPORTED, "dbms_stats with temp table"); } else if (OB_FAIL(get_table_part_infos(table_schema, param.part_infos_, param.subpart_infos_, @@ -3074,7 +3039,7 @@ int ObDbmsStats::parse_table_part_info(ObExecContext &ctx, LOG_WARN("params have null", K(ret), K(schema_guard)); } else if (OB_FAIL(parse_table_info(ctx, stat_table, table_schema, param))) { LOG_WARN("failed to parse table info", K(ret)); - } else if (OB_UNLIKELY(table_schema->is_view_table() || table_schema->is_tmp_table())) { + } else if (OB_UNLIKELY(table_schema->is_view_table())) { ret = OB_TABLE_NOT_EXIST; } else if (OB_FAIL(get_table_part_infos(table_schema, param.part_infos_, @@ -3138,10 +3103,6 @@ int ObDbmsStats::parse_index_part_info(ObExecContext &ctx, ret = OB_TABLE_NOT_EXIST; LOG_WARN("table schema is null", K(ret), K(index_schema), K(param.db_name_), K(param.tab_name_)); - } else if (table_schema->is_tmp_table()) { - ret = OB_NOT_SUPPORTED; - LOG_WARN("dbms_stats with temp table not support", K(ret)); - LOG_USER_ERROR(OB_NOT_SUPPORTED, "dbms_stats with temp table"); } else if (OB_FAIL(set_param_global_part_id(ctx, param, true, table_schema->get_table_id()))) { LOG_WARN("fail to set global part id for index data table", K(ret)); } else if (OB_FAIL(ob_write_string(ctx.get_allocator(), @@ -3332,10 +3293,6 @@ int ObDbmsStats::parse_set_table_info(ObExecContext &ctx, ret = OB_TABLE_NOT_EXIST; LOG_WARN("table schema is null", K(ret), K(table_schema), K(param.db_name_), K(param.tab_name_)); LOG_USER_ERROR(OB_TABLE_NOT_EXIST, to_cstring(param.db_name_), to_cstring(param.tab_name_)); - } else if (table_schema->is_tmp_table()) { - ret = OB_NOT_SUPPORTED; - LOG_WARN("dbms_stats with temp table not support", K(ret)); - LOG_USER_ERROR(OB_NOT_SUPPORTED, 
"dbms_stats with temp table"); } else if (OB_FAIL(parse_set_partition_name(table_schema, part_name, param))) { LOG_WARN("failed to parser part info", K(ret)); } else if (OB_FAIL(init_column_stat_params(ctx.get_allocator(), @@ -3387,10 +3344,6 @@ int ObDbmsStats::parse_set_column_stats(ObExecContext &ctx, ret = OB_TABLE_NOT_EXIST; LOG_WARN("table schema is null", K(ret), K(table_schema), K(param.db_name_), K(param.tab_name_)); LOG_USER_ERROR(OB_TABLE_NOT_EXIST, to_cstring(param.db_name_), to_cstring(param.tab_name_)); - } else if (table_schema->is_tmp_table()) { - ret = OB_NOT_SUPPORTED; - LOG_WARN("dbms_stats with temp table not support", K(ret)); - LOG_USER_ERROR(OB_NOT_SUPPORTED, "dbms_stats with temp table"); } else if (OB_FAIL(colname.get_string(column_name))) { LOG_WARN("failed to get column name", K(ret)); } else if (OB_UNLIKELY(column_name.empty())) { @@ -3660,6 +3613,7 @@ int ObDbmsStats::parse_table_info(ObExecContext &ctx, LOG_WARN("failed to write string", K(ret)); } else { param.tenant_id_ = session->get_effective_tenant_id(); + param.is_temp_table_ = table_schema->is_tmp_table(); } if (OB_SUCC(ret) && table_schema != NULL) { param.table_id_ = table_schema->get_table_id(); diff --git a/src/share/stat/ob_basic_stats_estimator.cpp b/src/share/stat/ob_basic_stats_estimator.cpp index 0de045748..0a06e0f26 100644 --- a/src/share/stat/ob_basic_stats_estimator.cpp +++ b/src/share/stat/ob_basic_stats_estimator.cpp @@ -54,7 +54,7 @@ int ObBasicStatsEstimator::estimate(const ObTableStatParam ¶m, src_opt_stat.table_stat_ = &tab_stat; ObOptTableStat *src_tab_stat = src_opt_stat.table_stat_; ObIArray &src_col_stats = src_opt_stat.column_stats_; - ObArenaAllocator allocator(ObModIds::OB_SQL_PARSER); + ObArenaAllocator allocator("ObBasicStats"); ObSqlString raw_sql; int64_t duration_time = -1; // Note that there are dependences between different kinds of statistics @@ -68,11 +68,11 @@ int ObBasicStatsEstimator::estimate(const ObTableStatParam ¶m, column_params.count(), 
src_col_stats))) { LOG_WARN("failed init col stats", K(ret)); - } else if (OB_FAIL(add_hint(no_rewrite, ctx_.get_allocator()))) { + } else if (OB_FAIL(add_hint(no_rewrite, allocator))) { LOG_WARN("failed to add no_rewrite", K(ret)); } else if (OB_FAIL(add_from_table(param.db_name_, param.tab_name_))) { LOG_WARN("failed to add from table", K(ret)); - } else if (OB_FAIL(fill_parallel_info(ctx_.get_allocator(), param.degree_))) { + } else if (OB_FAIL(fill_parallel_info(allocator, param.degree_))) { LOG_WARN("failed to add query sql parallel info", K(ret)); } else if (OB_FAIL(ObDbmsStatsUtils::get_valid_duration_time(extra.start_time_, param.duration_time_, @@ -80,13 +80,15 @@ int ObBasicStatsEstimator::estimate(const ObTableStatParam ¶m, LOG_WARN("failed to get valid duration time", K(ret)); } else if (OB_FAIL(fill_query_timeout_info(ctx_.get_allocator(), duration_time))) { LOG_WARN("failed to fill query timeout info", K(ret)); + } else if (OB_FAIL(fill_sample_info(allocator, param.sample_info_))) { + LOG_WARN("failed to fill sample info", K(ret)); } else if (dst_opt_stats.count() > 1 && - OB_FAIL(fill_group_by_info(ctx_.get_allocator(), param, extra, calc_part_id_str))) { + OB_FAIL(fill_group_by_info(allocator, param, extra, calc_part_id_str))) { LOG_WARN("failed to add query sql partition info", K(ret)); } else if (OB_FAIL(add_stat_item(ObStatRowCount(¶m, src_tab_stat)))) { LOG_WARN("failed to add row count", K(ret)); } else if (calc_part_id_str.empty()) { - if (OB_FAIL(fill_partition_info(ctx_.get_allocator(), param, extra))) { + if (OB_FAIL(fill_partition_info(allocator, param, extra))) { LOG_WARN("failed to add query sql parallel info", K(ret)); } else if (OB_UNLIKELY(dst_opt_stats.count() != 1) || OB_ISNULL(dst_opt_stats.at(0).table_stat_)) { @@ -118,6 +120,8 @@ int ObBasicStatsEstimator::estimate(const ObTableStatParam ¶m, } else if (OB_FAIL(do_estimate(param.tenant_id_, raw_sql.string(), COPY_ALL_STAT, src_opt_stat, dst_opt_stats))) { LOG_WARN("failed to 
evaluate basic stats", K(ret)); + } else if (OB_FAIL(refine_basic_stats(param, extra, dst_opt_stats))) { + LOG_WARN("failed to refine basic stats", K(ret)); } else { LOG_TRACE("basic stats is collected", K(dst_opt_stats.count())); } @@ -790,5 +794,124 @@ int ObBasicStatsEstimator::generate_first_part_idx_map(const ObIArray return ret; } +/** + * @brief ObBasicStatsEstimator::refine_basic_stats + * when the user specify estimate_percent is too small, the sample data isn't enough to describe the + * overall data distribution, So we need consider refine it, and reset the appropriate estimate_percent + * to regather basic stats. + */ +int ObBasicStatsEstimator::refine_basic_stats(const ObTableStatParam ¶m, + const ObExtraParam &extra, + ObIArray &dst_opt_stats) +{ + int ret = OB_SUCCESS; + if (sample_value_ >= 0.000001 && sample_value_ < 100.0) { + for (int64_t i = 0; OB_SUCC(ret) && i < dst_opt_stats.count(); ++i) { + bool need_re_estimate = false; + ObExtraParam new_extra; + ObTableStatParam new_param; + ObSEArray tmp_opt_stats; + ObBasicStatsEstimator basic_re_est(ctx_); + if (OB_FAIL(check_stat_need_re_estimate(param, extra, dst_opt_stats.at(i), + need_re_estimate, new_param, new_extra))) { + LOG_WARN("failed to check stat need re-estimate", K(ret)); + } else if (!need_re_estimate) { + //do nothing + } else if (OB_FAIL(tmp_opt_stats.push_back(dst_opt_stats.at(i)))) { + LOG_WARN("failed to push back", K(ret)); + } else if (OB_FAIL(basic_re_est.estimate(new_param, new_extra, tmp_opt_stats))) { + LOG_WARN("failed to estimate basic statistics", K(ret)); + } else { + LOG_TRACE("Suceed to re-estimate stats", K(new_param), K(param)); + } + } + } + return ret; +} + +int ObBasicStatsEstimator::check_stat_need_re_estimate(const ObTableStatParam &origin_param, + const ObExtraParam &origin_extra, + ObOptStat &opt_stat, + bool &need_re_estimate, + ObTableStatParam &new_param, + ObExtraParam &new_extra) +{ + int ret = OB_SUCCESS; + need_re_estimate = false; + if 
(OB_ISNULL(opt_stat.table_stat_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected error", K(ret), K(opt_stat.table_stat_)); + } else if (opt_stat.table_stat_->get_row_count() * sample_value_ / 100 >= MAGIC_MIN_SAMPLE_SIZE) { + //do nothing + } else if (OB_FAIL(new_param.assign(origin_param))) { + LOG_WARN("failed to assign", K(ret)); + } else { + need_re_estimate = true; + int64_t total_row_count = opt_stat.table_stat_->get_row_count(); + //1.set sample ratio + if (total_row_count <= MAGIC_SAMPLE_SIZE) { + new_param.sample_info_.is_sample_ = false; + new_param.sample_info_.sample_value_ = 0.0; + new_param.sample_info_.is_block_sample_ = false; + } else { + new_param.sample_info_.is_sample_ = true; + new_param.sample_info_.is_block_sample_ = false; + new_param.sample_info_.sample_value_ = (MAGIC_SAMPLE_SIZE * 100.0) / total_row_count; + new_param.sample_info_.sample_type_ = PercentSample; + } + //2.set partition info + new_extra.type_ = origin_extra.type_; + new_extra.nth_part_ = origin_extra.nth_part_; + bool find_it = (new_extra.type_ == TABLE_LEVEL); + if (new_extra.type_ == PARTITION_LEVEL) { + for (int64_t i = 0; !find_it && i < new_param.part_infos_.count(); ++i) { + if (opt_stat.table_stat_->get_partition_id() == new_param.part_infos_.at(i).part_id_) { + find_it = true; + new_extra.nth_part_ = i; + new_param.part_name_ = new_param.part_infos_.at(i).part_name_; + new_param.is_subpart_name_ = false; + } + } + } else if (new_extra.type_ == SUBPARTITION_LEVEL) { + for (int64_t i = 0; !find_it && i < new_param.subpart_infos_.count(); ++i) { + if (opt_stat.table_stat_->get_partition_id() == new_param.subpart_infos_.at(i).part_id_) { + find_it = true; + new_extra.nth_part_ = i; + new_param.part_name_ = new_param.subpart_infos_.at(i).part_name_; + new_param.is_subpart_name_ = true; + } + } + } + if (!find_it) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected error", K(ret), K(new_param), KPC(opt_stat.table_stat_)); + } + //3.reset opt stat + if 
(OB_SUCC(ret)) { + opt_stat.table_stat_->set_row_count(0); + opt_stat.table_stat_->set_avg_row_size(0); + for (int64_t i = 0; OB_SUCC(ret) && i < opt_stat.column_stats_.count(); ++i) { + if (OB_ISNULL(opt_stat.column_stats_.at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected error", K(ret)); + } else { + ObObj null_val; + null_val.set_null(); + opt_stat.column_stats_.at(i)->set_max_value(null_val); + opt_stat.column_stats_.at(i)->set_min_value(null_val); + opt_stat.column_stats_.at(i)->set_num_not_null(0); + opt_stat.column_stats_.at(i)->set_num_null(0); + opt_stat.column_stats_.at(i)->set_num_distinct(0); + opt_stat.column_stats_.at(i)->set_avg_len(0); + opt_stat.column_stats_.at(i)->set_llc_bitmap_size(ObColumnStat::NUM_LLC_BUCKET); + MEMSET(opt_stat.column_stats_.at(i)->get_llc_bitmap(), 0, ObColumnStat::NUM_LLC_BUCKET); + opt_stat.column_stats_.at(i)->get_histogram().reset(); + } + } + } + } + return ret; +} + } // end of common } // end of oceanbase diff --git a/src/share/stat/ob_basic_stats_estimator.h b/src/share/stat/ob_basic_stats_estimator.h index 92c3210ac..193c074b8 100644 --- a/src/share/stat/ob_basic_stats_estimator.h +++ b/src/share/stat/ob_basic_stats_estimator.h @@ -111,6 +111,17 @@ private: static int generate_first_part_idx_map(const ObIArray &all_part_infos, hash::ObHashMap &first_part_idx_map); + int refine_basic_stats(const ObTableStatParam ¶m, + const ObExtraParam &extra, + ObIArray &dst_opt_stats); + + int check_stat_need_re_estimate(const ObTableStatParam &origin_param, + const ObExtraParam &origin_extra, + ObOptStat &opt_stat, + bool &need_re_estimate, + ObTableStatParam &new_param, + ObExtraParam &new_extra); + }; } diff --git a/src/share/stat/ob_dbms_stats_executor.cpp b/src/share/stat/ob_dbms_stats_executor.cpp index c855c0e06..5ea0eaa1a 100644 --- a/src/share/stat/ob_dbms_stats_executor.cpp +++ b/src/share/stat/ob_dbms_stats_executor.cpp @@ -101,13 +101,15 @@ int ObDbmsStatsExecutor::gather_table_stats(ObExecContext &ctx, 
ObSEArray history_tab_handles; ObSEArray history_col_handles; //before write, we need record history stats. - if (OB_FAIL(ObDbmsStatsHistoryManager::get_history_stat_handles(ctx, param, + if (!param.is_temp_table_ && + OB_FAIL(ObDbmsStatsHistoryManager::get_history_stat_handles(ctx, param, history_tab_handles, history_col_handles))) { LOG_WARN("failed to get history stat handles", K(ret)); } else if (OB_FAIL(ObDbmsStatsUtils::split_batch_write(ctx, all_tstats, all_cstats))) { LOG_WARN("failed to split batch write", K(ret)); - } else if (OB_FAIL(ObDbmsStatsUtils::batch_write_history_stats(ctx, + } else if (!param.is_temp_table_ && + OB_FAIL(ObDbmsStatsUtils::batch_write_history_stats(ctx, history_tab_handles, history_col_handles))) { LOG_WARN("failed to batch write history stats", K(ret)); @@ -245,7 +247,8 @@ int ObDbmsStatsExecutor::set_table_stats(ObExecContext &ctx, if (OB_FAIL(do_set_table_stats(param, &table_stat))) { LOG_WARN("failed to do set table stats", K(ret)); ////before update, we need record history stats. - } else if (OB_FAIL(ObDbmsStatsHistoryManager::get_history_stat_handles(ctx, param.table_param_, + } else if (!param.table_param_.is_temp_table_ && + OB_FAIL(ObDbmsStatsHistoryManager::get_history_stat_handles(ctx, param.table_param_, history_tab_handles, history_col_handles))) { LOG_WARN("failed to get history stat handles", K(ret)); @@ -478,7 +481,8 @@ int ObDbmsStatsExecutor::delete_table_stats(ObExecContext &ctx, ObSEArray history_col_handles; int64_t affected_rows = 0; //before delete, we need record history stats. 
- if (OB_FAIL(ObDbmsStatsHistoryManager::get_history_stat_handles(ctx, param, + if (!param.is_temp_table_ && + OB_FAIL(ObDbmsStatsHistoryManager::get_history_stat_handles(ctx, param, history_tab_handles, history_col_handles))) { LOG_WARN("failed to get history stat handles", K(ret)); @@ -488,7 +492,7 @@ int ObDbmsStatsExecutor::delete_table_stats(ObExecContext &ctx, cascade_columns, affected_rows))) { LOG_WARN("failed to delete table stats", K(ret)); - } else if (affected_rows != 0 && + } else if (affected_rows != 0 && !param.is_temp_table_ && OB_FAIL(ObDbmsStatsUtils::batch_write_history_stats(ctx, history_tab_handles, history_col_handles))) { diff --git a/src/share/stat/ob_dbms_stats_export_import.cpp b/src/share/stat/ob_dbms_stats_export_import.cpp index c4479fc07..590f927e9 100644 --- a/src/share/stat/ob_dbms_stats_export_import.cpp +++ b/src/share/stat/ob_dbms_stats_export_import.cpp @@ -604,7 +604,7 @@ int ObDbmsStatsExportImport::do_import_stats(ObExecContext &ctx, ObSEArray history_tab_handles; ObSEArray history_col_handles; //before import, we need record history stats. 
- if (!is_index_stat && !all_tstats.empty() && + if (!is_index_stat && !all_tstats.empty() && !param.is_temp_table_ && OB_FAIL(ObDbmsStatsHistoryManager::get_history_stat_handles(ctx, param, history_tab_handles, history_col_handles))) { diff --git a/src/share/stat/ob_hybrid_hist_estimator.cpp b/src/share/stat/ob_hybrid_hist_estimator.cpp index cfa92e92f..9e904d2ba 100644 --- a/src/share/stat/ob_hybrid_hist_estimator.cpp +++ b/src/share/stat/ob_hybrid_hist_estimator.cpp @@ -55,7 +55,7 @@ int ObHybridHistEstimator::estimate(const ObTableStatParam ¶m, ObOptStat src_opt_stat; src_opt_stat.table_stat_ = &tab_stat; ObIArray &src_col_stats = src_opt_stat.column_stats_; - ObArenaAllocator allocator(ObModIds::OB_SQL_PARSER); + ObArenaAllocator allocator("ObHybridHist"); ObString raw_sql; ObString refine_raw_sql; int64_t refine_cnt = 0; @@ -462,8 +462,8 @@ int ObHybridHistEstimator::try_build_hybrid_hist(const ObColumnStatParam ¶m, * b. if total_row_count >= MAGIC_MAX_AUTO_SAMPLE_SIZE then: * i: if max_num_bkts <= DEFAULT_HISTOGRAM_BUCKET_NUM then choosing MAGIC_SAMPLE_SIZE; * ii: if max_num_bkts > DEFAULT_HISTOGRAM_BUCKET_NUM: - * (1): if max_num_bkts >= total_row_count * MAX_CUT_RATIO then choosing full table scan; - * (2): if max_num_bkts <= total_row_count * MAX_CUT_RATIO then choosing: + * (1): if max_num_bkts >= total_row_count * MAGIC_SAMPLE_CUT_RATIO then choosing full table scan; + * (2): if max_num_bkts <= total_row_count * MAGIC_SAMPLE_CUT_RATIO then choosing: * sample_size = MAGIC_SAMPLE_SIZE + MAGIC_BASE_SAMPLE_SIZE + (max_num_bkts - * DEFAULT_HISTOGRAM_BUCKET_NUM) * MAGIC_MIN_SAMPLE_SIZE * 0.01; * @@ -481,11 +481,6 @@ int ObHybridHistEstimator::compute_estimate_percent(int64_t total_row_count, bool &is_block_sample) { int ret = OB_SUCCESS; - const int64_t MAGIC_SAMPLE_SIZE = 5500; - const int64_t MAGIC_MAX_AUTO_SAMPLE_SIZE = 22000; - const int64_t MAGIC_MIN_SAMPLE_SIZE = 2500; - const int64_t MAGIC_BASE_SAMPLE_SIZE = 1000; - const double MAX_CUT_RATIO = 
0.00962; if (0 == total_row_count) { need_sample = false; } else if (sample_info.is_sample_) { @@ -521,7 +516,7 @@ int ObHybridHistEstimator::compute_estimate_percent(int64_t total_row_count, is_block_sample = false; est_percent = (MAGIC_SAMPLE_SIZE * 100.0) / total_row_count; } else { - int64_t num_bound_bkts = static_cast(std::round(total_row_count * MAX_CUT_RATIO)); + int64_t num_bound_bkts = static_cast(std::round(total_row_count * MAGIC_SAMPLE_CUT_RATIO)); if (max_num_bkts >= num_bound_bkts) { need_sample = false; } else { diff --git a/src/share/stat/ob_opt_stat_manager.cpp b/src/share/stat/ob_opt_stat_manager.cpp index 59edb9a52..327164cce 100644 --- a/src/share/stat/ob_opt_stat_manager.cpp +++ b/src/share/stat/ob_opt_stat_manager.cpp @@ -279,7 +279,17 @@ int ObOptStatManager::update_table_stat(const uint64_t tenant_id, return ret; } -int ObOptStatManager::delete_table_stat(const uint64_t tenant_id, +int ObOptStatManager::delete_table_stat(uint64_t tenant_id, + const uint64_t ref_id, + int64_t &affected_rows) +{ + int ret = OB_SUCCESS; + ObSEArray part_ids; + bool cascade_column = true; + return delete_table_stat(tenant_id, ref_id, part_ids, cascade_column, affected_rows); +} + +int ObOptStatManager::delete_table_stat(uint64_t tenant_id, const uint64_t ref_id, const ObIArray &part_ids, const bool cascade_column, diff --git a/src/share/stat/ob_opt_stat_manager.h b/src/share/stat/ob_opt_stat_manager.h index 5865b6215..2492f92df 100644 --- a/src/share/stat/ob_opt_stat_manager.h +++ b/src/share/stat/ob_opt_stat_manager.h @@ -139,6 +139,10 @@ public: bool only_update_col_stat = false); int delete_table_stat(const uint64_t tenant_id, + const uint64_t ref_id, + int64_t &affected_rows); + + int delete_table_stat(uint64_t tenant_id, const uint64_t ref_id, const ObIArray &part_ids, const bool cascade_column, diff --git a/src/share/stat/ob_opt_table_stat.h b/src/share/stat/ob_opt_table_stat.h index 29a047582..c7433cfeb 100644 --- a/src/share/stat/ob_opt_table_stat.h +++ 
b/src/share/stat/ob_opt_table_stat.h @@ -121,7 +121,8 @@ public: data_version_(0), last_analyzed_(0), stattype_locked_(0), - modified_count_(0) {} + modified_count_(0), + sample_size_(0) {} ObOptTableStat(uint64_t table_id, int64_t partition_id, int64_t object_type, @@ -150,7 +151,8 @@ public: data_version_(data_version), last_analyzed_(0), stattype_locked_(0), - modified_count_(0) {} + modified_count_(0), + sample_size_(0) {} virtual ~ObOptTableStat() {} @@ -202,6 +204,9 @@ public: int64_t get_modified_count() const { return modified_count_; } void set_modified_count(int64_t modified_count) { modified_count_ = modified_count; } + int64_t get_sample_size() const { return sample_size_; } + void set_sample_size(int64_t sample_size) { sample_size_ = sample_size; } + virtual int64_t size() const { return sizeof(*this); @@ -245,6 +250,7 @@ public: last_analyzed_ = 0; stattype_locked_ = 0; modified_count_ = 0; + sample_size_ = 0; } TO_STRING_KV(K(table_id_), @@ -262,7 +268,8 @@ public: K(data_version_), K(last_analyzed_), K(stattype_locked_), - K(modified_count_)); + K(modified_count_), + K(sample_size_)); private: uint64_t table_id_; @@ -283,6 +290,7 @@ private: int64_t last_analyzed_; uint64_t stattype_locked_; int64_t modified_count_; + int64_t sample_size_; }; } diff --git a/src/share/stat/ob_stat_define.cpp b/src/share/stat/ob_stat_define.cpp index 89ed264b7..5631c90a1 100644 --- a/src/share/stat/ob_stat_define.cpp +++ b/src/share/stat/ob_stat_define.cpp @@ -116,6 +116,7 @@ int ObTableStatParam::assign(const ObTableStatParam &other) global_data_part_id_ = other.global_data_part_id_; data_table_id_ = other.data_table_id_; need_estimate_block_ = other.need_estimate_block_; + is_temp_table_ = other.is_temp_table_; if (OB_FAIL(part_infos_.assign(other.part_infos_))) { LOG_WARN("failed to assign", K(ret)); } else if (OB_FAIL(subpart_infos_.assign(other.subpart_infos_))) { diff --git a/src/share/stat/ob_stat_define.h b/src/share/stat/ob_stat_define.h index 
d6c93d5db..af9b88604 100644 --- a/src/share/stat/ob_stat_define.h +++ b/src/share/stat/ob_stat_define.h @@ -50,6 +50,11 @@ enum StatOptionFlags const static double OPT_DEFAULT_STALE_PERCENT = 0.1; const static int64_t OPT_DEFAULT_STATS_RETENTION = 31; const static int64_t OPT_STATS_MAX_VALUE_CAHR_LEN = 128; +const int64_t MAGIC_SAMPLE_SIZE = 5500; +const int64_t MAGIC_MAX_AUTO_SAMPLE_SIZE = 22000; +const int64_t MAGIC_MIN_SAMPLE_SIZE = 2500; +const int64_t MAGIC_BASE_SAMPLE_SIZE = 1000; +const double MAGIC_SAMPLE_CUT_RATIO = 0.00962; enum StatLevel { @@ -291,7 +296,8 @@ struct ObTableStatParam { global_tablet_id_(0), global_data_part_id_(INVALID_GLOBAL_PART_ID), data_table_id_(INVALID_GLOBAL_PART_ID), - need_estimate_block_(true) + need_estimate_block_(true), + is_temp_table_(false) {} int assign(const ObTableStatParam &other); @@ -363,6 +369,7 @@ struct ObTableStatParam { int64_t global_data_part_id_; // used to check wether table is locked, while gathering index stats. int64_t data_table_id_; // the data table id for index schema bool need_estimate_block_;//need estimate macro/micro block count + bool is_temp_table_; TO_STRING_KV(K(tenant_id_), K(db_name_), @@ -406,7 +413,8 @@ struct ObTableStatParam { K(global_tablet_id_), K(global_data_part_id_), K(data_table_id_), - K(need_estimate_block_)); + K(need_estimate_block_), + K(is_temp_table_)); }; struct ObOptStat diff --git a/src/share/stat/ob_stats_estimator.cpp b/src/share/stat/ob_stats_estimator.cpp index 4411166ea..d4c133c93 100644 --- a/src/share/stat/ob_stats_estimator.cpp +++ b/src/share/stat/ob_stats_estimator.cpp @@ -32,7 +32,8 @@ ObStatsEstimator::ObStatsEstimator(ObExecContext &ctx) : group_by_string_(), where_string_(), stat_items_(), - results_() + results_(), + sample_value_(100.0) {} int ObStatsEstimator::gen_select_filed() @@ -116,6 +117,27 @@ int ObStatsEstimator::fill_sample_info(common::ObIAllocator &alloc, return ret; } +int ObStatsEstimator::fill_sample_info(common::ObIAllocator &alloc, + 
const ObAnalyzeSampleInfo &sample_info) +{ + int ret = OB_SUCCESS; + if (!sample_info.is_sample_ || sample_info.sample_type_ == SampleType::RowSample) { + } else if (OB_UNLIKELY(sample_info.sample_value_ < 0.000001 || sample_info.sample_value_ > 100.0)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected error", K(ret), K(sample_info)); + } else if (sample_info.sample_value_ == 100.0) { + //do nothing + } else if (OB_FAIL(fill_sample_info(alloc, + sample_info.sample_value_, + sample_info.is_block_sample_, + sample_hint_))) { + LOG_WARN("failed to fill sample info", K(ret)); + } else { + sample_value_ = sample_info.sample_value_; + } + return ret; +} + int ObStatsEstimator::fill_parallel_info(common::ObIAllocator &alloc, int64_t degree) { @@ -421,8 +443,13 @@ int ObStatsEstimator::copy_opt_stat(ObOptStat &src_opt_stat, LOG_WARN("get unexpected null", K(ret), K(dst_opt_stats.at(i).table_stat_)); } else if (dst_opt_stats.at(i).table_stat_->get_partition_id() == partition_id) { find_it = true; - dst_opt_stats.at(i).table_stat_->set_row_count(tmp_tab_stat->get_row_count()); + int64_t row_cnt = tmp_tab_stat->get_row_count(); + if (sample_value_ >= 0.000001 && sample_value_ < 100.0) { + row_cnt = static_cast<int64_t>(row_cnt * 100 / sample_value_); + } + dst_opt_stats.at(i).table_stat_->set_row_count(row_cnt); dst_opt_stats.at(i).table_stat_->set_avg_row_size(tmp_tab_stat->get_avg_row_size()); + dst_opt_stats.at(i).table_stat_->set_sample_size(tmp_tab_stat->get_row_count()); if (OB_FAIL(copy_col_stats(tmp_col_stats, dst_opt_stats.at(i).column_stats_))) { LOG_WARN("failed to copy col stat", K(ret)); } else {/*do nothing*/} @@ -449,11 +476,19 @@ int ObStatsEstimator::copy_col_stats(ObIArray &src_col_stats, ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected error", K(ret), K(dst_col_stats.at(i))); } else { + int64_t num_not_null = src_col_stats.at(i)->get_num_not_null(); + int64_t num_null = src_col_stats.at(i)->get_num_null(); + int64_t num_distinct = 
src_col_stats.at(i)->get_num_distinct(); + if (sample_value_ >= 0.000001 && sample_value_ < 100.0) { + num_not_null = static_cast<int64_t>(num_not_null * 100 / sample_value_); + num_null = static_cast<int64_t>(num_null * 100 / sample_value_); + num_distinct = static_cast<int64_t>(num_distinct * 100 / sample_value_); + } dst_col_stats.at(i)->set_max_value(src_col_stats.at(i)->get_max_value()); dst_col_stats.at(i)->set_min_value(src_col_stats.at(i)->get_min_value()); - dst_col_stats.at(i)->set_num_not_null(src_col_stats.at(i)->get_num_not_null()); - dst_col_stats.at(i)->set_num_null(src_col_stats.at(i)->get_num_null()); - dst_col_stats.at(i)->set_num_distinct(src_col_stats.at(i)->get_num_distinct()); + dst_col_stats.at(i)->set_num_not_null(num_not_null); + dst_col_stats.at(i)->set_num_null(num_null); + dst_col_stats.at(i)->set_num_distinct(num_distinct); dst_col_stats.at(i)->set_avg_len(src_col_stats.at(i)->get_avg_len()); if (OB_ISNULL(dst_col_stats.at(i)->get_llc_bitmap()) || OB_ISNULL(src_col_stats.at(i)->get_llc_bitmap()) || @@ -471,7 +506,7 @@ int ObStatsEstimator::copy_col_stats(ObIArray &src_col_stats, dst_col_stats.at(i)->set_llc_bitmap_size(src_col_stats.at(i)->get_llc_bitmap_size()); ObHistogram &src_hist = src_col_stats.at(i)->get_histogram(); dst_col_stats.at(i)->get_histogram().set_type(src_hist.get_type()); - dst_col_stats.at(i)->get_histogram().set_sample_size(src_hist.get_sample_size()); + dst_col_stats.at(i)->get_histogram().set_sample_size(src_col_stats.at(i)->get_num_not_null()); dst_col_stats.at(i)->get_histogram().set_bucket_cnt(src_hist.get_bucket_cnt()); dst_col_stats.at(i)->get_histogram().set_density(src_hist.get_density()); if (OB_FAIL(append(dst_col_stats.at(i)->get_histogram().get_buckets(), diff --git a/src/share/stat/ob_stats_estimator.h b/src/share/stat/ob_stats_estimator.h index 03feb44a8..97dca2b87 100644 --- a/src/share/stat/ob_stats_estimator.h +++ b/src/share/stat/ob_stats_estimator.h @@ -70,6 +70,9 @@ protected: bool block_sample, ObString &sample_hint); + 
int fill_sample_info(common::ObIAllocator &alloc, + const ObAnalyzeSampleInfo &sample_info); + int fill_parallel_info(common::ObIAllocator &alloc, int64_t degree); @@ -116,6 +119,7 @@ protected: ObArray stat_items_; ObArray results_; + double sample_value_; }; diff --git a/src/sql/session/ob_sql_session_info.cpp b/src/sql/session/ob_sql_session_info.cpp index 4a3d3b598..5e2833717 100644 --- a/src/sql/session/ob_sql_session_info.cpp +++ b/src/sql/session/ob_sql_session_info.cpp @@ -48,6 +48,7 @@ #include "lib/utility/utility.h" #include "lib/utility/ob_proto_trans_util.h" #include "lib/allocator/ob_mod_define.h" +#include "share/stat/ob_opt_stat_manager.h" using namespace oceanbase::sql; @@ -621,6 +622,11 @@ int ObSQLSessionInfo::delete_from_oracle_temp_tables(const obrpc::ObDropTableArg } if (OB_SUCC(ret)) { LOG_DEBUG("succeed to delete rows in oracle temporary table", K(sql), K(affect_rows)); + //delete the related temp table stats. + if (OB_FAIL(ObOptStatManager::get_instance().delete_table_stat(tenant_id, + table_schema->get_table_id(), affect_rows))) { + LOG_WARN("failed to delete table stats", K(ret)); + } } else { LOG_WARN("failed to delete rows in oracle temporary table", K(ret), K(sql)); }