[CP] [CP] [CP] support basic optimizer stats specify estimate_percent option and temp table gather

This commit is contained in:
wangt1xiuyi 2022-12-20 09:41:38 +00:00 committed by ob-robot
parent 139119acfe
commit 84a71fba0a
14 changed files with 254 additions and 91 deletions

View File

@ -244,8 +244,8 @@ int ObDbmsStats::gather_index_stats(ObExecContext &ctx, ParamStore &params, ObOb
LOG_WARN("failed to get common rpc", K(ret));
} else if (lib::is_oracle_mode() && !params.at(11).is_null()) {
ret = OB_ERR_DBMS_STATS_PL;
LOG_WARN("table name not should be specified in gather index stats", K(ret));
LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL,"table name not should be specified in gather index stats");
LOG_WARN("table name shouldn't be specified in gather index stats", K(ret));
LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL,"table name shouldn't be specified in gather index stats");
} else if (lib::is_mysql_mode() && params.at(11).is_null()) {
ret = OB_ERR_DBMS_STATS_PL;
LOG_WARN("table name should be specified in gather index stats", K(ret));
@ -639,8 +639,8 @@ int ObDbmsStats::set_index_stats(ObExecContext &ctx, ParamStore &params, ObObj &
LOG_WARN("failed to get common rpc", K(ret));
} else if (lib::is_oracle_mode() && !params.at(22).is_null()) {
ret = OB_ERR_DBMS_STATS_PL;
LOG_WARN("table name not should be specified in gather index stats", K(ret));
LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL,"table name not should be specified in gather index stats");
LOG_WARN("table name shouldn't be specified in gather index stats", K(ret));
LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL,"table name shouldn't be specified in gather index stats");
} else if (lib::is_mysql_mode() && params.at(22).is_null()) {
ret = OB_ERR_DBMS_STATS_PL;
LOG_WARN("table name should be specified in gather index stats", K(ret));
@ -1001,8 +1001,8 @@ int ObDbmsStats::delete_index_stats(ObExecContext &ctx, ParamStore &params, ObOb
LOG_WARN("failed to get common rpc", K(ret));
} else if (lib::is_oracle_mode() && !params.at(10).is_null()) {
ret = OB_ERR_DBMS_STATS_PL;
LOG_WARN("table name not should be specified in gather index stats", K(ret));
LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL,"table name not should be specified in gather index stats");
LOG_WARN("table name shouldn't be specified in gather index stats", K(ret));
LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL,"table name shouldn't be specified in gather index stats");
} else if (lib::is_mysql_mode() && params.at(10).is_null()) {
ret = OB_ERR_DBMS_STATS_PL;
LOG_WARN("table name should be specified in gather index stats", K(ret));
@ -1245,10 +1245,6 @@ int ObDbmsStats::export_table_stats(ObExecContext &ctx, ParamStore &params, ObOb
K(stat_table_param.tab_name_));
LOG_USER_ERROR(OB_TABLE_NOT_EXIST, to_cstring(stat_table_param.db_name_),
to_cstring(stat_table_param.tab_name_));
} else if (table_schema->is_tmp_table()) {
ret = OB_NOT_SUPPORTED;
LOG_WARN("dbms_stats with temp table not support", K(ret));
LOG_USER_ERROR(OB_NOT_SUPPORTED, "dbms_stats with temp table");
} else if (!params.at(4).is_null() && OB_FAIL(params.at(4).get_varchar(stat_param.stat_id_))) {
LOG_WARN("failed to get stat id", K(ret));
} else if (!params.at(5).is_null() && OB_FAIL(params.at(5).get_bool(stat_param.cascade_))) {
@ -1322,10 +1318,6 @@ int ObDbmsStats::export_column_stats(sql::ObExecContext &ctx,
K(stat_table_param.db_name_), K(stat_table_param.tab_name_));
LOG_USER_ERROR(OB_TABLE_NOT_EXIST, to_cstring(stat_table_param.db_name_),
to_cstring(stat_table_param.tab_name_));
} else if (table_schema->is_tmp_table()) {
ret = OB_NOT_SUPPORTED;
LOG_WARN("dbms_stats with temp table not support", K(ret));
LOG_USER_ERROR(OB_NOT_SUPPORTED, "dbms_stats with temp table");
} else if (!params.at(5).is_null() && OB_FAIL(params.at(5).get_varchar((stat_param.stat_id_)))) {
LOG_WARN("failed to get stat id ", K(ret));
} else {
@ -1378,10 +1370,6 @@ int ObDbmsStats::export_schema_stats(ObExecContext &ctx, ParamStore &params, ObO
K(stat_table_param.db_name_), K(stat_table_param.tab_name_));
LOG_USER_ERROR(OB_TABLE_NOT_EXIST, to_cstring(stat_table_param.db_name_),
to_cstring(stat_table_param.tab_name_));
} else if (table_schema->is_tmp_table()) {
ret = OB_NOT_SUPPORTED;
LOG_WARN("dbms_stats with temp table not support", K(ret));
LOG_USER_ERROR(OB_NOT_SUPPORTED, "dbms_stats with temp table");
} else if (!params.at(2).is_null() && OB_FAIL(params.at(5).get_varchar((stat_table_param.stat_id_)))) {
LOG_WARN("failed to get stat id ", K(ret));
} else {
@ -1437,8 +1425,8 @@ int ObDbmsStats::export_index_stats(ObExecContext &ctx, ParamStore &params, ObOb
LOG_WARN("failed to check tenant is restore", K(ret));
} else if (lib::is_oracle_mode() && !params.at(6).is_null()) {
ret = OB_ERR_DBMS_STATS_PL;
LOG_WARN("table name not should be specified in gather index stats", K(ret));
LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL,"table name not should be specified in gather index stats");
LOG_WARN("table name shouldn't be specified in gather index stats", K(ret));
LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL,"table name shouldn't be specified in gather index stats");
} else if (lib::is_mysql_mode() && params.at(6).is_null()) {
ret = OB_ERR_DBMS_STATS_PL;
LOG_WARN("table name should be specified in gather index stats", K(ret));
@ -1463,10 +1451,6 @@ int ObDbmsStats::export_index_stats(ObExecContext &ctx, ParamStore &params, ObOb
K(stat_table_param.tab_name_));
LOG_USER_ERROR(OB_TABLE_NOT_EXIST, to_cstring(stat_table_param.db_name_),
to_cstring(stat_table_param.tab_name_));
} else if (table_schema->is_tmp_table()) {
ret = OB_NOT_SUPPORTED;
LOG_WARN("dbms_stats with temp table not support", K(ret));
LOG_USER_ERROR(OB_NOT_SUPPORTED, "dbms_stats with temp table");
} else if (!params.at(4).is_null() && OB_FAIL(params.at(4).get_varchar(index_stat_param.stat_id_))) {
LOG_WARN("failed to get stat id", K(ret));
} else {
@ -1563,10 +1547,6 @@ int ObDbmsStats::import_table_stats(ObExecContext &ctx, ParamStore &params, ObOb
K(stat_table_param.tab_name_));
LOG_USER_ERROR(OB_TABLE_NOT_EXIST, to_cstring(stat_table_param.db_name_),
to_cstring(stat_table_param.tab_name_));
} else if (table_schema->is_tmp_table()) {
ret = OB_NOT_SUPPORTED;
LOG_WARN("dbms_stats with temp table not support", K(ret));
LOG_USER_ERROR(OB_NOT_SUPPORTED, "dbms_stats with temp table");
} else if (!params.at(4).is_null() && OB_FAIL(params.at(4).get_varchar(stat_param.stat_id_))) {
LOG_WARN("failed to get stat id ", K(ret));
} else if (!params.at(5).is_null() && OB_FAIL(params.at(5).get_bool(stat_param.cascade_))) {
@ -1679,10 +1659,6 @@ int ObDbmsStats::import_column_stats(sql::ObExecContext &ctx,
K(stat_table_param.db_name_), K(stat_table_param.tab_name_));
LOG_USER_ERROR(OB_TABLE_NOT_EXIST, to_cstring(stat_table_param.db_name_),
to_cstring(stat_table_param.tab_name_));
} else if (table_schema->is_tmp_table()) {
ret = OB_NOT_SUPPORTED;
LOG_WARN("dbms_stats with temp table not support", K(ret));
LOG_USER_ERROR(OB_NOT_SUPPORTED, "dbms_stats with temp table");
} else if (!params.at(5).is_null() && OB_FAIL(params.at(5).get_varchar(stat_param.stat_id_))) {
LOG_WARN("failed to get stat id ", K(ret));
} else if (!params.at(7).is_null() && OB_FAIL(params.at(7).get_bool(stat_param.no_invalidate_))) {
@ -1767,10 +1743,6 @@ int ObDbmsStats::import_schema_stats(ObExecContext &ctx, ParamStore &params, ObO
K(stat_table_param.tab_name_));
LOG_USER_ERROR(OB_TABLE_NOT_EXIST, to_cstring(stat_table_param.db_name_),
to_cstring(stat_table_param.tab_name_));
} else if (table_schema->is_tmp_table()) {
ret = OB_NOT_SUPPORTED;
LOG_WARN("dbms_stats with temp table not support", K(ret));
LOG_USER_ERROR(OB_NOT_SUPPORTED, "dbms_stats with temp table");
} else if (!params.at(2).is_null() && OB_FAIL(params.at(4).get_varchar(stat_table_param.stat_id_))) {
LOG_WARN("failed to get stat id ", K(ret));
} else {
@ -1852,8 +1824,8 @@ int ObDbmsStats::import_index_stats(ObExecContext &ctx, ParamStore &params, ObOb
LOG_WARN("failed to get common rpc", K(ret));
} else if (lib::is_oracle_mode() && !params.at(8).is_null()) {
ret = OB_ERR_DBMS_STATS_PL;
LOG_WARN("table name not should be specified in gather index stats", K(ret));
LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL,"table name not should be specified in gather index stats");
LOG_WARN("table name shouldn't be specified in gather index stats", K(ret));
LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL,"table name shouldn't be specified in gather index stats");
} else if (lib::is_mysql_mode() && params.at(8).is_null()) {
ret = OB_ERR_DBMS_STATS_PL;
LOG_WARN("table name should be specified in gather index stats", K(ret));
@ -1878,10 +1850,6 @@ int ObDbmsStats::import_index_stats(ObExecContext &ctx, ParamStore &params, ObOb
K(stat_table_param.tab_name_));
LOG_USER_ERROR(OB_TABLE_NOT_EXIST, to_cstring(stat_table_param.db_name_),
to_cstring(stat_table_param.tab_name_));
} else if (table_schema->is_tmp_table()) {
ret = OB_NOT_SUPPORTED;
LOG_WARN("dbms_stats with temp table not support", K(ret));
LOG_USER_ERROR(OB_NOT_SUPPORTED, "dbms_stats with temp table");
} else if (!params.at(4).is_null() && OB_FAIL(params.at(4).get_varchar(index_stat_param.stat_id_))) {
LOG_WARN("failed to get stat id ", K(ret));
} else if (!params.at(6).is_null() && OB_FAIL(params.at(6).get_bool(index_stat_param.no_invalidate_))) {
@ -2384,6 +2352,7 @@ int ObDbmsStats::restore_table_stats(sql::ObExecContext &ctx,
LOG_WARN("failed to get force", K(ret));
} else if (!params.at(5).is_null() && OB_FAIL(params.at(5).get_bool(stat_param.no_invalidate_))) {
LOG_WARN("failed to get no_invalidate", K(ret));
} else if (stat_param.is_temp_table_) {//do nothing
// oracle don't do this, compatible oracle temporarily
// } else if (stat_param.force_ &&
// OB_FAIL(ObDbmsStatsLockUnlock::fill_stat_locked(ctx, stat_param))) {
@ -3020,10 +2989,6 @@ int ObDbmsStats::parse_table_part_info(ObExecContext &ctx,
ret = OB_TABLE_NOT_EXIST;
LOG_WARN("table schema is null", K(ret), K(table_schema), K(param.db_name_), K(param.tab_name_));
LOG_USER_ERROR(OB_TABLE_NOT_EXIST, to_cstring(param.db_name_), to_cstring(param.tab_name_));
} else if (table_schema->is_tmp_table()) {
ret = OB_NOT_SUPPORTED;
LOG_WARN("dbms_stats with temp table not support", K(ret));
LOG_USER_ERROR(OB_NOT_SUPPORTED, "dbms_stats with temp table");
} else if (OB_FAIL(get_table_part_infos(table_schema,
param.part_infos_,
param.subpart_infos_,
@ -3074,7 +3039,7 @@ int ObDbmsStats::parse_table_part_info(ObExecContext &ctx,
LOG_WARN("params have null", K(ret), K(schema_guard));
} else if (OB_FAIL(parse_table_info(ctx, stat_table, table_schema, param))) {
LOG_WARN("failed to parse table info", K(ret));
} else if (OB_UNLIKELY(table_schema->is_view_table() || table_schema->is_tmp_table())) {
} else if (OB_UNLIKELY(table_schema->is_view_table())) {
ret = OB_TABLE_NOT_EXIST;
} else if (OB_FAIL(get_table_part_infos(table_schema,
param.part_infos_,
@ -3138,10 +3103,6 @@ int ObDbmsStats::parse_index_part_info(ObExecContext &ctx,
ret = OB_TABLE_NOT_EXIST;
LOG_WARN("table schema is null", K(ret), K(index_schema), K(param.db_name_),
K(param.tab_name_));
} else if (table_schema->is_tmp_table()) {
ret = OB_NOT_SUPPORTED;
LOG_WARN("dbms_stats with temp table not support", K(ret));
LOG_USER_ERROR(OB_NOT_SUPPORTED, "dbms_stats with temp table");
} else if (OB_FAIL(set_param_global_part_id(ctx, param, true, table_schema->get_table_id()))) {
LOG_WARN("fail to set global part id for index data table", K(ret));
} else if (OB_FAIL(ob_write_string(ctx.get_allocator(),
@ -3332,10 +3293,6 @@ int ObDbmsStats::parse_set_table_info(ObExecContext &ctx,
ret = OB_TABLE_NOT_EXIST;
LOG_WARN("table schema is null", K(ret), K(table_schema), K(param.db_name_), K(param.tab_name_));
LOG_USER_ERROR(OB_TABLE_NOT_EXIST, to_cstring(param.db_name_), to_cstring(param.tab_name_));
} else if (table_schema->is_tmp_table()) {
ret = OB_NOT_SUPPORTED;
LOG_WARN("dbms_stats with temp table not support", K(ret));
LOG_USER_ERROR(OB_NOT_SUPPORTED, "dbms_stats with temp table");
} else if (OB_FAIL(parse_set_partition_name(table_schema, part_name, param))) {
LOG_WARN("failed to parser part info", K(ret));
} else if (OB_FAIL(init_column_stat_params(ctx.get_allocator(),
@ -3387,10 +3344,6 @@ int ObDbmsStats::parse_set_column_stats(ObExecContext &ctx,
ret = OB_TABLE_NOT_EXIST;
LOG_WARN("table schema is null", K(ret), K(table_schema), K(param.db_name_), K(param.tab_name_));
LOG_USER_ERROR(OB_TABLE_NOT_EXIST, to_cstring(param.db_name_), to_cstring(param.tab_name_));
} else if (table_schema->is_tmp_table()) {
ret = OB_NOT_SUPPORTED;
LOG_WARN("dbms_stats with temp table not support", K(ret));
LOG_USER_ERROR(OB_NOT_SUPPORTED, "dbms_stats with temp table");
} else if (OB_FAIL(colname.get_string(column_name))) {
LOG_WARN("failed to get column name", K(ret));
} else if (OB_UNLIKELY(column_name.empty())) {
@ -3660,6 +3613,7 @@ int ObDbmsStats::parse_table_info(ObExecContext &ctx,
LOG_WARN("failed to write string", K(ret));
} else {
param.tenant_id_ = session->get_effective_tenant_id();
param.is_temp_table_ = table_schema->is_tmp_table();
}
if (OB_SUCC(ret) && table_schema != NULL) {
param.table_id_ = table_schema->get_table_id();

View File

@ -54,7 +54,7 @@ int ObBasicStatsEstimator::estimate(const ObTableStatParam &param,
src_opt_stat.table_stat_ = &tab_stat;
ObOptTableStat *src_tab_stat = src_opt_stat.table_stat_;
ObIArray<ObOptColumnStat*> &src_col_stats = src_opt_stat.column_stats_;
ObArenaAllocator allocator(ObModIds::OB_SQL_PARSER);
ObArenaAllocator allocator("ObBasicStats");
ObSqlString raw_sql;
int64_t duration_time = -1;
// Note that there are dependences between different kinds of statistics
@ -68,11 +68,11 @@ int ObBasicStatsEstimator::estimate(const ObTableStatParam &param,
column_params.count(),
src_col_stats))) {
LOG_WARN("failed init col stats", K(ret));
} else if (OB_FAIL(add_hint(no_rewrite, ctx_.get_allocator()))) {
} else if (OB_FAIL(add_hint(no_rewrite, allocator))) {
LOG_WARN("failed to add no_rewrite", K(ret));
} else if (OB_FAIL(add_from_table(param.db_name_, param.tab_name_))) {
LOG_WARN("failed to add from table", K(ret));
} else if (OB_FAIL(fill_parallel_info(ctx_.get_allocator(), param.degree_))) {
} else if (OB_FAIL(fill_parallel_info(allocator, param.degree_))) {
LOG_WARN("failed to add query sql parallel info", K(ret));
} else if (OB_FAIL(ObDbmsStatsUtils::get_valid_duration_time(extra.start_time_,
param.duration_time_,
@ -80,13 +80,15 @@ int ObBasicStatsEstimator::estimate(const ObTableStatParam &param,
LOG_WARN("failed to get valid duration time", K(ret));
} else if (OB_FAIL(fill_query_timeout_info(ctx_.get_allocator(), duration_time))) {
LOG_WARN("failed to fill query timeout info", K(ret));
} else if (OB_FAIL(fill_sample_info(allocator, param.sample_info_))) {
LOG_WARN("failed to fill sample info", K(ret));
} else if (dst_opt_stats.count() > 1 &&
OB_FAIL(fill_group_by_info(ctx_.get_allocator(), param, extra, calc_part_id_str))) {
OB_FAIL(fill_group_by_info(allocator, param, extra, calc_part_id_str))) {
LOG_WARN("failed to add query sql partition info", K(ret));
} else if (OB_FAIL(add_stat_item(ObStatRowCount(&param, src_tab_stat)))) {
LOG_WARN("failed to add row count", K(ret));
} else if (calc_part_id_str.empty()) {
if (OB_FAIL(fill_partition_info(ctx_.get_allocator(), param, extra))) {
if (OB_FAIL(fill_partition_info(allocator, param, extra))) {
LOG_WARN("failed to add query sql parallel info", K(ret));
} else if (OB_UNLIKELY(dst_opt_stats.count() != 1) ||
OB_ISNULL(dst_opt_stats.at(0).table_stat_)) {
@ -118,6 +120,8 @@ int ObBasicStatsEstimator::estimate(const ObTableStatParam &param,
} else if (OB_FAIL(do_estimate(param.tenant_id_, raw_sql.string(), COPY_ALL_STAT,
src_opt_stat, dst_opt_stats))) {
LOG_WARN("failed to evaluate basic stats", K(ret));
} else if (OB_FAIL(refine_basic_stats(param, extra, dst_opt_stats))) {
LOG_WARN("failed to refine basic stats", K(ret));
} else {
LOG_TRACE("basic stats is collected", K(dst_opt_stats.count()));
}
@ -790,5 +794,124 @@ int ObBasicStatsEstimator::generate_first_part_idx_map(const ObIArray<PartInfo>
return ret;
}
/**
* @brief ObBasicStatsEstimator::refine_basic_stats
* when the user specify estimate_percent is too small, the sample data isn't enough to describe the
* overall data distribution, So we need consider refine it, and reset the appropriate estimate_percent
* to regather basic stats.
*/
int ObBasicStatsEstimator::refine_basic_stats(const ObTableStatParam &param,
const ObExtraParam &extra,
ObIArray<ObOptStat> &dst_opt_stats)
{
int ret = OB_SUCCESS;
if (sample_value_ >= 0.000001 && sample_value_ < 100.0) {
for (int64_t i = 0; OB_SUCC(ret) && i < dst_opt_stats.count(); ++i) {
bool need_re_estimate = false;
ObExtraParam new_extra;
ObTableStatParam new_param;
ObSEArray<ObOptStat, 1> tmp_opt_stats;
ObBasicStatsEstimator basic_re_est(ctx_);
if (OB_FAIL(check_stat_need_re_estimate(param, extra, dst_opt_stats.at(i),
need_re_estimate, new_param, new_extra))) {
LOG_WARN("failed to check stat need re-estimate", K(ret));
} else if (!need_re_estimate) {
//do nothing
} else if (OB_FAIL(tmp_opt_stats.push_back(dst_opt_stats.at(i)))) {
LOG_WARN("failed to push back", K(ret));
} else if (OB_FAIL(basic_re_est.estimate(new_param, new_extra, tmp_opt_stats))) {
LOG_WARN("failed to estimate basic statistics", K(ret));
} else {
LOG_TRACE("Suceed to re-estimate stats", K(new_param), K(param));
}
}
}
return ret;
}
/**
 * @brief ObBasicStatsEstimator::check_stat_need_re_estimate
 * Check whether the stats in opt_stat come from a sample that is too small and, if so, build the
 * parameters for gathering them again and clear the stale values.
 *
 * The stats are kept when row_count * sample_value_ / 100 reaches MAGIC_MIN_SAMPLE_SIZE, where
 * row_count is the row count recorded in opt_stat. Otherwise:
 *   1. new_param is assigned from origin_param and its sample info is replaced: no sampling when
 *      the row count is at most MAGIC_SAMPLE_SIZE, else a non-block percent sample of
 *      MAGIC_SAMPLE_SIZE * 100 / row_count;
 *   2. new_extra takes the level and nth_part_ of origin_extra; for partition / subpartition
 *      level it is pointed at the entry whose part_id_ equals the partition id of opt_stat;
 *   3. the table stat and every column stat of opt_stat are reset.
 *
 * @param origin_param      [in]     parameters of the first gather
 * @param origin_extra      [in]     extra parameters of the first gather
 * @param opt_stat          [in,out] stats to check; reset when a re-estimate is needed
 * @param need_re_estimate  [out]    set to true once the re-estimate path is taken
 * @param new_param         [out]    parameters for the re-estimate
 * @param new_extra         [out]    extra parameters for the re-estimate
 * @return OB_SUCCESS; OB_ERR_UNEXPECTED when the table stat, a column stat or its llc bitmap is
 *         null or the partition cannot be found; or the error returned by assign()
 */
int ObBasicStatsEstimator::check_stat_need_re_estimate(const ObTableStatParam &origin_param,
                                                       const ObExtraParam &origin_extra,
                                                       ObOptStat &opt_stat,
                                                       bool &need_re_estimate,
                                                       ObTableStatParam &new_param,
                                                       ObExtraParam &new_extra)
{
  int ret = OB_SUCCESS;
  need_re_estimate = false;
  if (OB_ISNULL(opt_stat.table_stat_)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("get unexpected error", K(ret), K(opt_stat.table_stat_));
  } else if (opt_stat.table_stat_->get_row_count() * sample_value_ / 100 >= MAGIC_MIN_SAMPLE_SIZE) {
    //do nothing, the sample is large enough
  } else if (OB_FAIL(new_param.assign(origin_param))) {
    LOG_WARN("failed to assign", K(ret));
  } else {
    need_re_estimate = true;
    int64_t total_row_count = opt_stat.table_stat_->get_row_count();
    //1.set sample ratio
    if (total_row_count <= MAGIC_SAMPLE_SIZE) {
      // few enough rows: gather again without sampling
      new_param.sample_info_.is_sample_ = false;
      new_param.sample_info_.sample_value_ = 0.0;
      new_param.sample_info_.is_block_sample_ = false;
    } else {
      // aim at MAGIC_SAMPLE_SIZE sampled rows
      new_param.sample_info_.is_sample_ = true;
      new_param.sample_info_.is_block_sample_ = false;
      new_param.sample_info_.sample_value_ = (MAGIC_SAMPLE_SIZE * 100.0) / total_row_count;
      new_param.sample_info_.sample_type_ = PercentSample;
    }
    //2.set partition info
    new_extra.type_ = origin_extra.type_;
    new_extra.nth_part_ = origin_extra.nth_part_;
    bool find_it = (new_extra.type_ == TABLE_LEVEL);
    if (new_extra.type_ == PARTITION_LEVEL) {
      for (int64_t i = 0; !find_it && i < new_param.part_infos_.count(); ++i) {
        if (opt_stat.table_stat_->get_partition_id() == new_param.part_infos_.at(i).part_id_) {
          find_it = true;
          new_extra.nth_part_ = i;
          new_param.part_name_ = new_param.part_infos_.at(i).part_name_;
          new_param.is_subpart_name_ = false;
        }
      }
    } else if (new_extra.type_ == SUBPARTITION_LEVEL) {
      for (int64_t i = 0; !find_it && i < new_param.subpart_infos_.count(); ++i) {
        if (opt_stat.table_stat_->get_partition_id() == new_param.subpart_infos_.at(i).part_id_) {
          find_it = true;
          new_extra.nth_part_ = i;
          new_param.part_name_ = new_param.subpart_infos_.at(i).part_name_;
          new_param.is_subpart_name_ = true;
        }
      }
    }
    if (!find_it) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("get unexpected error", K(ret), K(new_param), KPC(opt_stat.table_stat_));
    }
    //3.reset opt stat
    if (OB_SUCC(ret)) {
      opt_stat.table_stat_->set_row_count(0);
      opt_stat.table_stat_->set_avg_row_size(0);
      for (int64_t i = 0; OB_SUCC(ret) && i < opt_stat.column_stats_.count(); ++i) {
        if (OB_ISNULL(opt_stat.column_stats_.at(i))) {
          ret = OB_ERR_UNEXPECTED;
          LOG_WARN("get unexpected error", K(ret));
        } else if (OB_ISNULL(opt_stat.column_stats_.at(i)->get_llc_bitmap())) {
          // the bitmap is cleared with MEMSET below, so it must exist; checked before the
          // column stat is touched, in the same way copy_col_stats rejects a null bitmap
          ret = OB_ERR_UNEXPECTED;
          LOG_WARN("get unexpected null llc bitmap", K(ret));
        } else {
          ObObj null_val;
          null_val.set_null();
          opt_stat.column_stats_.at(i)->set_max_value(null_val);
          opt_stat.column_stats_.at(i)->set_min_value(null_val);
          opt_stat.column_stats_.at(i)->set_num_not_null(0);
          opt_stat.column_stats_.at(i)->set_num_null(0);
          opt_stat.column_stats_.at(i)->set_num_distinct(0);
          opt_stat.column_stats_.at(i)->set_avg_len(0);
          opt_stat.column_stats_.at(i)->set_llc_bitmap_size(ObColumnStat::NUM_LLC_BUCKET);
          MEMSET(opt_stat.column_stats_.at(i)->get_llc_bitmap(), 0, ObColumnStat::NUM_LLC_BUCKET);
          opt_stat.column_stats_.at(i)->get_histogram().reset();
        }
      }
    }
  }
  return ret;
}
} // end of common
} // end of oceanbase

View File

@ -111,6 +111,17 @@ private:
static int generate_first_part_idx_map(const ObIArray<PartInfo> &all_part_infos,
hash::ObHashMap<int64_t, int64_t> &first_part_idx_map);
int refine_basic_stats(const ObTableStatParam &param,
const ObExtraParam &extra,
ObIArray<ObOptStat> &dst_opt_stats);
int check_stat_need_re_estimate(const ObTableStatParam &origin_param,
const ObExtraParam &origin_extra,
ObOptStat &opt_stat,
bool &need_re_estimate,
ObTableStatParam &new_param,
ObExtraParam &new_extra);
};
}

View File

@ -101,13 +101,15 @@ int ObDbmsStatsExecutor::gather_table_stats(ObExecContext &ctx,
ObSEArray<ObOptTableStatHandle, 4> history_tab_handles;
ObSEArray<ObOptColumnStatHandle, 4> history_col_handles;
//before write, we need record history stats.
if (OB_FAIL(ObDbmsStatsHistoryManager::get_history_stat_handles(ctx, param,
if (!param.is_temp_table_ &&
OB_FAIL(ObDbmsStatsHistoryManager::get_history_stat_handles(ctx, param,
history_tab_handles,
history_col_handles))) {
LOG_WARN("failed to get history stat handles", K(ret));
} else if (OB_FAIL(ObDbmsStatsUtils::split_batch_write(ctx, all_tstats, all_cstats))) {
LOG_WARN("failed to split batch write", K(ret));
} else if (OB_FAIL(ObDbmsStatsUtils::batch_write_history_stats(ctx,
} else if (!param.is_temp_table_ &&
OB_FAIL(ObDbmsStatsUtils::batch_write_history_stats(ctx,
history_tab_handles,
history_col_handles))) {
LOG_WARN("failed to batch write history stats", K(ret));
@ -245,7 +247,8 @@ int ObDbmsStatsExecutor::set_table_stats(ObExecContext &ctx,
if (OB_FAIL(do_set_table_stats(param, &table_stat))) {
LOG_WARN("failed to do set table stats", K(ret));
////before update, we need record history stats.
} else if (OB_FAIL(ObDbmsStatsHistoryManager::get_history_stat_handles(ctx, param.table_param_,
} else if (!param.table_param_.is_temp_table_ &&
OB_FAIL(ObDbmsStatsHistoryManager::get_history_stat_handles(ctx, param.table_param_,
history_tab_handles,
history_col_handles))) {
LOG_WARN("failed to get history stat handles", K(ret));
@ -478,7 +481,8 @@ int ObDbmsStatsExecutor::delete_table_stats(ObExecContext &ctx,
ObSEArray<ObOptColumnStatHandle, 4> history_col_handles;
int64_t affected_rows = 0;
//before delete, we need record history stats.
if (OB_FAIL(ObDbmsStatsHistoryManager::get_history_stat_handles(ctx, param,
if (!param.is_temp_table_ &&
OB_FAIL(ObDbmsStatsHistoryManager::get_history_stat_handles(ctx, param,
history_tab_handles,
history_col_handles))) {
LOG_WARN("failed to get history stat handles", K(ret));
@ -488,7 +492,7 @@ int ObDbmsStatsExecutor::delete_table_stats(ObExecContext &ctx,
cascade_columns,
affected_rows))) {
LOG_WARN("failed to delete table stats", K(ret));
} else if (affected_rows != 0 &&
} else if (affected_rows != 0 && !param.is_temp_table_ &&
OB_FAIL(ObDbmsStatsUtils::batch_write_history_stats(ctx,
history_tab_handles,
history_col_handles))) {

View File

@ -604,7 +604,7 @@ int ObDbmsStatsExportImport::do_import_stats(ObExecContext &ctx,
ObSEArray<ObOptTableStatHandle, 4> history_tab_handles;
ObSEArray<ObOptColumnStatHandle, 4> history_col_handles;
//before import, we need record history stats.
if (!is_index_stat && !all_tstats.empty() &&
if (!is_index_stat && !all_tstats.empty() && !param.is_temp_table_ &&
OB_FAIL(ObDbmsStatsHistoryManager::get_history_stat_handles(ctx, param,
history_tab_handles,
history_col_handles))) {

View File

@ -55,7 +55,7 @@ int ObHybridHistEstimator::estimate(const ObTableStatParam &param,
ObOptStat src_opt_stat;
src_opt_stat.table_stat_ = &tab_stat;
ObIArray<ObOptColumnStat*> &src_col_stats = src_opt_stat.column_stats_;
ObArenaAllocator allocator(ObModIds::OB_SQL_PARSER);
ObArenaAllocator allocator("ObHybridHist");
ObString raw_sql;
ObString refine_raw_sql;
int64_t refine_cnt = 0;
@ -462,8 +462,8 @@ int ObHybridHistEstimator::try_build_hybrid_hist(const ObColumnStatParam &param,
* b. if total_row_count >= MAGIC_MAX_AUTO_SAMPLE_SIZE then:
* i: if max_num_bkts <= DEFAULT_HISTOGRAM_BUCKET_NUM then choosing MAGIC_SAMPLE_SIZE;
* ii: if max_num_bkts > DEFAULT_HISTOGRAM_BUCKET_NUM:
* (1): if max_num_bkts >= total_row_count * MAX_CUT_RATIO then choosing full table scan;
* (2): if max_num_bkts <= total_row_count * MAX_CUT_RATIO then choosing:
* (1): if max_num_bkts >= total_row_count * MAGIC_SAMPLE_CUT_RATIO then choosing full table scan;
* (2): if max_num_bkts < total_row_count * MAGIC_SAMPLE_CUT_RATIO then choosing:
* sample_size = MAGIC_SAMPLE_SIZE + MAGIC_BASE_SAMPLE_SIZE + (max_num_bkts -
* DEFAULT_HISTOGRAM_BUCKET_NUM) * MAGIC_MIN_SAMPLE_SIZE * 0.01;
*
@ -481,11 +481,6 @@ int ObHybridHistEstimator::compute_estimate_percent(int64_t total_row_count,
bool &is_block_sample)
{
int ret = OB_SUCCESS;
const int64_t MAGIC_SAMPLE_SIZE = 5500;
const int64_t MAGIC_MAX_AUTO_SAMPLE_SIZE = 22000;
const int64_t MAGIC_MIN_SAMPLE_SIZE = 2500;
const int64_t MAGIC_BASE_SAMPLE_SIZE = 1000;
const double MAX_CUT_RATIO = 0.00962;
if (0 == total_row_count) {
need_sample = false;
} else if (sample_info.is_sample_) {
@ -521,7 +516,7 @@ int ObHybridHistEstimator::compute_estimate_percent(int64_t total_row_count,
is_block_sample = false;
est_percent = (MAGIC_SAMPLE_SIZE * 100.0) / total_row_count;
} else {
int64_t num_bound_bkts = static_cast<int64_t>(std::round(total_row_count * MAX_CUT_RATIO));
int64_t num_bound_bkts = static_cast<int64_t>(std::round(total_row_count * MAGIC_SAMPLE_CUT_RATIO));
if (max_num_bkts >= num_bound_bkts) {
need_sample = false;
} else {

View File

@ -279,7 +279,17 @@ int ObOptStatManager::update_table_stat(const uint64_t tenant_id,
return ret;
}
int ObOptStatManager::delete_table_stat(const uint64_t tenant_id,
int ObOptStatManager::delete_table_stat(uint64_t tenant_id,
                                        const uint64_t ref_id,
                                        int64_t &affected_rows)
{
  // Convenience overload: forwards to the five-argument delete_table_stat() with an empty
  // partition-id list and cascade_column = true, and returns its result unchanged.
  // NOTE(review): an empty part_ids presumably means "every partition of ref_id" -- confirm
  // against the five-argument overload.
  ObSEArray<int64_t, 1> part_ids;
  const bool cascade_column = true;
  return delete_table_stat(tenant_id, ref_id, part_ids, cascade_column, affected_rows);
}
int ObOptStatManager::delete_table_stat(uint64_t tenant_id,
const uint64_t ref_id,
const ObIArray<int64_t> &part_ids,
const bool cascade_column,

View File

@ -139,6 +139,10 @@ public:
bool only_update_col_stat = false);
int delete_table_stat(const uint64_t tenant_id,
const uint64_t ref_id,
int64_t &affected_rows);
int delete_table_stat(uint64_t tenant_id,
const uint64_t ref_id,
const ObIArray<int64_t> &part_ids,
const bool cascade_column,

View File

@ -121,7 +121,8 @@ public:
data_version_(0),
last_analyzed_(0),
stattype_locked_(0),
modified_count_(0) {}
modified_count_(0),
sample_size_(0) {}
ObOptTableStat(uint64_t table_id,
int64_t partition_id,
int64_t object_type,
@ -150,7 +151,8 @@ public:
data_version_(data_version),
last_analyzed_(0),
stattype_locked_(0),
modified_count_(0) {}
modified_count_(0),
sample_size_(0) {}
virtual ~ObOptTableStat() {}
@ -202,6 +204,9 @@ public:
int64_t get_modified_count() const { return modified_count_; }
void set_modified_count(int64_t modified_count) { modified_count_ = modified_count; }
int64_t get_sample_size() const { return sample_size_; }
void set_sample_size(int64_t sample_size) { sample_size_ = sample_size; }
virtual int64_t size() const
{
return sizeof(*this);
@ -245,6 +250,7 @@ public:
last_analyzed_ = 0;
stattype_locked_ = 0;
modified_count_ = 0;
sample_size_ = 0;
}
TO_STRING_KV(K(table_id_),
@ -262,7 +268,8 @@ public:
K(data_version_),
K(last_analyzed_),
K(stattype_locked_),
K(modified_count_));
K(modified_count_),
K(sample_size_));
private:
uint64_t table_id_;
@ -283,6 +290,7 @@ private:
int64_t last_analyzed_;
uint64_t stattype_locked_;
int64_t modified_count_;
int64_t sample_size_;
};
}

View File

@ -116,6 +116,7 @@ int ObTableStatParam::assign(const ObTableStatParam &other)
global_data_part_id_ = other.global_data_part_id_;
data_table_id_ = other.data_table_id_;
need_estimate_block_ = other.need_estimate_block_;
is_temp_table_ = other.is_temp_table_;
if (OB_FAIL(part_infos_.assign(other.part_infos_))) {
LOG_WARN("failed to assign", K(ret));
} else if (OB_FAIL(subpart_infos_.assign(other.subpart_infos_))) {

View File

@ -50,6 +50,11 @@ enum StatOptionFlags
const static double OPT_DEFAULT_STALE_PERCENT = 0.1;
const static int64_t OPT_DEFAULT_STATS_RETENTION = 31;
const static int64_t OPT_STATS_MAX_VALUE_CAHR_LEN = 128;
// Sampling constants shared by the basic stats and hybrid histogram estimators.
// Row count a percent sample aims at: est_percent = MAGIC_SAMPLE_SIZE * 100.0 / total_row_count.
// The basic stats re-estimate gathers without sampling when the row count is at most this value.
const int64_t MAGIC_SAMPLE_SIZE = 5500;
// Row-count boundary used by the hybrid histogram auto sample-size rules.
// NOTE(review): exact role is described only in the compute_estimate_percent comment -- confirm.
const int64_t MAGIC_MAX_AUTO_SAMPLE_SIZE = 22000;
// Basic stats whose row_count * sample_value / 100 is below this value are gathered again.
// It is also a term of the hybrid histogram sample-size formula.
const int64_t MAGIC_MIN_SAMPLE_SIZE = 2500;
// Additive term of the hybrid histogram sample-size formula.
const int64_t MAGIC_BASE_SAMPLE_SIZE = 1000;
// total_row_count * MAGIC_SAMPLE_CUT_RATIO (rounded) is the bucket bound; when max_num_bkts
// reaches that bound the hybrid histogram is built without sampling.
const double MAGIC_SAMPLE_CUT_RATIO = 0.00962;
enum StatLevel
{
@ -291,7 +296,8 @@ struct ObTableStatParam {
global_tablet_id_(0),
global_data_part_id_(INVALID_GLOBAL_PART_ID),
data_table_id_(INVALID_GLOBAL_PART_ID),
need_estimate_block_(true)
need_estimate_block_(true),
is_temp_table_(false)
{}
int assign(const ObTableStatParam &other);
@ -363,6 +369,7 @@ struct ObTableStatParam {
int64_t global_data_part_id_; // used to check wether table is locked, while gathering index stats.
int64_t data_table_id_; // the data table id for index schema
bool need_estimate_block_;//need estimate macro/micro block count
bool is_temp_table_;
TO_STRING_KV(K(tenant_id_),
K(db_name_),
@ -406,7 +413,8 @@ struct ObTableStatParam {
K(global_tablet_id_),
K(global_data_part_id_),
K(data_table_id_),
K(need_estimate_block_));
K(need_estimate_block_),
K(is_temp_table_));
};
struct ObOptStat

View File

@ -32,7 +32,8 @@ ObStatsEstimator::ObStatsEstimator(ObExecContext &ctx) :
group_by_string_(),
where_string_(),
stat_items_(),
results_()
results_(),
sample_value_(100.0)
{}
int ObStatsEstimator::gen_select_filed()
@ -116,6 +117,27 @@ int ObStatsEstimator::fill_sample_info(common::ObIAllocator &alloc,
return ret;
}
/**
 * Build the sample hint from an ObAnalyzeSampleInfo.
 *
 * Nothing is generated when sampling is off, when the sample type is RowSample, or when the
 * sample value is exactly 100. A sample value outside [0.000001, 100] is reported as
 * OB_ERR_UNEXPECTED. Otherwise the hint is written into sample_hint_ through the percent
 * overload of fill_sample_info() and, on success, the value is remembered in sample_value_.
 *
 * @param alloc        [in] allocator handed to the percent overload
 * @param sample_info  [in] sample settings requested for the gather
 * @return OB_SUCCESS, OB_ERR_UNEXPECTED, or the error of the percent overload
 */
int ObStatsEstimator::fill_sample_info(common::ObIAllocator &alloc,
                                       const ObAnalyzeSampleInfo &sample_info)
{
  int ret = OB_SUCCESS;
  const bool is_percent_sample = sample_info.is_sample_ &&
                                 sample_info.sample_type_ != SampleType::RowSample;
  if (is_percent_sample) {
    if (OB_UNLIKELY(sample_info.sample_value_ < 0.000001 || sample_info.sample_value_ > 100.0)) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("get unexpected error", K(ret), K(sample_info));
    } else if (sample_info.sample_value_ != 100.0) {
      // a value of exactly 100 needs no hint, anything below it does
      if (OB_FAIL(fill_sample_info(alloc,
                                   sample_info.sample_value_,
                                   sample_info.is_block_sample_,
                                   sample_hint_))) {
        LOG_WARN("failed to fill sample info", K(ret));
      } else {
        sample_value_ = sample_info.sample_value_;
      }
    }
  }
  return ret;
}
int ObStatsEstimator::fill_parallel_info(common::ObIAllocator &alloc,
int64_t degree)
{
@ -421,8 +443,13 @@ int ObStatsEstimator::copy_opt_stat(ObOptStat &src_opt_stat,
LOG_WARN("get unexpected null", K(ret), K(dst_opt_stats.at(i).table_stat_));
} else if (dst_opt_stats.at(i).table_stat_->get_partition_id() == partition_id) {
find_it = true;
dst_opt_stats.at(i).table_stat_->set_row_count(tmp_tab_stat->get_row_count());
int64_t row_cnt = tmp_tab_stat->get_row_count();
if (sample_value_ >= 0.000001 && sample_value_ < 100.0) {
row_cnt = static_cast<int64_t>(row_cnt * 100 / sample_value_);
}
dst_opt_stats.at(i).table_stat_->set_row_count(row_cnt);
dst_opt_stats.at(i).table_stat_->set_avg_row_size(tmp_tab_stat->get_avg_row_size());
dst_opt_stats.at(i).table_stat_->set_sample_size(tmp_tab_stat->get_row_count());
if (OB_FAIL(copy_col_stats(tmp_col_stats, dst_opt_stats.at(i).column_stats_))) {
LOG_WARN("failed to copy col stat", K(ret));
} else {/*do nothing*/}
@ -449,11 +476,19 @@ int ObStatsEstimator::copy_col_stats(ObIArray<ObOptColumnStat *> &src_col_stats,
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected error", K(ret), K(dst_col_stats.at(i)));
} else {
int64_t num_not_null = src_col_stats.at(i)->get_num_not_null();
int64_t num_null = src_col_stats.at(i)->get_num_null();
int64_t num_distinct = src_col_stats.at(i)->get_num_distinct();
if (sample_value_ >= 0.000001 && sample_value_ < 100.0) {
num_not_null = static_cast<int64_t>(num_not_null * 100 / sample_value_);
num_null = static_cast<int64_t>(num_null * 100 / sample_value_);
num_distinct = static_cast<int64_t>(num_distinct * 100 / sample_value_);
}
dst_col_stats.at(i)->set_max_value(src_col_stats.at(i)->get_max_value());
dst_col_stats.at(i)->set_min_value(src_col_stats.at(i)->get_min_value());
dst_col_stats.at(i)->set_num_not_null(src_col_stats.at(i)->get_num_not_null());
dst_col_stats.at(i)->set_num_null(src_col_stats.at(i)->get_num_null());
dst_col_stats.at(i)->set_num_distinct(src_col_stats.at(i)->get_num_distinct());
dst_col_stats.at(i)->set_num_not_null(num_not_null);
dst_col_stats.at(i)->set_num_null(num_null);
dst_col_stats.at(i)->set_num_distinct(num_distinct);
dst_col_stats.at(i)->set_avg_len(src_col_stats.at(i)->get_avg_len());
if (OB_ISNULL(dst_col_stats.at(i)->get_llc_bitmap()) ||
OB_ISNULL(src_col_stats.at(i)->get_llc_bitmap()) ||
@ -471,7 +506,7 @@ int ObStatsEstimator::copy_col_stats(ObIArray<ObOptColumnStat *> &src_col_stats,
dst_col_stats.at(i)->set_llc_bitmap_size(src_col_stats.at(i)->get_llc_bitmap_size());
ObHistogram &src_hist = src_col_stats.at(i)->get_histogram();
dst_col_stats.at(i)->get_histogram().set_type(src_hist.get_type());
dst_col_stats.at(i)->get_histogram().set_sample_size(src_hist.get_sample_size());
dst_col_stats.at(i)->get_histogram().set_sample_size(src_col_stats.at(i)->get_num_not_null());
dst_col_stats.at(i)->get_histogram().set_bucket_cnt(src_hist.get_bucket_cnt());
dst_col_stats.at(i)->get_histogram().set_density(src_hist.get_density());
if (OB_FAIL(append(dst_col_stats.at(i)->get_histogram().get_buckets(),

View File

@ -70,6 +70,9 @@ protected:
bool block_sample,
ObString &sample_hint);
int fill_sample_info(common::ObIAllocator &alloc,
const ObAnalyzeSampleInfo &sample_info);
int fill_parallel_info(common::ObIAllocator &alloc,
int64_t degree);
@ -116,6 +119,7 @@ protected:
ObArray<ObStatItem *> stat_items_;
ObArray<ObObj> results_;
double sample_value_;
};

View File

@ -48,6 +48,7 @@
#include "lib/utility/utility.h"
#include "lib/utility/ob_proto_trans_util.h"
#include "lib/allocator/ob_mod_define.h"
#include "share/stat/ob_opt_stat_manager.h"
using namespace oceanbase::sql;
@ -621,6 +622,11 @@ int ObSQLSessionInfo::delete_from_oracle_temp_tables(const obrpc::ObDropTableArg
}
if (OB_SUCC(ret)) {
LOG_DEBUG("succeed to delete rows in oracle temporary table", K(sql), K(affect_rows));
//delete relation temp table stats.
if (OB_FAIL(ObOptStatManager::get_instance().delete_table_stat(tenant_id,
table_schema->get_table_id(), affect_rows))) {
LOG_WARN("failed to delete table stats", K(ret));
}
} else {
LOG_WARN("failed to delete rows in oracle temporary table", K(ret), K(sql));
}