diff --git a/src/pl/sys_package/ob_dbms_stats.cpp b/src/pl/sys_package/ob_dbms_stats.cpp index a51d8016fe..ccefafe036 100644 --- a/src/pl/sys_package/ob_dbms_stats.cpp +++ b/src/pl/sys_package/ob_dbms_stats.cpp @@ -624,6 +624,7 @@ int ObDbmsStats::set_column_stats(sql::ObExecContext &ctx, params.at(1), params.at(2), params.at(3), + param.col_meta_, param.table_param_))) { LOG_WARN("failed to parse set column stats", K(ret)); } else if (OB_FAIL(parse_set_column_stats_options(ctx, @@ -3650,6 +3651,7 @@ int ObDbmsStats::parse_set_column_stats(ObExecContext &ctx, const ObObjParam &tab_name, const ObObjParam &colname, const ObObjParam &part_name, + ObObjMeta &col_meta, ObTableStatParam ¶m) { int ret = OB_SUCCESS; @@ -3705,6 +3707,7 @@ int ObDbmsStats::parse_set_column_stats(ObExecContext &ctx, } else { col_param.column_id_ = col->get_column_id(); col_param.cs_type_ = col->get_collation_type(); + col_meta = col->get_meta_type(); col_param.gather_flag_ = 0; col_param.bucket_num_ = -1; if (col->is_index_column()) { @@ -4162,7 +4165,12 @@ int ObDbmsStats::get_default_stat_options(ObExecContext &ctx, } } if (OB_SUCC(ret) && stat_options & StatOptionFlags::OPT_BLOCK_SAMPLE) { - param.sample_info_.set_is_block_sample(false); + ObBlockSamplePrefs *tmp_pref = NULL; + if (OB_FAIL(new_stat_prefs(*param.allocator_, ctx.get_my_session(), ObString(), tmp_pref))) { + LOG_WARN("failed to new stat prefs", K(ret)); + } else if (OB_FAIL(stat_prefs.push_back(tmp_pref))) { + LOG_WARN("failed to push back", K(ret)); + } } if (OB_SUCC(ret) && stat_options & StatOptionFlags::OPT_METHOD_OPT) { ObMethodOptPrefs *tmp_pref = NULL; @@ -4860,10 +4868,8 @@ int ObDbmsStats::parse_set_hist_stats_options(ObExecContext &ctx, number::ObNumber num_eavs; if (!epc.is_null() && OB_FAIL(epc.get_number(num_epc))) { LOG_WARN("failed to get epc", K(ret)); - } else if (!minval.is_null() && OB_FAIL(minval.get_raw(hist_param.minval_))) { - LOG_WARN("failed to get minval", K(ret)); - } else if (!maxval.is_null() && OB_FAIL(maxval.get_raw(hist_param.maxval_))) { - LOG_WARN("failed to get maxval", K(ret)); + } else if (!minval.is_null() && FALSE_IT(hist_param.minval_ = &minval)) { + } else if (!maxval.is_null() && FALSE_IT(hist_param.maxval_ = &maxval)) { } else if (OB_FAIL(parser_pl_numarray(bkvals, hist_param.bkvals_))) { LOG_WARN("failed to parser pl numarray", K(ret)); } else if (OB_FAIL(parser_pl_numarray(novals, hist_param.novals_))) { @@ -5493,53 +5499,28 @@ int ObDbmsStats::gather_database_table_stats(sql::ObExecContext &ctx, } else if (OB_FALSE_IT(tenant_id = session->get_effective_tenant_id())) { } else if (is_virtual_tenant_id(tenant_id)) { // do nothing - } else if (OB_FAIL(ObBasicStatsEstimator::get_need_stats_table_cnt(ctx, tenant_id, - task_info.task_table_count_))) { - LOG_WARN("failed to get all tables count", K(ret)); } else { int64_t slice_cnt = 10000; // maximum tables we can gather stats at each iteration - int64_t tmp_succeed = 0; + int64_t offset = 0; do { table_ids.reuse(); - tmp_succeed = succeed_cnt; - if (OB_FAIL(THIS_WORKER.check_status())) { - LOG_WARN("check status failed", KR(ret)); - } else if (OB_FAIL(ObBasicStatsEstimator::get_need_stats_tables(ctx, tenant_id, table_ids, slice_cnt))) { - LOG_WARN("failed to get tables that need gather stats", K(ret)); - } else if (OB_FAIL(do_gather_tables_stats(ctx, tenant_id, table_ids, - duration_time, succeed_cnt, task_info))) { - LOG_WARN("failed to gather table stats", K(ret)); - } - LOG_INFO("succeed to gather table stats", K(ret), K(table_ids.count()), K(slice_cnt), - K(tmp_succeed), K(duration_time), K(succeed_cnt)); - // case that we can break the loop: - // 1. #table_ids < slice_cnt, which means that we have fetched all the tables we need to gather stats - // 2. duration_time_ = -1, and has reached the ob_query_timeout session variable limit - // 3. duration_time is not -1, and the time we cost to gather stats has reached duration_time - } while (OB_SUCC(ret) && table_ids.count() == slice_cnt && (succeed_cnt - tmp_succeed) != 0); - // gather virtual table stats - ObSEArray all_table_ids; - if (OB_FAIL(ret)) { - } else if (OB_ISNULL(ctx.get_virtual_table_ctx().schema_guard_)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected error", K(ret), K(ctx.get_virtual_table_ctx().schema_guard_)); - } else if (OB_FAIL(ctx.get_virtual_table_ctx().schema_guard_->get_table_ids_in_tenant(tenant_id, all_table_ids))) { - LOG_WARN("failed to get virtual table ids in tenant", K(ret)); - } else { - for (int64_t i = 0; OB_SUCC(ret) && i < all_table_ids.count(); ++i) { - int64_t table_id = static_cast(all_table_ids.at(i)); - if (is_virtual_table(table_id) && !ObDbmsStatsUtils::is_no_stat_virtual_table(table_id)) { - if (OB_FAIL(refresh_tenant_schema_guard(ctx, tenant_id))) { + if (OB_FAIL(ObBasicStatsEstimator::get_need_stats_tables(ctx, tenant_id, offset, slice_cnt, table_ids))) { + LOG_WARN("failed to get need stats tables", K(ret)); + } else { + task_info.task_table_count_ += table_ids.count(); + for (int64_t i = 0; OB_SUCC(ret) && i < table_ids.count(); ++i) { + if (OB_FAIL(THIS_WORKER.check_status())) { + LOG_WARN("failed to check status", K(ret)); + } else if (OB_FAIL(refresh_tenant_schema_guard(ctx, tenant_id))) { LOG_WARN("refresh tenant schema guard failed", K(ret)); - } else if (OB_FAIL(do_gather_table_stats(ctx, table_id, tenant_id, + } else if (OB_FAIL(do_gather_table_stats(ctx, table_ids.at(i), tenant_id, duration_time, succeed_cnt, task_info))) { - LOG_WARN("failed to gather virtual table stats", K(ret)); - } else { - ++task_info.task_table_count_; + LOG_WARN("failed to gather table stats", K(ret)); } } } - } + offset += slice_cnt; + } while (OB_SUCC(ret) && table_ids.count() == slice_cnt); } return ret; } @@ -5589,6 +5570,9 @@ int ObDbmsStats::do_gather_table_stats(sql::ObExecContext &ctx, } else if (OB_ISNULL(table_schema)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(ret)); + } else if (is_recyclebin_database_id(table_schema->get_database_id()) || + (lib::is_oracle_mode() && is_oceanbase_sys_database_id(table_schema->get_database_id()))) { + //do nothing } else { StatTable stat_table(table_schema->get_database_id(), table_id); double stale_percent_threshold = OPT_DEFAULT_STALE_PERCENT; @@ -5787,6 +5771,8 @@ int ObDbmsStats::gather_table_stats_with_default_param(ObExecContext &ctx, LOG_WARN("failed to use default gather stat optitions", K(ret)); } else if (OB_FAIL(adjust_auto_gather_stat_option(stat_table.partition_stat_infos_, stat_param))) { LOG_WARN("failed to use default gather stat optitions", K(ret)); + } else if (!stat_param.need_gather_stats()) { + //do nothing } else if (OB_FAIL(running_monitor.add_table_info(stat_param, stat_table.stale_percent_))) { LOG_WARN("failed to add table info", K(ret)); } else if (OB_FAIL(ObDbmsStatsExecutor::gather_table_stats(ctx, stat_param, running_monitor))) { @@ -5820,7 +5806,9 @@ int ObDbmsStats::gather_table_stats_with_default_param(ObExecContext &ctx, LOG_TRACE("Succeed to gather table stats", K(stat_param)); } running_monitor.set_monitor_result(ret, ObTimeUtility::current_time(), stat_param.allocator_->used()); - update_optimizer_gather_stat_info(NULL, &gather_stat); + if (stat_param.need_gather_stats()) { + update_optimizer_gather_stat_info(NULL, &gather_stat); + } ObOptStatGatherStatList::instance().remove(gather_stat); task_info.completed_table_count_ ++; return ret; @@ -5936,13 +5924,21 @@ int ObDbmsStats::get_new_stat_pref(ObExecContext &ctx, } else { stat_pref = tmp_pref; } + } else if (0 == opt_name.case_compare("BLOCK_SAMPLE")) { + ObBlockSamplePrefs *tmp_pref = NULL; + if (OB_FAIL(new_stat_prefs(allocator, ctx.get_my_session(), opt_value, tmp_pref))) { + LOG_WARN("failed to new stat prefs", K(ret)); + } else { + stat_pref = tmp_pref; + } } else { ret = OB_ERR_DBMS_STATS_PL; LOG_WARN("Invalid input values for pname", K(ret), K(opt_name)); LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL, "Invalid input values for pname, Only Support CASCADE |"\ - "DEGREE | ESTIMATE_PERCENT | GRANULARITY | INCREMENTAL |"\ - "INCREMENTAL_LEVEL | METHOD_OPT | NO_INVALIDATE | OPTIONS"\ - "STALE_PERCENT | ESTIMATE_BLOCK | APPROXIMATE_NDV(global prefs unique) prefs"); + "DEGREE | ESTIMATE_PERCENT | GRANULARITY | INCREMENTAL |"\ + "INCREMENTAL_LEVEL | METHOD_OPT | NO_INVALIDATE | OPTIONS |"\ + "STALE_PERCENT | ESTIMATE_BLOCK | BLOCK_SAMPLE |"\ + "APPROXIMATE_NDV(global prefs unique) prefs"); } return ret; } @@ -6807,11 +6803,18 @@ int ObDbmsStats::adjust_auto_gather_stat_option(const ObIArray 1) || OB_ISNULL(dst_opt_stats.at(0).table_stat_)) { @@ -200,6 +203,8 @@ int ObBasicStatsEstimator::estimate_block_count(ObExecContext &ctx, block_num_stat = new (buf) BlockNumStat(); block_num_stat->tab_macro_cnt_ = estimate_result.at(i).macro_block_count_; block_num_stat->tab_micro_cnt_ = estimate_result.at(i).micro_block_count_; + block_num_stat->sstable_row_cnt_ = estimate_result.at(i).sstable_row_count_; + block_num_stat->memtable_row_cnt_ = estimate_result.at(i).memtable_row_count_; total_sstable_row_cnt += estimate_result.at(i).sstable_row_count_; total_memtable_row_cnt += estimate_result.at(i).memtable_row_count_; int64_t partition_id = static_cast(estimate_result.at(i).part_id_); @@ -213,7 +218,9 @@ int ObBasicStatsEstimator::estimate_block_count(ObExecContext &ctx, block_num_stat->tab_macro_cnt_, block_num_stat->tab_micro_cnt_, block_num_stat->cg_macro_cnt_arr_, - block_num_stat->cg_micro_cnt_arr_))) { + block_num_stat->cg_micro_cnt_arr_, + block_num_stat->sstable_row_cnt_, + block_num_stat->memtable_row_cnt_))) { LOG_WARN("faild to add", K(ret)); } } else if (param.part_level_ == share::schema::PARTITION_LEVEL_TWO) { @@ -226,7 +233,9 @@ int ObBasicStatsEstimator::estimate_block_count(ObExecContext &ctx, block_num_stat->tab_macro_cnt_, block_num_stat->tab_micro_cnt_, block_num_stat->cg_macro_cnt_arr_, - block_num_stat->cg_micro_cnt_arr_))) { + block_num_stat->cg_micro_cnt_arr_, + block_num_stat->sstable_row_cnt_, + block_num_stat->memtable_row_cnt_))) { LOG_WARN("faild to add", K(ret)); } else { int64_t idx = 0; @@ -240,7 +249,9 @@ int ObBasicStatsEstimator::estimate_block_count(ObExecContext &ctx, block_num_stat->tab_macro_cnt_, block_num_stat->tab_micro_cnt_, block_num_stat->cg_macro_cnt_arr_, - block_num_stat->cg_micro_cnt_arr_))) { + block_num_stat->cg_micro_cnt_arr_, + block_num_stat->sstable_row_cnt_, + block_num_stat->memtable_row_cnt_))) { LOG_WARN("faild to add", K(ret)); } } @@ -271,6 +282,8 @@ int ObBasicStatsEstimator::estimate_block_count(ObExecContext &ctx, block_num_stat = new (buf) BlockNumStat(); block_num_stat->tab_macro_cnt_ = global_tab_stat.get_macro_block_count(); block_num_stat->tab_micro_cnt_ = global_tab_stat.get_micro_block_count(); + block_num_stat->sstable_row_cnt_ = global_tab_stat.get_sstable_row_cnt(); + block_num_stat->memtable_row_cnt_ = global_tab_stat.get_memtable_row_cnt(); if (OB_FAIL(block_num_stat->cg_macro_cnt_arr_.assign(global_tab_stat.get_cg_macro_arr())) || OB_FAIL(block_num_stat->cg_micro_cnt_arr_.assign(global_tab_stat.get_cg_micro_arr()))) { LOG_WARN("failed to assign", K(ret)); @@ -289,6 +302,8 @@ int ObBasicStatsEstimator::estimate_block_count(ObExecContext &ctx, block_num_stat = new (buf) BlockNumStat(); block_num_stat->tab_macro_cnt_ = first_part_tab_stats.at(i).get_macro_block_count(); block_num_stat->tab_micro_cnt_ = first_part_tab_stats.at(i).get_micro_block_count(); + block_num_stat->sstable_row_cnt_ = first_part_tab_stats.at(i).get_sstable_row_cnt(); + block_num_stat->memtable_row_cnt_ = first_part_tab_stats.at(i).get_memtable_row_cnt(); if (OB_FAIL(block_num_stat->cg_macro_cnt_arr_.assign(first_part_tab_stats.at(i).get_cg_macro_arr())) || OB_FAIL(block_num_stat->cg_micro_cnt_arr_.assign(first_part_tab_stats.at(i).get_cg_micro_arr()))) { LOG_WARN("failed to assign", K(ret)); @@ -783,7 +798,7 @@ int ObBasicStatsEstimator::estimate_stale_partition(ObExecContext &ctx, cur_part_id = dst_part_id; cur_inc_mod_count = inc_mod_count; } else if (OB_FAIL(check_partition_stat_state(cur_part_id, - has_subpart_invalid_inc ? 0 : cur_inc_mod_count, + has_subpart_invalid_inc ? -1 : cur_inc_mod_count, stale_percent_threshold, partition_stat_infos))) { LOG_WARN("failed to check partition stat state", K(ret)); @@ -804,13 +819,13 @@ int ObBasicStatsEstimator::estimate_stale_partition(ObExecContext &ctx, ret = OB_SUCCESS; if (cur_part_id != -1 && OB_FAIL(check_partition_stat_state(cur_part_id, - has_subpart_invalid_inc ? 0 : cur_inc_mod_count, + has_subpart_invalid_inc ? -1 : cur_inc_mod_count, stale_percent_threshold, partition_stat_infos))) { LOG_WARN("failed to check partition stat state", K(ret)); } else if (is_check_global && OB_FAIL(check_partition_stat_state(global_part_id, - has_part_invalid_inc ? 0 : table_inc_modified, + has_part_invalid_inc ? -1 : table_inc_modified, stale_percent_threshold, partition_stat_infos))) { LOG_WARN("failed to check partition stat state", K(ret)); @@ -826,6 +841,7 @@ int ObBasicStatsEstimator::estimate_stale_partition(ObExecContext &ctx, } } } + ObSEArray record_first_part_ids; for (int64_t i = 0; OB_SUCC(ret) && i < partition_infos.count(); ++i) { int64_t partition_id = partition_infos.at(i).part_id_; int64_t first_part_id = partition_infos.at(i).first_part_id_; @@ -836,16 +852,22 @@ int ObBasicStatsEstimator::estimate_stale_partition(ObExecContext &ctx, LOG_WARN("failed to push back", K(ret)); } else {/*do nothing*/} } - if (first_part_id != OB_INVALID_ID && !is_contain(monitor_modified_part_ids, first_part_id)) { + if (OB_SUCC(ret) && + first_part_id != OB_INVALID_ID && + !is_contain(monitor_modified_part_ids, first_part_id) && + !is_contain(record_first_part_ids, first_part_id)) { ObPartitionStatInfo partition_stat_info(first_part_id, 0, false, true); - ret = partition_stat_infos.push_back(partition_stat_info); + if (OB_FAIL(partition_stat_infos.push_back(partition_stat_info)) || + OB_FAIL(record_first_part_ids.push_back(first_part_id))) { + LOG_WARN("failed to push back", K(ret)); + } } } } - LOG_INFO("succeed to estimate stale partition", K(stale_percent_threshold), - K(partition_stat_infos), - K(partition_infos), - K(monitor_modified_part_ids)); + LOG_TRACE("succeed to estimate stale partition", K(stale_percent_threshold), + K(partition_stat_infos), + K(partition_infos), + K(monitor_modified_part_ids)); return ret; } @@ -881,6 +903,10 @@ int ObBasicStatsEstimator::update_last_modified_count(sqlclient::ObISQLConnectio ObSqlString tablet_list; int64_t affected_rows = 0; bool is_valid = true; + bool is_all_update = false; + //if this is virtual table real agent, we need update the real table id modifed count + uint64_t table_id = share::is_oracle_mapping_real_virtual_table(param.table_id_) ? + share::get_real_table_mappings_tid(param.table_id_) : param.table_id_; if (OB_ISNULL(conn)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(ret), K(conn)); @@ -888,17 +914,18 @@ int ObBasicStatsEstimator::update_last_modified_count(sqlclient::ObISQLConnectio LOG_WARN("failed to check table read write valid", K(ret)); } else if (!is_valid) { // do nothing - } else if (OB_FAIL(gen_tablet_list(param, tablet_list))) { + } else if (OB_FAIL(gen_tablet_list(param, tablet_list, is_all_update))) { LOG_WARN("failed to gen partition list", K(ret)); - } else if (tablet_list.empty()) { + } else if (tablet_list.empty() && !is_all_update) { /*do nothing*/ } else if (OB_FAIL(udpate_sql.append_fmt( "update %s set last_inserts = inserts, last_updates = updates, last_deletes = deletes " \ - "where tenant_id = %lu and table_id = %lu and tablet_id in %s;", + "where tenant_id = %lu and table_id = %lu %s %s;", share::OB_ALL_MONITOR_MODIFIED_TNAME, share::schema::ObSchemaUtils::get_extract_tenant_id(param.tenant_id_, param.tenant_id_), - share::schema::ObSchemaUtils::get_extract_schema_id(param.tenant_id_, param.table_id_), - tablet_list.ptr()))) { + share::schema::ObSchemaUtils::get_extract_schema_id(param.tenant_id_, table_id), + !tablet_list.empty() ? "and tablet_id in" : " ", + !tablet_list.empty() ? tablet_list.ptr() : " "))) { LOG_WARN("failed to append fmt", K(ret)); } else if (OB_FAIL(conn->execute_write(param.tenant_id_, udpate_sql.ptr(), affected_rows))) { LOG_WARN("failed to execute sql", K(ret), K(udpate_sql)); @@ -1001,49 +1028,57 @@ int ObBasicStatsEstimator::check_partition_stat_state(const int64_t partition_id for (int64_t i = 0; !find_it && i < partition_stat_infos.count(); ++i) { if (partition_stat_infos.at(i).partition_id_ == partition_id) { //locked partition id or no arrived stale percent threshold no need regather stats. - double stale_percent = partition_stat_infos.at(i).row_cnt_ <= 0 ? 1.0 : - 1.0 * inc_mod_count / partition_stat_infos.at(i).row_cnt_; + double stale_percent = 0.0; + if (inc_mod_count < 0 || partition_stat_infos.at(i).row_cnt_ <= 0) { + stale_percent = inc_mod_count == 0 ? 0.0 : 1.0; + } else { + stale_percent = 1.0 * inc_mod_count / partition_stat_infos.at(i).row_cnt_; + } partition_stat_infos.at(i).is_no_stale_ = stale_percent <= stale_percent_threshold; find_it = true; } } if (!find_it) { ObPartitionStatInfo partition_stat_info(partition_id, 0, false, false); - partition_stat_info.is_no_stale_ = true; + partition_stat_info.is_no_stale_ = false; ret = partition_stat_infos.push_back(partition_stat_info); } return ret; } int ObBasicStatsEstimator::gen_tablet_list(const ObTableStatParam ¶m, - ObSqlString &tablet_list) + ObSqlString &tablet_list, + bool &is_all_update) { int ret = OB_SUCCESS; ObSEArray tablet_ids; + is_all_update = false; if (param.global_stat_param_.need_modify_) { - if (param.part_level_ == share::schema::ObPartitionLevel::PARTITION_LEVEL_ZERO) { - if (OB_UNLIKELY(param.global_tablet_id_ == ObTabletID::INVALID_TABLET_ID)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected error", K(ret), K(param)); - } else if (OB_FAIL(tablet_ids.push_back(param.global_tablet_id_))) { - LOG_WARN("failed to push back", K(ret)); + if (param.part_level_ == share::schema::ObPartitionLevel::PARTITION_LEVEL_ZERO || + !param.global_stat_param_.gather_approx_) { + is_all_update = true; + } + } + if (OB_SUCC(ret) && !is_all_update && param.part_stat_param_.need_modify_) { + if (param.part_level_ == share::schema::ObPartitionLevel::PARTITION_LEVEL_ONE) { + for (int64_t i = 0; OB_SUCC(ret) && i < param.part_infos_.count(); ++i) { + if (OB_FAIL(tablet_ids.push_back(param.part_infos_.at(i).tablet_id_.id()))) { + LOG_WARN("failed to push back", K(ret)); + } + } + } else if (param.part_level_ == share::schema::ObPartitionLevel::PARTITION_LEVEL_TWO) { + for (int64_t i = 0; OB_SUCC(ret) && i < param.part_infos_.count(); ++i) { + for (int64_t j = 0; OB_SUCC(ret) && j < param.subpart_infos_.count(); ++j) { + if (param.part_infos_.at(i).part_id_ == param.subpart_infos_.at(j).first_part_id_) { + if (OB_FAIL(tablet_ids.push_back(param.subpart_infos_.at(j).tablet_id_.id()))) { + LOG_WARN("failed to push back", K(ret)); + } + } + } } } } - if (OB_SUCC(ret) && param.part_stat_param_.need_modify_ && - param.part_level_ != share::schema::ObPartitionLevel::PARTITION_LEVEL_TWO) { - for (int64_t i = 0; OB_SUCC(ret) && i < param.part_infos_.count(); ++i) { - if (OB_FAIL(tablet_ids.push_back(param.part_infos_.at(i).tablet_id_.id()))) { - LOG_WARN("failed to push back", K(ret)); - } - } - for (int64_t i = 0; OB_SUCC(ret) && i < param.approx_part_infos_.count(); ++i) { - if (OB_FAIL(tablet_ids.push_back(param.approx_part_infos_.at(i).tablet_id_.id()))) { - LOG_WARN("failed to push back", K(ret)); - } - } - } - if (OB_SUCC(ret) && param.subpart_stat_param_.need_modify_) { + if (OB_SUCC(ret) && !is_all_update && param.subpart_stat_param_.need_modify_) { for (int64_t i = 0; OB_SUCC(ret) && i < param.subpart_infos_.count(); ++i) { if (OB_FAIL(tablet_ids.push_back(param.subpart_infos_.at(i).tablet_id_.id()))) { LOG_WARN("failed to push back", K(ret)); @@ -1062,7 +1097,6 @@ int ObBasicStatsEstimator::gen_tablet_list(const ObTableStatParam ¶m, return ret; } - int ObBasicStatsEstimator::get_all_tablet_id_and_object_id(const ObTableStatParam ¶m, ObIArray &tablet_ids, ObIArray &partition_ids) @@ -1095,81 +1129,39 @@ int ObBasicStatsEstimator::get_all_tablet_id_and_object_id(const ObTableStatPara return ret; } -int ObBasicStatsEstimator::get_need_stats_table_cnt(ObExecContext &ctx, - const int64_t tenant_id, - int64_t &task_table_count) -{ - int ret = OB_SUCCESS; - ObSqlString select_sql; - if (OB_FAIL(select_sql.append_fmt( - "select count(1) as cnt from (select distinct m.table_id from " \ - "%s m left join %s up on m.table_id = up.table_id and up.pname = 'STALE_PERCENT' join %s gp on gp.sname = 'STALE_PERCENT' " \ - "where (case when (m.inserts+m.updates+m.deletes) = 0 then 0 " - "else ((m.inserts+m.updates+m.deletes) - (m.last_inserts+m.last_updates+m.last_deletes)) * 1.0 / (m.inserts+m.updates+m.deletes) > " \ - "(CASE WHEN up.valchar IS NOT NULL THEN cast(up.valchar as signed) * 1.0 / 100 ELSE Cast(gp.spare4 AS signed) * 1.0 / 100 end) end) " \ - "UNION select distinct table_id from %s where table_id not in (select table_id from %s)) ", - share::OB_ALL_MONITOR_MODIFIED_TNAME, - share::OB_ALL_OPTSTAT_USER_PREFS_TNAME, - share::OB_ALL_OPTSTAT_GLOBAL_PREFS_TNAME, - share::OB_ALL_MONITOR_MODIFIED_TNAME, - share::OB_ALL_TABLE_STAT_TNAME))) { - LOG_WARN("failed to append fmt", K(ret)); - } else { - ObCommonSqlProxy *sql_proxy = ctx.get_sql_proxy(); - SMART_VAR(ObMySQLProxy::MySQLResult, proxy_result) { - sqlclient::ObMySQLResult *client_result = NULL; - ObSQLClientRetryWeak sql_client_retry_weak(sql_proxy); - if (OB_FAIL(sql_client_retry_weak.read(proxy_result, tenant_id, select_sql.ptr()))) { - LOG_WARN("failed to execute sql", K(ret), K(select_sql)); - } else if (OB_ISNULL(client_result = proxy_result.get_result())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("failed to execute sql", K(ret)); - } else { - while (OB_SUCC(ret) && OB_SUCC(client_result->next())) { - int64_t idx = 0; - ObObj obj; - if (OB_FAIL(client_result->get_obj(idx, obj))) { - LOG_WARN("failed to get object", K(ret)); - } else if (OB_FAIL(obj.get_int(task_table_count))) { - LOG_WARN("failed to get int", K(ret), K(obj)); - } - } - ret = OB_ITER_END == ret ? OB_SUCCESS : ret; - } - int tmp_ret = OB_SUCCESS; - if (NULL != client_result) { - if (OB_SUCCESS != (tmp_ret = client_result->close())) { - LOG_WARN("close result set failed", K(ret), K(tmp_ret)); - ret = COVER_SUCC(tmp_ret); - } - } - } - LOG_TRACE("succeed to get table count that need gathering table stats", K(ret), K(task_table_count)); - } - return ret; -} - int ObBasicStatsEstimator::get_need_stats_tables(ObExecContext &ctx, const int64_t tenant_id, - ObIArray &table_ids, - int64_t &slice_cnt) + const int64_t offset, + const int64_t slice_cnt, + ObIArray &table_ids) { int ret = OB_SUCCESS; + ObSqlString gather_table_type_list; ObSqlString select_sql; - if (OB_FAIL(select_sql.append_fmt( - "select distinct table_id from (select m.table_id from " \ - "%s m left join %s up on m.table_id = up.table_id and up.pname = 'STALE_PERCENT' join %s gp on gp.sname = 'STALE_PERCENT' " \ - "where (case when (m.inserts+m.updates+m.deletes) = 0 then 0 "\ - "else ((m.inserts+m.updates+m.deletes) - (m.last_inserts+m.last_updates+m.last_deletes)) * 1.0 / (m.inserts+m.updates+m.deletes) > " \ - "(CASE WHEN up.valchar IS NOT NULL THEN cast(up.valchar as signed) * 1.0 / 100 ELSE Cast(gp.spare4 AS signed) * 1.0 / 100 end) end) "\ - " UNION ALL select table_id from %s where table_id not in (select table_id from %s)) " - "ORDER BY table_id DESC limit %ld", - share::OB_ALL_MONITOR_MODIFIED_TNAME, - share::OB_ALL_OPTSTAT_USER_PREFS_TNAME, - share::OB_ALL_OPTSTAT_GLOBAL_PREFS_TNAME, - share::OB_ALL_MONITOR_MODIFIED_TNAME, - share::OB_ALL_TABLE_STAT_TNAME, - slice_cnt))) { + if (OB_FAIL(get_gather_table_type_list(gather_table_type_list))) { + LOG_WARN("failed to get gather table type list", K(ret)); + } else if (OB_FAIL(select_sql.append_fmt("SELECT /*+no_rewrite*/table_id "\ + "FROM (SELECT tenant_id,"\ + " table_id,"\ + " table_type"\ + " FROM %s"\ + " WHERE table_type IN %s"\ + " ORDER BY tenant_id,"\ + " table_id"\ + " LIMIT %ld, %ld) t "\ + "WHERE table_type = %u "\ + " OR EXISTS(SELECT 1 "\ + " FROM %s m"\ + " WHERE t.table_id = m.table_id"\ + " AND t.tenant_id = m.tenant_id"\ + " AND inserts + deletes + updates > 0"\ + " limit 1); ", + share::OB_ALL_TABLE_TNAME, + gather_table_type_list.ptr(), + offset, + slice_cnt, + share::schema::ObTableType::VIRTUAL_TABLE, + share::OB_ALL_MONITOR_MODIFIED_TNAME))) { LOG_WARN("failed to append fmt", K(ret)); } else { ObCommonSqlProxy *sql_proxy = ctx.get_sql_proxy(); @@ -1205,7 +1197,7 @@ int ObBasicStatsEstimator::get_need_stats_tables(ObExecContext &ctx, } } LOG_TRACE("succeed to get table ids that need gathering table stats", - K(ret), K(slice_cnt), K(tenant_id), K(table_ids.count()), K(table_ids)); + K(select_sql), K(offset), K(slice_cnt), K(table_ids)); } return ret; } @@ -1400,5 +1392,23 @@ int ObBasicStatsEstimator::check_can_use_column_store_and_split_part_gather(cons return ret; } +int ObBasicStatsEstimator::get_gather_table_type_list(ObSqlString &gather_table_type_list) +{ + int ret = OB_SUCCESS; + int64_t table_type_arr[] = {share::schema::ObTableType::SYSTEM_TABLE, + share::schema::ObTableType::VIRTUAL_TABLE, + share::schema::ObTableType::USER_TABLE, + share::schema::ObTableType::EXTERNAL_TABLE}; + int64_t table_type_cnt = sizeof(table_type_arr)/sizeof(table_type_arr[0]); + for (int64_t i = 0; OB_SUCC(ret) && i < table_type_cnt; ++i) { + char prefix = (i == 0 ? '(' : ' '); + char suffix = (i == table_type_cnt - 1 ? ')' : ','); + if (OB_FAIL(gather_table_type_list.append_fmt("%c%lu%c", prefix, table_type_arr[i], suffix))) { + LOG_WARN("failed to append sql", K(ret)); + } else {/*do nothing*/} + } + return ret; +} + } // end of common } // end of oceanbase diff --git a/src/share/stat/ob_basic_stats_estimator.h b/src/share/stat/ob_basic_stats_estimator.h index 83df8808b4..a5c32d22e3 100644 --- a/src/share/stat/ob_basic_stats_estimator.h +++ b/src/share/stat/ob_basic_stats_estimator.h @@ -102,7 +102,8 @@ public: ObIArray &partition_stat_infos); static int gen_tablet_list(const ObTableStatParam ¶m, - ObSqlString &tablet_list); + ObSqlString &tablet_list, + bool &is_all_update); static int do_estimate_block_count(ObExecContext &ctx, const uint64_t tenant_id, @@ -137,12 +138,9 @@ public: static int get_need_stats_tables(ObExecContext &ctx, const int64_t tenant_id, - ObIArray &table_ids, - int64_t &slice_cnt); - - static int get_need_stats_table_cnt(ObExecContext &ctx, - const int64_t tenant_id, - int64_t &task_table_count); + const int64_t offset, + const int64_t slice_cnt, + ObIArray &table_ids); int estimate(const ObOptStatGatherParam ¶m, ObIArray &dst_opt_stats); @@ -180,6 +178,8 @@ private: const int64_t degree, bool &use_column_store, bool &use_split_part); + + static int get_gather_table_type_list(ObSqlString &gather_table_type_list); }; } diff --git a/src/share/stat/ob_dbms_stats_copy_table_stats.cpp b/src/share/stat/ob_dbms_stats_copy_table_stats.cpp index b44670fa83..fd472601e5 100644 --- a/src/share/stat/ob_dbms_stats_copy_table_stats.cpp +++ b/src/share/stat/ob_dbms_stats_copy_table_stats.cpp @@ -16,6 +16,7 @@ #include "share/stat/ob_opt_column_stat.h" #include "share/stat/ob_dbms_stats_utils.h" #include "share/stat/ob_dbms_stats_copy_table_stats.h" +#include "share/stat/ob_dbms_stats_history_manager.h" int CopyTableStatHelper::copy_part_stat(ObIArray &table_stats) { @@ -419,8 +420,28 @@ int ObDbmsStatsCopyTableStats::copy_tab_col_stats(sql::ObExecContext &ctx, LOG_WARN("src table stat is not analyzed", K(table_stat_param.part_infos_.at(0).part_id_)); } else if (OB_FAIL(copy_stat_helper.copy_part_col_stat(table_stat_param.is_subpart_name_, col_handles, table_stats, column_stats))) { LOG_WARN("failed to copy table column stat", K(ret), KPC(copy_stat_helper.src_part_stat_)); - } else if (OB_FAIL(ObDbmsStatsUtils::split_batch_write(ctx, table_stats, column_stats))) { - LOG_WARN("failed to split batch write stat", K(ret)); + } + if (OB_SUCC(ret)) { + ObMySQLTransaction trans; + //begin trans + if (OB_FAIL(trans.start(ctx.get_sql_proxy(), table_stat_param.tenant_id_))) { + LOG_WARN("fail to start transaction", K(ret)); + } else if (OB_FAIL(ObDbmsStatsHistoryManager::backup_opt_stats(ctx, trans, table_stat_param, ObTimeUtility::current_time()))) { + LOG_WARN("failed to backup opt stats", K(ret)); + } else if (OB_FAIL(ObDbmsStatsUtils::split_batch_write(ctx, trans.get_connection(), table_stats, column_stats))) { + LOG_WARN("failed to split batch write", K(ret)); + } else {/*do nothing*/} + //end trans + if (OB_SUCC(ret)) { + if (OB_FAIL(trans.end(true))) { + LOG_WARN("fail to commit transaction", K(ret)); + } + } else { + int tmp_ret = OB_SUCCESS; + if (OB_SUCCESS != (tmp_ret = trans.end(false))) { + LOG_WARN("fail to roll back transaction", K(tmp_ret)); + } + } } return ret; } diff --git a/src/share/stat/ob_dbms_stats_executor.cpp b/src/share/stat/ob_dbms_stats_executor.cpp index 07b063f6f0..9ece10b782 100644 --- a/src/share/stat/ob_dbms_stats_executor.cpp +++ b/src/share/stat/ob_dbms_stats_executor.cpp @@ -341,7 +341,7 @@ int ObDbmsStatsExecutor::split_gather_partition_stats(ObExecContext &ctx, } } if (OB_SUCC(ret)) { - if (gather_helper.maximum_gather_col_cnt_ >= param.column_params_.count()) { + if (gather_helper.maximum_gather_col_cnt_ >= param.get_need_gather_column()) { ObSEArray all_tstats; ObSEArray all_cstats; ObSEArray opt_stats; @@ -677,7 +677,7 @@ int ObDbmsStatsExecutor::check_need_split_gather(const ObTableStatParam ¶m, GatherHelper &gather_helper) { int ret = OB_SUCCESS; - int64_t column_cnt = param.column_params_.count(); + int64_t column_cnt = param.get_need_gather_column(); int64_t partition_cnt = param.subpart_stat_param_.need_modify_ ? param.subpart_infos_.count() : (param.part_stat_param_.need_modify_ ? param.part_infos_.count() + param.approx_part_infos_.count() : 1); bool need_histgoram = param.subpart_stat_param_.need_modify_ ? param.subpart_stat_param_.gather_histogram_ : @@ -921,7 +921,7 @@ int ObDbmsStatsExecutor::set_column_stats(ObExecContext &ctx, col_stat->set_column_id(key.column_id_); col_stat->set_collation_type(param.table_param_.column_params_.at(0).cs_type_); col_stat->set_last_analyzed(0); - if (OB_FAIL(do_set_column_stats(param, col_stat))) { + if (OB_FAIL(do_set_column_stats(*alloc, ctx.get_my_session()->get_dtc_params(), param, col_stat))) { LOG_WARN("failed to do set table stats", K(ret)); } else if (OB_FAIL(column_stats.push_back(col_stat))) { LOG_WARN("failed to push back column stat", K(ret)); @@ -994,7 +994,9 @@ int ObDbmsStatsExecutor::do_set_table_stats(const ObSetTableStatParam ¶m, return ret; } -int ObDbmsStatsExecutor::do_set_column_stats(const ObSetColumnStatParam ¶m, +int ObDbmsStatsExecutor::do_set_column_stats(ObIAllocator &allocator, + const ObDataTypeCastParams &dtc_params, + const ObSetColumnStatParam ¶m, ObOptColumnStat *&column_stat) { int ret = OB_SUCCESS; @@ -1025,9 +1027,21 @@ int ObDbmsStatsExecutor::do_set_column_stats(const ObSetColumnStatParam ¶m, if (param.avgclen_ > 0) { column_stat->set_avg_len(param.avgclen_); } - //5.set hist_param TODO @jiangxiu.wt + //5.set max/val value + if (param.hist_param_.minval_ != NULL || param.hist_param_.maxval_ != NULL) { + ObCastCtx cast_ctx(&allocator, &dtc_params, CM_NONE, param.col_meta_.get_collation_type()); + if ((param.hist_param_.minval_ != NULL && + OB_FAIL(ObObjCaster::to_type(param.col_meta_.get_type(), cast_ctx, *param.hist_param_.minval_, column_stat->get_min_value()))) || + (param.hist_param_.maxval_ != NULL && + OB_FAIL(ObObjCaster::to_type(param.col_meta_.get_type(), cast_ctx, *param.hist_param_.maxval_, column_stat->get_max_value())))) { + ret = OB_ERR_DBMS_STATS_PL; + LOG_WARN("Invalid or inconsistent input values", K(ret), K(param)); + LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL,"Invalid or inconsistent input values"); + } + } + //6.set hist_param TODO @jiangxiu.wt //other options support later. - LOG_TRACE("succeed to do set column stats", K(*column_stat)); + LOG_TRACE("succeed to do set column stats", K(param), K(*column_stat)); } return ret; } diff --git a/src/share/stat/ob_dbms_stats_executor.h b/src/share/stat/ob_dbms_stats_executor.h index 6530b4f19a..08ef7a92db 100644 --- a/src/share/stat/ob_dbms_stats_executor.h +++ b/src/share/stat/ob_dbms_stats_executor.h @@ -142,7 +142,9 @@ private: static int do_set_table_stats(const ObSetTableStatParam ¶m, ObOptTableStat *table_stat); - static int do_set_column_stats(const ObSetColumnStatParam ¶m, + static int do_set_column_stats(ObIAllocator &allocator, + const ObDataTypeCastParams &dtc_params, + const ObSetColumnStatParam ¶m, ObOptColumnStat *&column_stat); static int reset_table_locked_state(ObExecContext &ctx, diff --git a/src/share/stat/ob_dbms_stats_export_import.cpp b/src/share/stat/ob_dbms_stats_export_import.cpp index 7d034063bd..15fcbf2717 100644 --- a/src/share/stat/ob_dbms_stats_export_import.cpp +++ b/src/share/stat/ob_dbms_stats_export_import.cpp @@ -1030,7 +1030,7 @@ int ObDbmsStatsExportImport::get_opt_stat(ObExecContext &ctx, } else if (OB_FAIL(num_val.extract_valid_int64_with_trunc(int_val))) { LOG_WARN("extract_valid_int64_with_trunc failed", K(ret), K(num_val)); } else if (int_val > 0) { - if (OB_UNLIKELY(col_stat->get_histogram().get_density() <= 0.0)) { + if (OB_UNLIKELY(col_stat->get_histogram().get_density() < 0.0)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected error", K(result_objs), K(ret), KPC(col_stat)); } else if (col_stat->get_histogram().get_buckets().empty()) { diff --git a/src/share/stat/ob_dbms_stats_gather.cpp b/src/share/stat/ob_dbms_stats_gather.cpp index 9fe5c89929..5b4c25660c 100644 --- a/src/share/stat/ob_dbms_stats_gather.cpp +++ b/src/share/stat/ob_dbms_stats_gather.cpp @@ -95,12 +95,15 @@ int ObDbmsStatsGather::classfy_column_histogram(const ObOptStatGatherParam ¶ LOG_WARN("get unexpected error", K(ret), KPC(dst_col_stat), K(col_param)); } else if (col_param.need_basic_stat() && col_param.bucket_num_ > 1 && - dst_col_stat->get_num_distinct() > 0) { + dst_col_stat->get_num_distinct() > 0 && + dst_col_stat->get_num_not_null() > 0) { int64_t max_disuse_cnt = std::ceil(dst_col_stat->get_num_not_null() * 1.0 / col_param.bucket_num_); //After testing, the error of using hyperloglog to estimate ndv is within %5. const double MAX_LLC_NDV_ERR_RATE = !param.need_approx_ndv_ ? 0.0 : 0.05; const int64_t fault_tolerance_cnt = std::ceil(dst_col_stat->get_num_distinct() * MAX_LLC_NDV_ERR_RATE); - if (dst_col_stat->get_num_distinct() >= col_param.bucket_num_ + max_disuse_cnt + fault_tolerance_cnt) { + double sample_val = dst_col_stat->get_histogram().get_sample_size() * 100.0 / dst_col_stat->get_num_not_null(); + if (dst_col_stat->get_num_distinct() >= col_param.bucket_num_ + max_disuse_cnt + fault_tolerance_cnt || + sample_val < 100.0 * (1.0 - 1.0 / col_param.bucket_num_)) { //directly gather hybrid histogram dst_col_stat->get_histogram().set_type(ObHistType::HYBIRD); } else { @@ -179,6 +182,8 @@ int ObDbmsStatsGather::init_opt_stat(ObIAllocator &allocator, } else { tab_stat->set_macro_block_num(block_num_stat->tab_macro_cnt_); tab_stat->set_micro_block_num(block_num_stat->tab_micro_cnt_); + tab_stat->set_sstable_row_count(block_num_stat->sstable_row_cnt_); + tab_stat->set_memtable_row_count(block_num_stat->memtable_row_cnt_); } } for (int64_t i = 0; OB_SUCC(ret) && i < param.column_params_.count(); ++i) { diff --git a/src/share/stat/ob_dbms_stats_maintenance_window.cpp b/src/share/stat/ob_dbms_stats_maintenance_window.cpp index 0b57360255..d9cc26af25 100644 --- a/src/share/stat/ob_dbms_stats_maintenance_window.cpp +++ b/src/share/stat/ob_dbms_stats_maintenance_window.cpp @@ -374,7 +374,7 @@ int ObDbmsStatsMaintenanceWindow::is_stats_maintenance_window_attr(const sql::Ob if (0 == attr_name.case_compare("job_action")) { if (0 == job_name.case_compare(opt_stats_history_manager)) { const char *job_action_name = "DBMS_STATS.PURGE_STATS("; - if (0 == strncasecmp(val_name.ptr(), job_action_name, strlen(job_action_name))) { + if (!val_name.empty() && 0 == strncasecmp(val_name.ptr(), job_action_name, strlen(job_action_name))) { if (OB_FAIL(dml.add_column("job_action", ObHexEscapeSqlStr(val_name)))) { LOG_WARN("failed to add column", K(ret)); } else if (OB_FAIL(dml.add_column("what", ObHexEscapeSqlStr(val_name)))) { @@ -385,7 +385,7 @@ int ObDbmsStatsMaintenanceWindow::is_stats_maintenance_window_attr(const sql::Ob } else {/*do nothing*/} } else { const char *job_action_name = "DBMS_STATS.GATHER_DATABASE_STATS_JOB_PROC("; - if (0 == strncasecmp(val_name.ptr(), job_action_name, strlen(job_action_name))) { + if (!val_name.empty() && 0 == strncasecmp(val_name.ptr(), job_action_name, strlen(job_action_name))) { if (OB_FAIL(dml.add_column("job_action", ObHexEscapeSqlStr(val_name)))) { LOG_WARN("failed to add column", K(ret)); } else if (OB_FAIL(dml.add_column("what", ObHexEscapeSqlStr(val_name)))) { diff --git a/src/share/stat/ob_dbms_stats_preferences.cpp b/src/share/stat/ob_dbms_stats_preferences.cpp index cea3672617..36e08a5ec1 100644 --- a/src/share/stat/ob_dbms_stats_preferences.cpp +++ b/src/share/stat/ob_dbms_stats_preferences.cpp @@ -101,9 +101,7 @@ int ObDbmsStatsPreferences::get_prefs(ObExecContext &ctx, } else if (got_result) { /*do nothing*/ } else { - ret = OB_ERR_DBMS_STATS_PL; - LOG_WARN("Invalid input values for pname", K(ret), K(opt_name)); - LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL, "Invalid input values for pname"); + result.set_null(); } LOG_TRACE("Succeed to get prefs", K(ret), K(get_user_sql), K(get_global_sql), K(result)); } @@ -544,6 +542,22 @@ int ObDbmsStatsPreferences::gen_init_global_prefs_sql(ObSqlString &raw_sql, } if (OB_SUCC(ret)) {//init estimate_block ObEstimateBlockPrefs prefs; + if (OB_ISNULL(prefs.get_stat_pref_name()) || OB_ISNULL(prefs.get_stat_pref_default_value())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected error", K(ret), K(prefs.get_stat_pref_name()), + K(prefs.get_stat_pref_default_value())); + } else if (OB_FAIL(value_str.append_fmt("('%s', %s, %s, '%s'),", + prefs.get_stat_pref_name(), + null_str, + time_str, + prefs.get_stat_pref_default_value()))) { + LOG_WARN("failed to append", K(ret)); + } else { + ++ total_rows; + } + } + if (OB_SUCC(ret)) {//init block_sample + ObBlockSamplePrefs prefs; if (OB_ISNULL(prefs.get_stat_pref_name()) || OB_ISNULL(prefs.get_stat_pref_default_value())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected error", K(ret), K(prefs.get_stat_pref_name()), @@ -1071,6 +1085,25 @@ int ObEstimateBlockPrefs::check_pref_value_validity(ObTableStatParam *param/*def return ret; } +int ObBlockSamplePrefs::check_pref_value_validity(ObTableStatParam *param/*default null*/) +{ + int ret = OB_SUCCESS; + if (pvalue_.empty() || 0 == pvalue_.case_compare("FALSE")) { + if (param != NULL) { + param->sample_info_.set_is_block_sample(false); + } + } else if (0 == pvalue_.case_compare("TRUE")) { + if (param != NULL) { + param->sample_info_.set_is_block_sample(true); + } + } else { + ret = OB_ERR_DBMS_STATS_PL; + LOG_WARN("Illegal value for BLOCK_SAMPLE", K(ret), K(pvalue_)); + LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL,"Illegal value for BLOCK_SAMPLE: must be {TRUE, FALSE}"); + } + return ret; +} + #define ISSPACE(c) ((c) == ' ' || (c) == '\n' || (c) == '\r' || (c) == '\t' || (c) == '\f' || (c) == '\v') //compatible oracle, global prefs/schema prefs just only can set "for all columns...." diff --git a/src/share/stat/ob_dbms_stats_preferences.h b/src/share/stat/ob_dbms_stats_preferences.h index 9226e25d52..751931c2a5 100644 --- a/src/share/stat/ob_dbms_stats_preferences.h +++ b/src/share/stat/ob_dbms_stats_preferences.h @@ -214,6 +214,19 @@ class ObEstimateBlockPrefs : public ObStatPrefs virtual const char* get_stat_pref_default_value() const { return "TRUE"; } }; +class ObBlockSamplePrefs : public ObStatPrefs +{ + public: + ObBlockSamplePrefs() : ObStatPrefs() {} + ObBlockSamplePrefs(ObIAllocator *alloc, + ObSQLSessionInfo *session_info, + const ObString &pvalue) : + ObStatPrefs(alloc, session_info, pvalue) {} + virtual int check_pref_value_validity(ObTableStatParam *param = NULL) override; + virtual const char* get_stat_pref_name() const { return "BLOCK_SAMPLE"; } + virtual const char* get_stat_pref_default_value() const { return "FALSE"; } +}; + template static int new_stat_prefs(ObIAllocator &allocator, ObSQLSessionInfo *session_info, const ObString &opt_value, T *&src) diff --git a/src/share/stat/ob_dbms_stats_utils.cpp b/src/share/stat/ob_dbms_stats_utils.cpp index 68df2b5055..8464b23233 100644 --- a/src/share/stat/ob_dbms_stats_utils.cpp +++ b/src/share/stat/ob_dbms_stats_utils.cpp @@ -1075,8 +1075,10 @@ int ObDbmsStatsUtils::prepare_gather_stat_param(const ObTableStatParam ¶m, gather_param.stat_level_ = stat_level; if (stat_level == SUBPARTITION_LEVEL) { gather_param.need_histogram_ = param.subpart_stat_param_.gather_histogram_; + gather_param.is_specify_partition_ = param.subpart_infos_.count() != param.all_subpart_infos_.count(); } else if (stat_level == PARTITION_LEVEL) { gather_param.need_histogram_ = param.part_stat_param_.gather_histogram_; + gather_param.is_specify_partition_ = param.part_infos_.count() != param.all_part_infos_.count(); } else if (stat_level == TABLE_LEVEL) { gather_param.need_histogram_ = param.global_stat_param_.gather_histogram_; } diff --git a/src/share/stat/ob_hybrid_hist_estimator.cpp b/src/share/stat/ob_hybrid_hist_estimator.cpp index b253e44d62..e2e81ee9d8 100644 --- a/src/share/stat/ob_hybrid_hist_estimator.cpp +++ b/src/share/stat/ob_hybrid_hist_estimator.cpp @@ -354,16 +354,28 @@ int ObHybridHistEstimator::compute_estimate_percent(int64_t total_row_count, } if (OB_SUCC(ret) && need_sample) { if (total_row_count * est_percent / 100 >= MAGIC_MIN_SAMPLE_SIZE) { - /*do nothing*/ + const int64_t MAGIC_MAX_SPECIFY_SAMPLE_SIZE = 1000000; + is_block_sample = !is_block_sample ? total_row_count >= MAX_AUTO_GATHER_FULL_TABLE_ROWS : is_block_sample; + int64_t max_allowed_multiple = max_num_bkts <= ObColumnStatParam::DEFAULT_HISTOGRAM_BUCKET_NUM ? 1 : + max_num_bkts / ObColumnStatParam::DEFAULT_HISTOGRAM_BUCKET_NUM; + int64_t max_specify_sample_size = MAGIC_MAX_SPECIFY_SAMPLE_SIZE * max_allowed_multiple; + if (total_row_count * est_percent / 100 >= max_specify_sample_size) { + est_percent = max_specify_sample_size * 100.0 / total_row_count; + } } else if (total_row_count <= MAGIC_SAMPLE_SIZE) { need_sample = false; est_percent = 0.0; is_block_sample = false; } else { - is_block_sample = false; + is_block_sample = total_row_count >= MAX_AUTO_GATHER_FULL_TABLE_ROWS; est_percent = (MAGIC_SAMPLE_SIZE * 100.0) / total_row_count; } } + } else if (total_row_count >= MAX_AUTO_GATHER_FULL_TABLE_ROWS) { + need_sample = true; + is_block_sample = true; + const int64_t MAGIC_MAX_SAMPLE_SIZE = 100000; + est_percent = MAGIC_MAX_SAMPLE_SIZE * 100.0 / total_row_count; } else if (total_row_count >= MAGIC_MAX_AUTO_SAMPLE_SIZE) { if (max_num_bkts <= ObColumnStatParam::DEFAULT_HISTOGRAM_BUCKET_NUM) { need_sample = true; diff --git a/src/share/stat/ob_incremental_stat_estimator.cpp b/src/share/stat/ob_incremental_stat_estimator.cpp index f5d8ca1127..54939a9a13 100644 --- a/src/share/stat/ob_incremental_stat_estimator.cpp +++ b/src/share/stat/ob_incremental_stat_estimator.cpp @@ -227,6 +227,8 @@ int ObIncrementalStatEstimator::derive_split_gather_stats(ObExecContext &ctx, } else if (OB_ISNULL(param.allocator_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected error", K(ret), K(param)); + } else if (OB_FAIL(THIS_WORKER.check_status())) { + LOG_WARN("check status failed", KR(ret)); } else { ObArenaAllocator allocator("IncrementStats", OB_MALLOC_NORMAL_BLOCK_SIZE, param.tenant_id_); ObSEArray cur_table_stats; diff --git a/src/share/stat/ob_index_stats_estimator.cpp b/src/share/stat/ob_index_stats_estimator.cpp index 459384395f..a2d24537ce 100644 --- a/src/share/stat/ob_index_stats_estimator.cpp +++ b/src/share/stat/ob_index_stats_estimator.cpp @@ -313,6 +313,8 @@ int ObIndexStatsEstimator::fast_gather_index_stats(ObExecContext &ctx, } else { index_stat->set_macro_block_num(block_num_stat->tab_macro_cnt_); index_stat->set_micro_block_num(block_num_stat->tab_micro_cnt_); + index_stat->set_sstable_row_count(block_num_stat->sstable_row_cnt_); + index_stat->set_memtable_row_count(block_num_stat->memtable_row_cnt_); } if (OB_SUCC(ret)) { if (OB_FAIL(index_table_stats.push_back(index_stat))) { diff --git a/src/share/stat/ob_opt_stat_sql_service.cpp b/src/share/stat/ob_opt_stat_sql_service.cpp index 9c563075fd..b508f08d72 100644 --- a/src/share/stat/ob_opt_stat_sql_service.cpp +++ b/src/share/stat/ob_opt_stat_sql_service.cpp @@ -908,11 +908,11 @@ int ObOptStatSqlService::get_table_stat_sql(const uint64_t tenant_id, OB_FAIL(dml_splicer.add_column("object_type", stat.get_object_type())) || OB_FAIL(dml_splicer.add_time_column("last_analyzed", stat.get_last_analyzed() == 0 ? current_time : stat.get_last_analyzed())) || - OB_FAIL(dml_splicer.add_column("sstable_row_count", -1)) || + OB_FAIL(dml_splicer.add_column("sstable_row_count", stat.get_sstable_row_count())) || OB_FAIL(dml_splicer.add_column("sstable_avg_row_len", -1)) || OB_FAIL(dml_splicer.add_column("macro_blk_cnt", stat.get_macro_block_num())) || OB_FAIL(dml_splicer.add_column("micro_blk_cnt", stat.get_micro_block_num())) || - OB_FAIL(dml_splicer.add_column("memtable_row_cnt", -1)) || + OB_FAIL(dml_splicer.add_column("memtable_row_cnt", stat.get_memtable_row_count())) || OB_FAIL(dml_splicer.add_column("memtable_avg_row_len", -1)) || OB_FAIL(dml_splicer.add_column("row_cnt", stat.get_row_count())) || OB_FAIL(dml_splicer.add_column("avg_row_len", stat.get_avg_row_size())) || @@ -1001,7 +1001,7 @@ int ObOptStatSqlService::get_column_stat_sql(const uint64_t tenant_id, OB_FAIL(dml_splicer.add_column("distinct_cnt_synopsis", llc_hex_buf == NULL ? "" : llc_hex_buf)) || OB_FAIL(dml_splicer.add_column("distinct_cnt_synopsis_size", llc_comp_size * 2)) || OB_FAIL(dml_splicer.add_column("sample_size", stat.get_histogram().get_sample_size())) || - OB_FAIL(dml_splicer.add_column("density", stat.get_histogram().get_density())) || + OB_FAIL(dml_splicer.add_long_double_column("density", stat.get_histogram().get_density())) || OB_FAIL(dml_splicer.add_column("bucket_cnt", stat.get_histogram().get_bucket_cnt())) || OB_FAIL(dml_splicer.add_column("histogram_type", stat.get_histogram().get_type())) || OB_FAIL(dml_splicer.add_column("global_stats", 0)) || diff --git a/src/share/stat/ob_stat_define.cpp b/src/share/stat/ob_stat_define.cpp index ded92ea5c9..abcabb477d 100644 --- a/src/share/stat/ob_stat_define.cpp +++ b/src/share/stat/ob_stat_define.cpp @@ -284,5 +284,27 @@ bool ObTableStatParam::is_specify_column_gather() const return is_specify; } +int64_t ObTableStatParam::get_need_gather_column() const +{ + int64_t valid_column = 0; + for (int64_t i = 0; i < column_params_.count(); ++i) { + if (column_params_.at(i).need_basic_stat()) { + ++ valid_column; + } + } + return valid_column; +} + +int64_t ObOptStatGatherParam::get_need_gather_column() const +{ + int64_t valid_column = 0; + for (int64_t i = 0; i < column_params_.count(); ++i) { + if (column_params_.at(i).need_basic_stat()) { + ++ valid_column; + } + } + return valid_column; +} + } } diff --git a/src/share/stat/ob_stat_define.h b/src/share/stat/ob_stat_define.h index c42415f743..8695be95fc 100644 --- a/src/share/stat/ob_stat_define.h +++ b/src/share/stat/ob_stat_define.h @@ -56,7 +56,7 @@ enum StatOptionFlags const static double OPT_DEFAULT_STALE_PERCENT = 0.1; const static int64_t OPT_DEFAULT_STATS_RETENTION = 31; const static int64_t OPT_STATS_MAX_VALUE_CHAR_LEN = 128; -const static int64_t OPT_STATS_BIG_TABLE_ROWS = 10000000; +const int64_t MAX_AUTO_GATHER_FULL_TABLE_ROWS = 100000000; const int64_t MAGIC_SAMPLE_SIZE = 5500; const int64_t MAGIC_MAX_AUTO_SAMPLE_SIZE = 22000; const int64_t MAGIC_MIN_SAMPLE_SIZE = 2500; @@ -138,7 +138,9 @@ struct BlockNumStat tab_macro_cnt_(0), tab_micro_cnt_(0), cg_macro_cnt_arr_(), - cg_micro_cnt_arr_() + cg_micro_cnt_arr_(), + sstable_row_cnt_(0), + memtable_row_cnt_(0) { cg_macro_cnt_arr_.set_attr(ObMemAttr(MTL_ID(), "BlockNumStat")); cg_micro_cnt_arr_.set_attr(ObMemAttr(MTL_ID(), "BlockNumStat")); @@ -147,10 +149,14 @@ struct BlockNumStat int64_t tab_micro_cnt_; ObSEArray cg_macro_cnt_arr_; ObSEArray cg_micro_cnt_arr_; + int64_t sstable_row_cnt_; + int64_t memtable_row_cnt_; TO_STRING_KV(K(tab_macro_cnt_), K(tab_micro_cnt_), K(cg_macro_cnt_arr_), - K(cg_micro_cnt_arr_)) + K(cg_micro_cnt_arr_), + K(sstable_row_cnt_), + K(memtable_row_cnt_)) }; //TODO@jiangxiu.wt: improve the expression of PartInfo, use the map is better. @@ -475,6 +481,12 @@ struct ObTableStatParam { bool is_specify_column_gather() const; + int64_t get_need_gather_column() const; + + bool need_gather_stats() const { return global_stat_param_.need_modify_ || + part_stat_param_.need_modify_ || + subpart_stat_param_.need_modify_; } + uint64_t tenant_id_; ObString db_name_; @@ -599,9 +611,11 @@ struct ObOptStatGatherParam { global_part_id_(-1), gather_vectorize_(DEFAULT_STAT_GATHER_VECTOR_BATCH_SIZE), sepcify_scn_(0), - use_column_store_(false) + use_column_store_(false), + is_specify_partition_(false) {} int assign(const ObOptStatGatherParam &other); + int64_t get_need_gather_column() const; uint64_t tenant_id_; ObString db_name_; ObString tab_name_; @@ -625,6 +639,7 @@ struct ObOptStatGatherParam { int64_t gather_vectorize_; uint64_t sepcify_scn_; bool use_column_store_; + bool is_specify_partition_; TO_STRING_KV(K(tenant_id_), K(db_name_), @@ -646,7 +661,8 @@ struct ObOptStatGatherParam { K(global_part_id_), K(gather_vectorize_), K(sepcify_scn_), - K(use_column_store_)); + K(use_column_store_), + K(is_specify_partition_)); }; struct ObOptStat @@ -664,9 +680,20 @@ struct ObOptStat struct ObHistogramParam { + ObHistogramParam(): + epc_(0), + minval_(NULL), + maxval_(NULL), + bkvals_(), + novals_(), + chvals_(), + eavals_(), + rpcnts_(), + eavs_(0) + {} int64_t epc_; //Number of buckets in histogram - ObString minval_; //Minimum value - ObString maxval_; //Maximum value + const ObObj *minval_; //Minimum value + const ObObj *maxval_; //Maximum value ObSEArray bkvals_; //Array of bucket numbers ObSEArray novals_; //Array of normalized end point values ObSEArray chvals_; //Array of dumped end point values @@ -712,14 +739,24 @@ struct ObSetTableStatParam struct ObSetColumnStatParam { + ObSetColumnStatParam(): + table_param_(), + distcnt_(0), + density_(0.0), + nullcnt_(0), + hist_param_(), + avgclen_(0), + flags_(0), + col_meta_() + {} ObTableStatParam table_param_; - int64_t distcnt_; double density_; int64_t nullcnt_; ObHistogramParam hist_param_; int64_t avgclen_; int64_t flags_; + common::ObObjMeta col_meta_; TO_STRING_KV(K(table_param_), K(distcnt_), @@ -727,7 +764,8 @@ struct ObSetColumnStatParam K(nullcnt_), K(hist_param_), K(avgclen_), - K(flags_)); + K(flags_), + K(col_meta_)); }; diff --git a/src/share/stat/ob_stat_item.cpp b/src/share/stat/ob_stat_item.cpp index 70feea23ed..59d4a98321 100644 --- a/src/share/stat/ob_stat_item.cpp +++ b/src/share/stat/ob_stat_item.cpp @@ -509,7 +509,8 @@ void ObGlobalTableStat::add(int64_t rc, int64_t rs, int64_t ds, int64_t mac, int } int ObGlobalTableStat::add(int64_t rc, int64_t rs, int64_t ds, int64_t mac, int64_t mic, - ObIArray &cg_macro_arr, ObIArray &cg_micro_arr) + ObIArray &cg_macro_arr, ObIArray &cg_micro_arr, + int64_t scnt, int64_t mcnt) { // skip empty partition int ret = OB_SUCCESS; @@ -520,6 +521,8 @@ int ObGlobalTableStat::add(int64_t rc, int64_t rs, int64_t ds, int64_t mac, int6 macro_block_count_ += mac; micro_block_count_ += mic; part_cnt_ ++; + sstable_row_cnt_ += scnt; + memtable_row_cnt_ += mcnt; if (cg_macro_arr.empty()) { //do nothing } else if (cg_macro_cnt_arr_.empty()) { diff --git a/src/share/stat/ob_stat_item.h b/src/share/stat/ob_stat_item.h index 37a4cbb853..17398afe18 100644 --- a/src/share/stat/ob_stat_item.h +++ b/src/share/stat/ob_stat_item.h @@ -301,13 +301,14 @@ public: ObGlobalTableStat() : row_count_(0), row_size_(0), data_size_(0), macro_block_count_(0), micro_block_count_(0), part_cnt_(0), last_analyzed_(0), - cg_macro_cnt_arr_(), cg_micro_cnt_arr_(), - stat_locked_(false) + cg_macro_cnt_arr_(), cg_micro_cnt_arr_(), stat_locked_(false), + sstable_row_cnt_(0), memtable_row_cnt_(0) {} void add(int64_t rc, int64_t rs, int64_t ds, int64_t mac, int64_t mic); int add(int64_t rc, int64_t rs, int64_t ds, int64_t mac, int64_t mic, - ObIArray &cg_macro_arr, ObIArray &cg_micro_arr); + ObIArray &cg_macro_arr, ObIArray &cg_micro_arr, + int64_t scnt, int64_t mcnt); int64_t get_row_count() const; int64_t get_avg_row_size() const; @@ -320,6 +321,8 @@ public: void set_last_analyzed(int64_t last_analyzed) { last_analyzed_ = last_analyzed; } void set_stat_locked(bool locked) { stat_locked_ = locked; } bool get_stat_locked() const { return stat_locked_; } + int64_t get_sstable_row_cnt() const { return sstable_row_cnt_; } + int64_t get_memtable_row_cnt() const { return memtable_row_cnt_; } TO_STRING_KV(K(row_count_), @@ -331,7 +334,9 @@ public: K(last_analyzed_), K(cg_macro_cnt_arr_), K(cg_micro_cnt_arr_), - K(stat_locked_)); + K(stat_locked_), + K(sstable_row_cnt_), + K(memtable_row_cnt_)); private: int64_t row_count_; @@ -344,6 +349,8 @@ private: ObArray cg_macro_cnt_arr_; ObArray cg_micro_cnt_arr_; bool stat_locked_; + int64_t sstable_row_cnt_; + int64_t memtable_row_cnt_; }; class ObGlobalNullEval diff --git a/src/share/stat/ob_stats_estimator.cpp b/src/share/stat/ob_stats_estimator.cpp index 69a7ae3f84..f1ee879a73 100644 --- a/src/share/stat/ob_stats_estimator.cpp +++ b/src/share/stat/ob_stats_estimator.cpp @@ -95,7 +95,7 @@ int ObStatsEstimator::fill_sample_info(common::ObIAllocator &alloc, bool block_sample) { int ret = OB_SUCCESS; - if (est_percent>= 0.000001 && est_percent <= 100.0) { + if (est_percent>= 0.000001 && est_percent < 100.0) { char *buf = NULL; int32_t buf_len = 50;//double类型一般15~16位,加上字符长度16左右,因此数组长度为50足够用 int64_t real_len = -1; diff --git a/src/sql/engine/cmd/ob_analyze_executor.cpp b/src/sql/engine/cmd/ob_analyze_executor.cpp index 74d9cce000..dc821278ef 100644 --- a/src/sql/engine/cmd/ob_analyze_executor.cpp +++ b/src/sql/engine/cmd/ob_analyze_executor.cpp @@ -70,12 +70,23 @@ int ObAnalyzeExecutor::execute(ObExecContext &ctx, ObAnalyzeStmt &stmt) } if (OB_SUCC(ret)) { if (stmt.is_delete_histogram()) { - //must be only one param - if (OB_FAIL(ObDbmsStatsExecutor::delete_table_stats(ctx, params.at(0), true))) { - LOG_WARN("failed to drop table stats", K(ret)); - } else { - LOG_TRACE("succeed to drop table stats", K(params)); + bool cascade_columns = true; + bool cascade_indexes = true; + if (OB_UNLIKELY(params.count() != 1)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected error", K(ret), K(params)); + } else if (OB_FAIL(ObDbmsStatsLockUnlock::check_stat_locked(ctx, params.at(0)))) { + LOG_WARN("failed fill stat locked", K(ret)); + } else if (OB_FAIL(ObDbmsStatsExecutor::delete_table_stats(ctx, params.at(0), cascade_columns))) { + LOG_WARN("failed to delete table stats", K(ret)); + } else if (OB_FAIL(pl::ObDbmsStats::update_stat_cache(session->get_rpc_tenant_id(), params.at(0)))) { + LOG_WARN("failed to update stat cache", K(ret)); + } else if (cascade_indexes && params.at(0).part_name_.empty()) { + if (OB_FAIL(pl::ObDbmsStats::delete_table_index_stats(ctx, params.at(0)))) { + LOG_WARN("failed to delete index stats", K(ret)); + } else {/*do nothing*/} } + LOG_TRACE("succeed to drop table stats", K(params)); } else { int64_t task_cnt = params.count(); int64_t start_time = ObTimeUtility::current_time(); diff --git a/src/sql/optimizer/ob_dynamic_sampling.cpp b/src/sql/optimizer/ob_dynamic_sampling.cpp index f85728ac26..6a4b3600b0 100644 --- a/src/sql/optimizer/ob_dynamic_sampling.cpp +++ b/src/sql/optimizer/ob_dynamic_sampling.cpp @@ -492,6 +492,7 @@ int ObDynamicSampling::estimte_rowcount(int64_t max_ds_timeout, ObSqlString raw_sql_str; ObSqlString sample_str; ObSqlString basic_hint_str; + ObSqlString table_str; bool is_no_backslash_escapes = false; int64_t nested_count = -1; sql::ObSQLSessionInfo::StmtSavedValue *session_value = NULL; @@ -499,10 +500,12 @@ int ObDynamicSampling::estimte_rowcount(int64_t max_ds_timeout, ObSQLSessionInfo *session_info = ctx_->get_session_info(); bool need_restore_session = false; transaction::ObTxDesc *tx_desc = NULL; - if (OB_FAIL(add_block_sample_info(sample_block_ratio_, seed_, sample_str))) { + if (!is_big_table_ && OB_FAIL(add_block_sample_info(sample_block_ratio_, seed_, sample_str))) { LOG_WARN("failed to add block sample info", K(ret)); - } else if (OB_FAIL(add_basic_hint_info(basic_hint_str, max_ds_timeout, degree))) { + } else if (OB_FAIL(add_basic_hint_info(basic_hint_str, max_ds_timeout, is_big_table_ ? 1 : degree))) { LOG_WARN("failed to add basic hint info", K(ret)); + } else if (OB_FAIL(add_table_clause(table_str))) { + LOG_WARN("failed to add table clause", K(ret)); } else if (OB_FAIL(pack(raw_sql_str))) { LOG_WARN("failed to pack dynamic sampling", K(ret)); } else if (OB_FAIL(prepare_and_store_session(session_info, session_value, @@ -602,24 +605,14 @@ int ObDynamicSampling::pack(ObSqlString &raw_sql_str) if (OB_FAIL(gen_select_filed(select_fields))) { LOG_WARN("failed to generate select filed", K(ret)); } else if (OB_FAIL(raw_sql_str.append_fmt(lib::is_oracle_mode() ? - "SELECT %.*s %.*s FROM \"%.*s\".\"%.*s\" %.*s %.*s %s%.*s%s %s %.*s" : - "SELECT %.*s %.*s FROM `%.*s`.`%.*s` %.*s %.*s %s%.*s%s %s %.*s" , + "SELECT %.*s %.*s FROM %.*s %s %.*s" : + "SELECT %.*s %.*s FROM %.*s %s %.*s" , basic_hints_.length(), basic_hints_.ptr(), static_cast(select_fields.length()), select_fields.ptr(), - db_name_.length(), - db_name_.ptr(), - table_name_.length(), - table_name_.ptr(), - partition_list_.length(), - partition_list_.ptr(), - sample_block_.length(), - sample_block_.ptr(), - alias_name_.empty() ? " " : (lib::is_oracle_mode() ? "\"" : "`"), - alias_name_.length(), - alias_name_.ptr(), - alias_name_.empty() ? " " : (lib::is_oracle_mode() ? "\"" : "`"), + table_clause_.length(), + table_clause_.ptr(), where_conditions_.empty() ? " " : "WHERE", where_conditions_.length(), where_conditions_.ptr()))) { @@ -779,37 +772,48 @@ int ObDynamicSampling::calc_table_sample_block_ratio(const ObDSTableParam ¶m if (param.is_virtual_table_) { sample_block_ratio_ = 100.0; seed_ = param.degree_ > 1 ? 0 : 1; - } else if (OB_UNLIKELY((sample_micro_cnt = get_dynamic_sampling_micro_block_num(param)) < 1)) { + } else if (OB_UNLIKELY((sample_micro_cnt = get_dynamic_sampling_micro_block_num(param)) < 1 || + (micro_block_num_ > 0 && memtable_row_count_ + sstable_row_count_ <= 0))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected error", K(ret), K(param)); } else if (OB_FAIL(estimate_table_block_count_and_row_count(param))) { LOG_WARN("failed to estimate table block count and row count", K(ret)); } else { - //1.retire to memtable sample - if (sstable_row_count_ < memtable_row_count_) { - double sample_row_cnt = MAGIC_MAX_AUTO_SAMPLE_SIZE * (1.0 * sample_micro_cnt / OB_DS_BASIC_SAMPLE_MICRO_CNT); - if (memtable_row_count_ < sample_row_cnt) { - sample_block_ratio_ = 100.0; - } else { - sample_block_ratio_ = 100.0 * sample_row_cnt / memtable_row_count_; - } - } else { - //2.use the block sample - if (sample_micro_cnt >= micro_block_num_) { - sample_block_ratio_ = 100.0; - } else { - sample_block_ratio_ = 100.0 * sample_micro_cnt / micro_block_num_; - } - } - //3.try adjust sample block ratio according to the degree - if (param.degree_ > 1 && sample_block_ratio_ < 100.0) {//adjust sample ratio according to the degree. - sample_block_ratio_ = sample_block_ratio_ * param.degree_; + int64_t max_allowed_multiple = sample_micro_cnt > OB_DS_BASIC_SAMPLE_MICRO_CNT ? sample_micro_cnt / OB_DS_BASIC_SAMPLE_MICRO_CNT : 1; + if (micro_block_num_ > OB_DS_MAX_BASIC_SAMPLE_MICRO_CNT * max_allowed_multiple && + MAGIC_MAX_AUTO_SAMPLE_SIZE * max_allowed_multiple < memtable_row_count_ + sstable_row_count_) { + is_big_table_ = true; + sample_big_table_rown_cnt_ = MAGIC_MAX_AUTO_SAMPLE_SIZE * max_allowed_multiple; + sample_block_ratio_ = 100.0 * sample_big_table_rown_cnt_ / (memtable_row_count_ + sstable_row_count_); sample_block_ratio_ = sample_block_ratio_ < 100.0 ? sample_block_ratio_ : 100.0; + } else { + //1.retire to memtable sample + if (sstable_row_count_ < memtable_row_count_) { + double sample_row_cnt = MAGIC_MAX_AUTO_SAMPLE_SIZE * (1.0 * sample_micro_cnt / OB_DS_BASIC_SAMPLE_MICRO_CNT); + if (memtable_row_count_ < sample_row_cnt) { + sample_block_ratio_ = 100.0; + } else { + sample_block_ratio_ = 100.0 * sample_row_cnt / (memtable_row_count_ + sstable_row_count_); + } + } else { + //2.use the block sample + if (sample_micro_cnt >= micro_block_num_) { + sample_block_ratio_ = 100.0; + } else { + sample_block_ratio_ = 100.0 * sample_micro_cnt / micro_block_num_; + } + } + //3.try adjust sample block ratio according to the degree + if (param.degree_ > 1 && sample_block_ratio_ < 100.0) {//adjust sample ratio according to the degree. + sample_block_ratio_ = sample_block_ratio_ * param.degree_; + sample_block_ratio_ = sample_block_ratio_ < 100.0 ? sample_block_ratio_ : 100.0; + } + sample_block_ratio_ = sample_block_ratio_ < 0.000001 ? 0.000001 : sample_block_ratio_; + //4.adjust the seed. + seed_ = (param.degree_ > 1 || param.partition_infos_.count() > 1) ? 0 : 1; } - //4.adjust the seed. - seed_ = (param.degree_ > 1 || param.partition_infos_.count() > 1) ? 0 : 1; } - LOG_TRACE("succeed to calc table sample block ratio", K(param), K(seed_), K(sample_micro_cnt), + LOG_TRACE("succeed to calc table sample block ratio", K(param), K(seed_), K(sample_micro_cnt), K(is_big_table_), K(sample_block_ratio_), K(micro_block_num_), K(sstable_row_count_), K(memtable_row_count_)); return ret; @@ -1229,6 +1233,50 @@ bool ObDynamicSampling::all_ds_col_stats_are_gathered(const ObDSTableParam ¶ return res; } +int ObDynamicSampling::add_table_clause(ObSqlString &table_str) +{ + int ret = OB_SUCCESS; + int64_t big_table_row = 100000; + if (OB_UNLIKELY(is_big_table_ && sample_big_table_rown_cnt_ < MAGIC_MAX_AUTO_SAMPLE_SIZE)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected error", K(ret), K(sample_big_table_rown_cnt_), K(is_big_table_)); + } else if (is_big_table_ && OB_FAIL(table_str.append_fmt(lib::is_oracle_mode() ? "(SELECT * FROM \"%.*s\".\"%.*s\" %.*s %.*s FETCH NEXT %ld ROWS ONLY) %s%.*s%s" : + "(SELECT * FROM `%.*s`.`%.*s` %.*s %.*s LIMIT %ld) %s%.*s%s" , + db_name_.length(), + db_name_.ptr(), + table_name_.length(), + table_name_.ptr(), + partition_list_.length(), + partition_list_.ptr(), + sample_block_.length(), + sample_block_.ptr(), + sample_big_table_rown_cnt_, + alias_name_.empty() ? " " : (lib::is_oracle_mode() ? "\"" : "`"), + alias_name_.length(), + alias_name_.ptr(), + alias_name_.empty() ? " " : (lib::is_oracle_mode() ? "\"" : "`")))) { + LOG_WARN("failed to append fmt", K(ret)); + } else if (!is_big_table_ && OB_FAIL(table_str.append_fmt(lib::is_oracle_mode() ? "\"%.*s\".\"%.*s\" %.*s %.*s %s%.*s%s" : + "`%.*s`.`%.*s` %.*s %.*s %s%.*s%s" , + db_name_.length(), + db_name_.ptr(), + table_name_.length(), + table_name_.ptr(), + partition_list_.length(), + partition_list_.ptr(), + sample_block_.length(), + sample_block_.ptr(), + alias_name_.empty() ? " " : (lib::is_oracle_mode() ? "\"" : "`"), + alias_name_.length(), + alias_name_.ptr(), + alias_name_.empty() ? " " : (lib::is_oracle_mode() ? "\"" : "`")))) { + LOG_WARN("failed to append fmt", K(ret)); + } else { + table_clause_ = table_str.string(); + } + return ret; +} + int ObDynamicSamplingUtils::get_valid_dynamic_sampling_level(const ObSQLSessionInfo *session_info, const ObTableDynamicSamplingHint *table_ds_hint, const int64_t global_ds_level, diff --git a/src/sql/optimizer/ob_dynamic_sampling.h b/src/sql/optimizer/ob_dynamic_sampling.h index dc9449ce0a..dc549573a2 100644 --- a/src/sql/optimizer/ob_dynamic_sampling.h +++ b/src/sql/optimizer/ob_dynamic_sampling.h @@ -200,6 +200,7 @@ static T *copy_ds_stat_item(ObIAllocator &allocator, const T &src) const int64_t OB_DS_BASIC_SAMPLE_MICRO_CNT = 32; const int64_t OB_DS_MAX_FILTER_EXPR_COUNT = 10000; const int64_t OB_DS_MIN_QUERY_TIMEOUT = 1000;//Dynamic sampling requires a minimum timeout of 1ms. +const int64_t OB_DS_MAX_BASIC_SAMPLE_MICRO_CNT = 1000000; //const int64_t OB_OPT_DS_ADAPTIVE_SAMPLE_MICRO_CNT = 200; //const int64_t OB_OPT_DS_MAX_TIMES = 7; @@ -223,7 +224,10 @@ public: basic_hints_(), where_conditions_(), ds_stat_items_(), - results_() + results_(), + is_big_table_(false), + sample_big_table_rown_cnt_(0), + table_clause_() {} int estimate_table_rowcount(const ObDSTableParam ¶m, @@ -318,6 +322,7 @@ private: int64_t nested_count, bool is_no_backslash_escapes, transaction::ObTxDesc *tx_desc); + int add_table_clause(ObSqlString &table_str); private: ObOptimizerContext *ctx_; @@ -337,6 +342,9 @@ private: ObString where_conditions_; ObSEArray ds_stat_items_; ObSEArray results_; + bool is_big_table_; + int64_t sample_big_table_rown_cnt_; + ObString table_clause_; //following members will be used for dynamic sampling join in the future //ObString join_type_; //ObString join_conditions_;