From ca1fcada830194d15eb3a340c39a41ce9575ce1d Mon Sep 17 00:00:00 2001 From: wangt1xiuyi <13547954130@163.com> Date: Fri, 10 Feb 2023 11:11:04 +0000 Subject: [PATCH] update histogram buckets from pointer to ObArrayWrap --- src/share/stat/ob_dbms_stats_executor.cpp | 8 +- .../stat/ob_dbms_stats_export_import.cpp | 71 ++++++-- src/share/stat/ob_dbms_stats_export_import.h | 2 + .../stat/ob_dbms_stats_history_manager.cpp | 2 +- src/share/stat/ob_dbms_stats_utils.cpp | 19 +-- src/share/stat/ob_dbms_stats_utils.h | 3 +- src/share/stat/ob_hybrid_hist_estimator.cpp | 11 +- .../stat/ob_incremental_stat_estimator.cpp | 38 ++--- .../stat/ob_incremental_stat_estimator.h | 2 +- src/share/stat/ob_opt_column_stat.cpp | 153 ++++-------------- src/share/stat/ob_opt_column_stat.h | 64 +++----- src/share/stat/ob_opt_stat_sql_service.cpp | 55 +++---- src/share/stat/ob_stat_item.cpp | 42 +++-- src/share/stat/ob_stat_item.h | 15 +- src/share/stat/ob_stats_estimator.cpp | 14 +- src/share/stat/ob_stats_estimator.h | 2 +- src/sql/optimizer/ob_log_plan.cpp | 5 +- src/sql/optimizer/ob_opt_selectivity.cpp | 30 ++-- .../resolver/ddl/ob_analyze_stmt_resolver.cpp | 15 ++ .../resolver/ddl/ob_analyze_stmt_resolver.h | 3 + unittest/sql/optimizer/test_opt_est_sel.cpp | 2 +- .../sql/optimizer/test_optimizer_utils.cpp | 12 +- unittest/sql/optimizer/test_optimizer_utils.h | 1 + 23 files changed, 270 insertions(+), 299 deletions(-) diff --git a/src/share/stat/ob_dbms_stats_executor.cpp b/src/share/stat/ob_dbms_stats_executor.cpp index 886244815..66b3f9e7a 100644 --- a/src/share/stat/ob_dbms_stats_executor.cpp +++ b/src/share/stat/ob_dbms_stats_executor.cpp @@ -172,19 +172,17 @@ int ObDbmsStatsExecutor::check_all_cols_range_skew(const ObTableStatParam ¶m ObHistogram &hist = col_stats.at(j)->get_histogram(); if ((hist.get_type() == ObHistType::FREQUENCY && col_param.is_size_skewonly()) || hist.get_type() == ObHistType::HYBIRD) { - if (OB_ISNULL(hist.get_buckets()) || - OB_UNLIKELY(hist.get_bucket_size() < 1 || col_param.bucket_num_ < 1)) { + if (OB_UNLIKELY(hist.get_bucket_size() < 1 || col_param.bucket_num_ < 1)) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected error", K(ret), K(hist.get_buckets()), K(hist.get_bucket_size()), + LOG_WARN("get unexpected error", K(ret), K(hist.get_bucket_size()), K(col_param.bucket_num_), K(*col_stats.at(j))); } else { bool is_even_dist = false; int64_t standard_cnt = hist.get_type() == ObHistType::FREQUENCY ? - hist.get_buckets()[0].endpoint_num_ : + hist.get_buckets().at(0).endpoint_num_ : hist.get_sample_size() / col_param.bucket_num_; if (OB_FAIL(ObDbmsStatsUtils::check_range_skew(hist.get_type(), hist.get_buckets(), - hist.get_bucket_size(), standard_cnt, is_even_dist))) { LOG_WARN("failed to check range skew", K(ret)); diff --git a/src/share/stat/ob_dbms_stats_export_import.cpp b/src/share/stat/ob_dbms_stats_export_import.cpp index 84b1201a6..19684e906 100644 --- a/src/share/stat/ob_dbms_stats_export_import.cpp +++ b/src/share/stat/ob_dbms_stats_export_import.cpp @@ -40,7 +40,7 @@ namespace common { #define CREATE_MYSQL_STAT_TABLE "(STATID VARCHAR(128), TYPE CHAR(1), VERSION DECIMAL,FLAGS DECIMAL,\ C1 VARCHAR(128),C2 VARCHAR(128), C3 VARCHAR(128),C4 VARCHAR(128),\ - C5 VARCHAR(128), C6 VARCHAR(128), N1 DECIMAL, N2 DECIMAL,\ + C5 VARCHAR(128), C6 VARCHAR(128), N1 DECIMAL, N2 DOUBLE,\ N3 DECIMAL, N4 DECIMAL, N5 DECIMAL, N6 DECIMAL, N7 DECIMAL,\ N8 DECIMAL, N9 DECIMAL, N10 DECIMAL, N11 DECIMAL, N12 DECIMAL,\ N13 DECIMAL, D1 TIMESTAMP(6), T1 TIMESTAMP, R1 TEXT(1000), \ @@ -73,7 +73,7 @@ namespace common { stat.distinct_cnt n1, stat.density n2, null n3, stat.sample_size n4, \ stat.null_cnt n5, NULL n6, NULL n7, stat.avg_len n8, 1 n9, \ hist.endpoint_num n10, hist.endpoint_normalized_value n11, \ - hist.endpoint_repeat_cnt n12, null n13, last_analyzed d1, null t1, \ + hist.endpoint_repeat_cnt n12, stat.bucket_cnt n13, last_analyzed d1, null t1, \ stat.b_min_value r1, stat.b_max_value r2, hist.b_endpoint_value r3, \ null ch1, null cl1, null bl1, stat.distinct_cnt_synopsis_size ob_spec1,\ null ob_spec2, stat.distinct_cnt_synopsis ob_spec3 from\ @@ -600,8 +600,9 @@ int ObDbmsStatsExportImport::do_import_stats(ObExecContext &ctx, } if (OB_ITER_END != ret) { LOG_WARN("failed to get result", K(ret)); + } else if (OB_FAIL(check_col_stat_validity(all_cstats))) { + LOG_WARN("failed to check col stat validity", K(ret)); } else { - ret = OB_SUCCESS; ObSEArray history_tab_handles; ObSEArray history_col_handles; //before import, we need record history stats. @@ -656,7 +657,7 @@ int ObDbmsStatsExportImport::do_import_stats(ObExecContext &ctx, * 19.N10 NUMBER <==> Endpoint number('C') * 20.N11 NUMBER <==> Endpoint value('C') * 21.N12 NUMBER <==> ENDPOINT_REPEAT_COUNT('C') - * 22.N13 NUMBER <==> NULL + * 22.N13 NUMBER <==> bucket_cnt('C') * 23.D1 DATE <==> Last analyzed * 24.T1 TIMESTAMP(6) WITH TIME ZONE <==> NULL * 25.R1 RAW(32) <==> Lower raw value('C') @@ -830,14 +831,16 @@ int ObDbmsStatsExportImport::get_opt_stat(ObExecContext &ctx, number::ObNumber num_val; int64_t int_val = 0; double double_val = 0.0; - if (!result_objs.at(i).is_null() && OB_FAIL(result_objs.at(i).get_number(num_val))) { - LOG_WARN("failed to get number", K(ret)); + if (result_objs.at(i).is_number() && + (OB_FAIL(result_objs.at(i).get_number(num_val)) || + OB_FAIL(ObDbmsStatsUtils::cast_number_to_double(num_val, double_val)))) { + LOG_WARN("failed to get double", K(ret)); + } else if (result_objs.at(i).is_double() && OB_FAIL(result_objs.at(i).get_double(double_val))) { + LOG_WARN("failed to get double", K(ret)); } else if (stat_type == TABLE_STAT || stat_type == INDEX_STAT) { /*do nothing*/ } else if (stat_type == COLUMN_STAT) { - if (OB_FAIL(ObDbmsStatsUtils::cast_number_to_double(num_val, double_val))) { - LOG_WARN("failed to cast number to double" , K(ret)); - } else if (double_val > 0.0) { + if (double_val > 0.0) { /*do nothing*/ if (OB_UNLIKELY(hist_type == INVALID_TYPE)) { ret = OB_ERR_DBMS_STATS_PL; @@ -972,7 +975,31 @@ int ObDbmsStatsExportImport::get_opt_stat(ObExecContext &ctx, } break; } - case StatTableColumnName::N13: {//not used + case StatTableColumnName::N13: {//bucket_cnt('C') + number::ObNumber num_val; + int64_t int_val = 0; + if (stat_type != COLUMN_STAT) { + if (OB_UNLIKELY(!result_objs.at(i).is_null())) { + ret = OB_ERR_DBMS_STATS_PL; + LOG_WARN("Invalid or inconsistent input values", K(ret), K(result_objs.at(i))); + LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL, "Invalid or inconsistent input values"); + } + } else if (!result_objs.at(i).is_null() && + OB_FAIL(result_objs.at(i).get_number(num_val))) { + LOG_WARN("failed to get number", K(ret)); + } else if (OB_FAIL(num_val.extract_valid_int64_with_trunc(int_val))) { + LOG_WARN("extract_valid_int64_with_trunc failed", K(ret), K(num_val)); + } else if (int_val > 0) { + if (OB_UNLIKELY(col_stat->get_histogram().get_density() <= 0.0)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected error", K(result_objs), K(ret), KPC(col_stat)); + } else if (col_stat->get_histogram().get_buckets().empty()) { + if (OB_FAIL(col_stat->get_histogram().prepare_allocate_buckets(ctx.get_allocator(), + int_val))) { + LOG_WARN("failed to prepare allocate buckets", K(ret)); + } else {/*do nothing*/} + } + } break; } case StatTableColumnName::D1: {//Last analyzed @@ -1063,8 +1090,13 @@ int ObDbmsStatsExportImport::get_opt_stat(ObExecContext &ctx, result_objs.at(i), hist_bucket.endpoint_value_))) { LOG_WARN("failed to convert bin hex text to obj", K(ret)); + } else if (OB_UNLIKELY(col_stat->get_histogram().get_bucket_cnt() >= + col_stat->get_histogram().get_bucket_size())) { + ret = OB_ERR_DBMS_STATS_PL; + LOG_WARN("Invalid or inconsistent input values", K(ret), K(result_objs.at(i))); + LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL, "Invalid or inconsistent input values"); } else if (OB_FAIL(col_stat->get_histogram().add_bucket(hist_bucket))) { - LOG_WARN("failed to add bucket", K(ret)); + LOG_WARN("failed to push back", K(ret)); } else {/*do nothing*/} break; } @@ -1533,5 +1565,22 @@ int ObDbmsStatsExportImport::gen_import_column_list(const ObIArray &all_cstats) +{ + int ret = OB_SUCCESS; + for (int64_t i = 0; OB_SUCC(ret) && i < all_cstats.count(); ++i) { + if (OB_ISNULL(all_cstats.at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected error", K(all_cstats.at(i)), K(ret)); + } else if (OB_UNLIKELY(all_cstats.at(i)->get_histogram().get_bucket_cnt() != + all_cstats.at(i)->get_histogram().get_bucket_size())) { + ret = OB_ERR_DBMS_STATS_PL; + LOG_WARN("Invalid or inconsistent input values", K(ret), KPC(all_cstats.at(i))); + LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL, "Invalid or inconsistent input values"); + } + } + return ret; +} + } // namespace common } // namespace oceanbase diff --git a/src/share/stat/ob_dbms_stats_export_import.h b/src/share/stat/ob_dbms_stats_export_import.h index 9ef7df4d2..1f54b2dac 100644 --- a/src/share/stat/ob_dbms_stats_export_import.h +++ b/src/share/stat/ob_dbms_stats_export_import.h @@ -155,6 +155,8 @@ private: static int gen_import_column_list(const ObIArray &column_param, ObSqlString &column_list); + static int check_col_stat_validity(ObIArray &all_cstats); + }; } // end of sql diff --git a/src/share/stat/ob_dbms_stats_history_manager.cpp b/src/share/stat/ob_dbms_stats_history_manager.cpp index 828178515..30d30a753 100644 --- a/src/share/stat/ob_dbms_stats_history_manager.cpp +++ b/src/share/stat/ob_dbms_stats_history_manager.cpp @@ -529,7 +529,7 @@ int ObDbmsStatsHistoryManager::fill_column_stat_history(ObIAllocator &allocator, EXTRACT_INT_FIELD_MYSQL(result, "distinct_cnt_synopsis_size", llc_bitmap_size, int64_t); if (OB_SUCC(ret)) { hist.set_type(histogram_type); - if (hist.is_valid() && OB_FAIL(hist.prepare_allocate_buckets(bucket_cnt))) { + if (hist.is_valid() && OB_FAIL(hist.prepare_allocate_buckets(allocator, bucket_cnt))) { LOG_WARN("failed to prepare allocate buckets", K(ret)); } } diff --git a/src/share/stat/ob_dbms_stats_utils.cpp b/src/share/stat/ob_dbms_stats_utils.cpp index 4892a8d14..31ec498e2 100644 --- a/src/share/stat/ob_dbms_stats_utils.cpp +++ b/src/share/stat/ob_dbms_stats_utils.cpp @@ -89,29 +89,26 @@ int ObDbmsStatsUtils::init_col_stats(ObIAllocator &allocator, * then it's even distributed, Otherwise, it's skewed. */ int ObDbmsStatsUtils::check_range_skew(ObHistType hist_type, - const ObHistBucket *bkts, - const int64_t bkt_size, + const ObHistogram::Buckets &bkts, int64_t standard_cnt, bool &is_even_distributed) { int ret = OB_SUCCESS; is_even_distributed = false; - if (OB_ISNULL(bkts) || OB_UNLIKELY(bkt_size == 0)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected error", K(ret), K(bkts), K(bkt_size)); - } else if (hist_type == ObHistType::FREQUENCY) { + if (hist_type == ObHistType::FREQUENCY) { is_even_distributed = true; - for (int64_t i = 0; is_even_distributed && i < bkt_size; ++i) { + for (int64_t i = 0; is_even_distributed && i < bkts.count(); ++i) { if (i == 0) { - is_even_distributed = standard_cnt == bkts[i].endpoint_num_; + is_even_distributed = standard_cnt == bkts.at(i).endpoint_num_; } else { - is_even_distributed = standard_cnt == bkts[i].endpoint_num_ - bkts[i - 1].endpoint_num_; + is_even_distributed = standard_cnt == bkts.at(i).endpoint_num_ - + bkts.at(i - 1).endpoint_num_; } } } else if (hist_type == ObHistType::HYBIRD) { is_even_distributed = true; - for (int64_t i = 0; is_even_distributed && i < bkt_size; ++i) { - is_even_distributed = bkts[i].endpoint_repeat_count_ <= standard_cnt; + for (int64_t i = 0; is_even_distributed && i < bkts.count(); ++i) { + is_even_distributed = bkts.at(i).endpoint_repeat_count_ <= standard_cnt; } } else {/*do nothing*/} return ret; diff --git a/src/share/stat/ob_dbms_stats_utils.h b/src/share/stat/ob_dbms_stats_utils.h index 733460962..3721e5ef5 100644 --- a/src/share/stat/ob_dbms_stats_utils.h +++ b/src/share/stat/ob_dbms_stats_utils.h @@ -34,8 +34,7 @@ public: ObIArray &col_stats); static int check_range_skew(ObHistType hist_type, - const ObHistBucket *bkts, - const int64_t bkt_size, + const ObHistogram::Buckets &bkts, int64_t standard_cnt, bool &is_even_distributed); diff --git a/src/share/stat/ob_hybrid_hist_estimator.cpp b/src/share/stat/ob_hybrid_hist_estimator.cpp index cbdf8fff9..7af93ae65 100644 --- a/src/share/stat/ob_hybrid_hist_estimator.cpp +++ b/src/share/stat/ob_hybrid_hist_estimator.cpp @@ -420,7 +420,7 @@ int ObHybridHistEstimator::try_build_hybrid_hist(const ObColumnStatParam ¶m, ObSEArray pairs; for (int64_t i = 0; OB_SUCC(ret) && i < col_stat.get_histogram().get_bucket_size(); ++i) { - const ObHistBucket &hist_bucket = col_stat.get_histogram().get_buckets()[i]; + const ObHistBucket &hist_bucket = col_stat.get_histogram().get(i); if (OB_FAIL(pairs.push_back(BucketNode(hist_bucket.endpoint_value_, hist_bucket.endpoint_repeat_count_)))) { LOG_WARN("failed to push back new entry", K(ret)); @@ -434,9 +434,12 @@ int ObHybridHistEstimator::try_build_hybrid_hist(const ObColumnStatParam ¶m, num_distinct))) { LOG_WARN("failed to do build hybrid hist", K(ret)); } else { - col_stat.get_histogram().reset(); - if (OB_FAIL(col_stat.get_histogram().add_buckets(hybrid_hist.get_buckets()))) { - LOG_WARN("failed to append hist bucket", K(ret)); + col_stat.get_histogram().get_buckets().reset(); + if (OB_FAIL(col_stat.get_histogram().prepare_allocate_buckets(ctx_.get_allocator(), + hybrid_hist.get_buckets().count()))) { + LOG_WARN("failed to prepare allocate buckets", K(ret)); + } else if (OB_FAIL(col_stat.get_histogram().assign_buckets(hybrid_hist.get_buckets()))) { + LOG_WARN("failed to assign buckets", K(ret)); } else { col_stat.get_histogram().set_type(ObHistType::HYBIRD); col_stat.get_histogram().set_sample_size(total_count); diff --git a/src/share/stat/ob_incremental_stat_estimator.cpp b/src/share/stat/ob_incremental_stat_estimator.cpp index d3f74050c..10ccda032 100644 --- a/src/share/stat/ob_incremental_stat_estimator.cpp +++ b/src/share/stat/ob_incremental_stat_estimator.cpp @@ -607,7 +607,7 @@ int ObIncrementalStatEstimator::derive_global_col_stat(ObExecContext &ctx, ObGlobalNotNullEval not_null_eval; ObGlobalNdvEval ndv_eval; ObGlobalAvglenEval avglen_eval; - ObSEArray all_part_histograms; + ObSEArray all_part_histograms; int64_t total_avg_len = 0; int64_t max_bucket_num = param.column_params_.at(i).bucket_num_; for (int64_t j = 0; OB_SUCC(ret) && j < part_cnt; ++j) { @@ -631,7 +631,7 @@ int ObIncrementalStatEstimator::derive_global_col_stat(ObExecContext &ctx, } else if (opt_col_stat->get_num_distinct() == 0 && opt_col_stat->get_num_null() == 0) { /*do nothing*/ } else if (need_drive_hist && opt_col_stat->get_histogram().is_valid() && - OB_FAIL(all_part_histograms.push_back(&opt_col_stat->get_histogram()))) { + OB_FAIL(all_part_histograms.push_back(opt_col_stat->get_histogram()))) { LOG_WARN("failed to push back histogram", K(ret)); } else { need_drive_hist &= opt_col_stat->get_histogram().is_valid(); @@ -706,7 +706,7 @@ int ObIncrementalStatEstimator::derive_global_col_stat(ObExecContext &ctx, return ret; } -int ObIncrementalStatEstimator::derive_global_histogram(ObIArray &all_part_histograms, +int ObIncrementalStatEstimator::derive_global_histogram(ObIArray &all_part_histograms, common::ObIAllocator &allocator, int64_t max_bucket_num, int64_t total_row_count, @@ -727,31 +727,26 @@ int ObIncrementalStatEstimator::derive_global_histogram(ObIArray & top_k_fre_hist->set_window_size(1000); top_k_fre_hist->set_item_size(256); for (int64_t i = 0; OB_SUCC(ret) && i < all_part_histograms.count(); ++i) { - if (all_part_histograms.at(i) != NULL && all_part_histograms.at(i)->is_valid()) { - if (all_part_histograms.at(i)->get_type() == ObHistType::FREQUENCY || - all_part_histograms.at(i)->get_type() == ObHistType::TOP_FREQUENCY || - all_part_histograms.at(i)->get_type() == ObHistType::HYBIRD) { - const ObHistBucket *part_bkts = all_part_histograms.at(i)->get_buckets(); - const int64_t part_bkt_size = all_part_histograms.at(i)->get_bucket_size(); - if (OB_ISNULL(part_bkts) || OB_UNLIKELY(part_bkt_size == 0)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected error", K(ret), K(part_bkts), K(part_bkt_size)); - } else { - for (int64_t j = 0; OB_SUCC(ret) && j < part_bkt_size; ++j) { - for (int64_t k = 0; OB_SUCC(ret) && k < part_bkts[j].endpoint_repeat_count_; ++k) { - if (OB_FAIL(top_k_fre_hist->add_top_k_frequency_item(part_bkts[j].endpoint_value_))) { - LOG_WARN("failed to add topk frequency item", K(ret)); - } else {/*do nothing*/} - } + if (all_part_histograms.at(i).is_valid()) { + if (all_part_histograms.at(i).get_type() == ObHistType::FREQUENCY || + all_part_histograms.at(i).get_type() == ObHistType::TOP_FREQUENCY || + all_part_histograms.at(i).get_type() == ObHistType::HYBIRD) { + const ObHistogram::Buckets &part_bkts = all_part_histograms.at(i).get_buckets(); + for (int64_t j = 0; OB_SUCC(ret) && j < part_bkts.count(); ++j) { + for (int64_t k = 0; OB_SUCC(ret) && k < part_bkts.at(j).endpoint_repeat_count_; ++k) { + if (OB_FAIL(top_k_fre_hist->add_top_k_frequency_item( + part_bkts.at(j).endpoint_value_))) { + LOG_WARN("failed to add topk frequency item", K(ret)); + } else {/*do nothing*/} } } } else { ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected hist type", K(ret), K(all_part_histograms.at(i)->get_type())); + LOG_WARN("get unexpected hist type", K(ret), K(all_part_histograms.at(i).get_type())); } } else { ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected error", KPC(all_part_histograms.at(i))); + LOG_WARN("get unexpected error", K(all_part_histograms.at(i))); } } if (OB_SUCC(ret)) { @@ -760,6 +755,7 @@ int ObIncrementalStatEstimator::derive_global_histogram(ObIArray & } else if (top_k_fre_hist->get_buckets().count() == 0) { /*do nothing*/ } else if (OB_FAIL(ObStatTopKHist::build_histogram_from_topk_items( + allocator, top_k_fre_hist->get_buckets(), max_bucket_num, total_row_count, diff --git a/src/share/stat/ob_incremental_stat_estimator.h b/src/share/stat/ob_incremental_stat_estimator.h index 67ac07024..c43a6b6e3 100644 --- a/src/share/stat/ob_incremental_stat_estimator.h +++ b/src/share/stat/ob_incremental_stat_estimator.h @@ -108,7 +108,7 @@ private: bool &need_gather_hybrid_hist, ObOptStat &global_opt_stat); - static int derive_global_histogram(ObIArray &all_part_histogram, + static int derive_global_histogram(ObIArray &all_part_histogram, common::ObIAllocator &allocator, int64_t max_bucket_num, int64_t total_row_count, diff --git a/src/share/stat/ob_opt_column_stat.cpp b/src/share/stat/ob_opt_column_stat.cpp index 22b0213a1..12f5d667a 100644 --- a/src/share/stat/ob_opt_column_stat.cpp +++ b/src/share/stat/ob_opt_column_stat.cpp @@ -42,18 +42,8 @@ void ObHistogram::reset() type_ = ObHistType::INVALID_TYPE; sample_size_ = -1; density_ = -1; - if (NULL != buckets_) { - for (int64_t i = 0; i < bucket_size_; ++i) { - buckets_[i].~ObHistBucket(); - } - get_allocator().free(buckets_); - inner_allocator_.reset(); - buckets_ = NULL; - bucket_size_ = 0; - max_bucket_size_ = 0; - } - pop_freq_ = 0; - pop_count_ = 0; + bucket_cnt_ = 0; + buckets_.reset(); } const char *ObHistogram::get_type_name() const @@ -74,8 +64,8 @@ const char *ObHistogram::get_type_name() const int64_t ObHistogram::deep_copy_size() const { int64_t size = sizeof(*this); - for (int64_t i = 0; i < bucket_size_; ++i) { - size += sizeof(ObHistBucket) + buckets_[i].deep_copy_size(); + for (int64_t i = 0; i < buckets_.count(); ++i) { + size += sizeof(ObHistBucket) + buckets_.at(i).deep_copy_size(); } return size; } @@ -86,17 +76,17 @@ int ObHistogram::deep_copy(const ObHistogram &src, char *buf, const int64_t buf_ type_ = src.type_; sample_size_ = src.sample_size_; density_ = src.density_; + bucket_cnt_ = src.bucket_cnt_; int64_t copy_size = src.deep_copy_size(); if (OB_UNLIKELY(copy_size + pos > buf_len)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("buffer size is not enough", K(ret), K(copy_size), K(pos), K(buf_len)); - } else if (src.bucket_size_ > 0 && src.buckets_ != NULL) { - buckets_ = new (buf + pos) ObHistBucket[src.bucket_size_]; - bucket_size_ = src.bucket_size_; - max_bucket_size_ = src.bucket_size_; - pos += sizeof(ObHistBucket) * bucket_size_; - for (int64_t i = 0; OB_SUCC(ret) && i < bucket_size_; ++i) { - if (OB_FAIL(buckets_[i].deep_copy(src.buckets_[i], buf, buf_len, pos))) { + } else if (!src.buckets_.empty()) { + ObHistBucket *new_buckets = new (buf + pos) ObHistBucket[src.buckets_.count()]; + buckets_ = ObArrayWrap(new_buckets, src.buckets_.count()); + pos += sizeof(ObHistBucket) * src.buckets_.count(); + for (int64_t i = 0; OB_SUCC(ret) && i < buckets_.count(); ++i) { + if (OB_FAIL(buckets_.at(i).deep_copy(src.buckets_.at(i), buf, buf_len, pos))) { LOG_WARN("deep copy bucket failed", K(ret), K(buf_len), K(pos)); } } @@ -104,128 +94,51 @@ int ObHistogram::deep_copy(const ObHistogram &src, char *buf, const int64_t buf_ return ret; } -int ObHistogram::assign_buckets(const ObHistBucket *buckets, const int64_t bucket_size) +int ObHistogram::prepare_allocate_buckets(ObIAllocator &allocator, const int64_t bucket_size) { int ret = OB_SUCCESS; - void *buf = NULL; - if (buckets == NULL || bucket_size == 0) { - //do nothing - } else if (buckets_ != NULL || OB_UNLIKELY(bucket_size_ > 0)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null", K(ret), K(buckets_), K(bucket_size_)); - } else if (OB_ISNULL(buf = get_allocator().alloc(sizeof(ObHistBucket) * bucket_size))) { - COMMON_LOG_RET(WARN, OB_ALLOCATE_MEMORY_FAILED, "allocate memory for buckets failed."); - } else { - buckets_ = new (buf) ObHistBucket[bucket_size]; - bucket_size_ = bucket_size; - max_bucket_size_ = bucket_size; - for (int64_t i = 0; i < bucket_size_; ++i) { - buckets_[i].endpoint_repeat_count_ = buckets[i].endpoint_repeat_count_; - buckets_[i].endpoint_num_ = buckets[i].endpoint_num_; - buckets_[i].endpoint_value_ = buckets[i].endpoint_value_; - } + if (OB_FAIL(buckets_.allocate_array(allocator, bucket_size))) { + LOG_WARN("failed to prepare allocate buckets", K(ret)); } return ret; } -//the endpoint value is shallow copy!!!!!!!!! -int ObHistogram::add_buckets(const ObIArray &buckets) -{ - int ret = OB_SUCCESS; - void *buf = NULL; - if (buckets.empty()) { - //do nothing - } else if (buckets_ != NULL || OB_UNLIKELY(bucket_size_ > 0)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null", K(ret), K(buckets_), K(bucket_size_)); - } else if (OB_ISNULL(buf = get_allocator().alloc(sizeof(ObHistBucket) * buckets.count()))) { - COMMON_LOG_RET(WARN, OB_ALLOCATE_MEMORY_FAILED, "allocate memory for buckets failed."); - } else { - buckets_ = new (buf) ObHistBucket[buckets.count()]; - bucket_size_ = buckets.count(); - max_bucket_size_ = buckets.count(); - for (int64_t i = 0; i < bucket_size_; ++i) { - buckets_[i].endpoint_repeat_count_ = buckets.at(i).endpoint_repeat_count_; - buckets_[i].endpoint_num_ = buckets.at(i).endpoint_num_; - buckets_[i].endpoint_value_ = buckets.at(i).endpoint_value_; - } - } - return ret; -} - -//the endpoint value is shallow copy!!!!!!!!! int ObHistogram::add_bucket(const ObHistBucket &bucket) { int ret = OB_SUCCESS; - if (OB_ISNULL(buckets_) && bucket_size_ != 0) { + if (OB_UNLIKELY(bucket_cnt_ >= buckets_.count())) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null", K(ret), K(buckets_), K(bucket_size_), K(max_bucket_size_)); - } else if (bucket_size_ < max_bucket_size_) { - if (OB_ISNULL(buckets_)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null", K(ret), K(buckets_), K(bucket_size_), K(max_bucket_size_)); - } else { - buckets_[bucket_size_].endpoint_repeat_count_ = bucket.endpoint_repeat_count_; - buckets_[bucket_size_].endpoint_num_ = bucket.endpoint_num_; - buckets_[bucket_size_].endpoint_value_ = bucket.endpoint_value_; - ++ bucket_size_; - } + LOG_WARN("get unexpected null", K(ret), K(bucket_cnt_), K(buckets_)); } else { - void *buf = NULL; - max_bucket_size_ = bucket_size_ == 0 ? 1 : 2 * bucket_size_; - if (OB_ISNULL(buf = get_allocator().alloc(sizeof(ObHistBucket) * max_bucket_size_))) { - COMMON_LOG_RET(WARN, OB_ALLOCATE_MEMORY_FAILED, "allocate memory for buckets failed."); - } else { - ObHistBucket *new_buckets = new (buf) ObHistBucket[max_bucket_size_]; - for (int64_t i = 0; i < bucket_size_; ++i) { - new_buckets[i].endpoint_repeat_count_ = buckets_[i].endpoint_repeat_count_; - new_buckets[i].endpoint_num_ = buckets_[i].endpoint_num_; - new_buckets[i].endpoint_value_ = buckets_[i].endpoint_value_; - } - new_buckets[bucket_size_].endpoint_repeat_count_ = bucket.endpoint_repeat_count_; - new_buckets[bucket_size_].endpoint_num_ = bucket.endpoint_num_; - new_buckets[bucket_size_].endpoint_value_ = bucket.endpoint_value_; - ++ bucket_size_; - get_allocator().free(buckets_); - buckets_ = new_buckets; - } + buckets_.at(bucket_cnt_++) = bucket; } return ret; } -int ObHistogram::prepare_allocate_buckets(const int64_t buckets_num) +int ObHistogram::assign_buckets(const ObIArray &buckets) { int ret = OB_SUCCESS; - void *buf = NULL; - if (buckets_ != NULL || OB_UNLIKELY(bucket_size_ > 0 || buckets_num <= 0)) { + if (OB_UNLIKELY(buckets_.count() != buckets.count() || bucket_cnt_ != buckets.count())) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null", K(ret), K(buckets_), K(bucket_size_), K(buckets_num)); - } else if (OB_ISNULL(buf = get_allocator().alloc(sizeof(ObHistBucket) * buckets_num))) { - COMMON_LOG_RET(WARN, OB_ALLOCATE_MEMORY_FAILED, "allocate memory for buckets failed."); + LOG_WARN("get unexpected null", K(ret), K(buckets_), K(buckets), K(bucket_cnt_)); } else { - buckets_ = new (buf) ObHistBucket[buckets_num]; - max_bucket_size_ = buckets_num; + for (int64_t i = 0; i < buckets.count(); ++i) { + buckets_.at(i) = buckets.at(i); + } } return ret; } -int64_t ObHistogram::to_string(char *buf, const int64_t buf_len) const +int ObHistogram::assign(const ObHistogram &other) { - int64_t pos = 0; - J_OBJ_START(); - J_KV("Type", get_type_name(), - K_(sample_size), - K_(density), - K_(bucket_size), - K_(max_bucket_size), - K_(buckets)); - if (buckets_ != NULL && bucket_size_ > 0 && max_bucket_size_ > 0) { - for (int64_t i = 0; i < bucket_size_; ++i) { - J_KV(K(buckets_[i])); - } - } - J_OBJ_END(); - return pos; + int ret = OB_SUCCESS; + type_ = other.type_; + sample_size_ = other.sample_size_; + density_ = other.density_; + bucket_cnt_ = other.bucket_cnt_; + pop_freq_ = other.pop_freq_; + pop_count_ = other.pop_count_; + return buckets_.assign(other.buckets_); } ObOptColumnStat::ObOptColumnStat() @@ -266,7 +179,7 @@ ObOptColumnStat::ObOptColumnStat(ObIAllocator &allocator) max_value_(), llc_bitmap_size_(0), llc_bitmap_(NULL), - histogram_(allocator), + histogram_(), last_analyzed_(0), cs_type_(CS_TYPE_INVALID), inner_max_allocator_("OptColStatMax"), diff --git a/src/share/stat/ob_opt_column_stat.h b/src/share/stat/ob_opt_column_stat.h index c59609f19..176eec103 100644 --- a/src/share/stat/ob_opt_column_stat.h +++ b/src/share/stat/ob_opt_column_stat.h @@ -72,6 +72,7 @@ class ObHistogram public: friend class ObOptColumnStat; + typedef ObArrayWrap Buckets; enum class BoundType { LOWER, UPPER, @@ -82,35 +83,18 @@ public: type_(ObHistType::INVALID_TYPE), sample_size_(-1), density_(0), - bucket_size_(0), - max_bucket_size_(0), - buckets_(NULL), + bucket_cnt_(0), + buckets_(), pop_freq_(0), - pop_count_(0), - allocator_(NULL), - inner_allocator_("ObHistogram") - {} - - ObHistogram(ObIAllocator &allocator) : - type_(ObHistType::INVALID_TYPE), - sample_size_(-1), - density_(0), - bucket_size_(0), - max_bucket_size_(0), - buckets_(NULL), - pop_freq_(0), - pop_count_(0), - allocator_(&allocator), - inner_allocator_("ObHistogram") - {} + pop_count_(0) + {} ~ObHistogram() { reset(); } void reset(); int deep_copy(const ObHistogram &src, char *buf, const int64_t buf_len, int64_t &pos); - - int assign_buckets(const ObHistBucket *buckets, const int64_t bucket_size); + int assign(const ObHistogram &other); int64_t deep_copy_size() const; bool is_valid() const @@ -128,43 +112,44 @@ public: void set_sample_size(int64_t sample_size) { sample_size_ = sample_size; } double get_density() const { return density_; } - void set_density(double density) { density_ = density; } - int64_t get_bucket_size() const { return bucket_size_; } + int64_t get_bucket_cnt() const { return bucket_cnt_; } + void set_bucket_cnt(int64_t bucket_cnt) { bucket_cnt_ = bucket_cnt; } - int add_buckets(const ObIArray &buckets); + int64_t get_bucket_size() const { return buckets_.count(); } - int add_bucket(const ObHistBucket &bucket); - - int prepare_allocate_buckets(const int64_t buckets_num); - - ObHistBucket *get_buckets() { return buckets_; } - const ObHistBucket *get_buckets() const { return buckets_; } + ObHistBucket &get(int64_t i) { return buckets_.at(i); } + const ObHistBucket &get(int64_t i) const { return buckets_.at(i); } + Buckets &get_buckets() { return buckets_; } + const Buckets &get_buckets() const { return buckets_; } int64_t get_pop_frequency() const { return pop_freq_; } void set_pop_frequency(int64_t pop_freq) { pop_freq_ = pop_freq; } int64_t get_pop_count() const { return pop_count_; } void set_pop_count(int64_t pop_count) { pop_count_ = pop_count; } + int prepare_allocate_buckets(ObIAllocator &allocator, const int64_t bucket_size); + int add_bucket(const ObHistBucket &bucket); + int assign_buckets(const ObIArray &buckets); + void calc_density(ObHistType hist_type, const int64_t row_count, const int64_t pop_row_count, const int64_t ndv, const int64_t pop_ndv); - ObIAllocator &get_allocator() { return allocator_ != NULL ? *allocator_ : inner_allocator_; } - int64_t to_string(char* buf, const int64_t buf_len) const; - + TO_STRING_KV("Type", get_type_name(), + K_(sample_size), + K_(density), + K_(bucket_cnt), + K_(buckets)); protected: ObHistType type_; int64_t sample_size_; double density_; - int64_t bucket_size_; - int64_t max_bucket_size_; - ObHistBucket *buckets_; + int64_t bucket_cnt_; + Buckets buckets_; int64_t pop_freq_; // only used during gather table stats int64_t pop_count_; // only used during gather table stats - ObIAllocator *allocator_; - ObArenaAllocator inner_allocator_; }; class ObOptColumnStat : public common::ObIKVCacheValue @@ -279,6 +264,7 @@ public: const ObHistogram &get_histogram() const { return histogram_; } ObHistogram &get_histogram() { return histogram_; } + int64_t get_bucket_num() const { return histogram_.get_bucket_cnt(); } virtual int64_t size() const override; virtual int deep_copy(char *buf, const int64_t buf_len, ObIKVCacheValue *&value) const override; diff --git a/src/share/stat/ob_opt_stat_sql_service.cpp b/src/share/stat/ob_opt_stat_sql_service.cpp index c54aec8c9..6babce00c 100644 --- a/src/share/stat/ob_opt_stat_sql_service.cpp +++ b/src/share/stat/ob_opt_stat_sql_service.cpp @@ -540,42 +540,27 @@ int ObOptStatSqlService::construct_delete_column_histogram_sql(const uint64_t te ObSqlString &delete_histogram_sql) { int ret = OB_SUCCESS; - ObSEArray key_column_stats; - ObArenaAllocator allocator(ObModIds::OB_BUFFER); + ObSqlString where_str; + const uint64_t exec_tenant_id = ObSchemaUtils::get_exec_tenant_id(tenant_id); for (int64_t i = 0; OB_SUCC(ret) && i < column_stats.count(); ++i) { if (OB_ISNULL(column_stats.at(i))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(ret), K(column_stats.at(i))); - } else { - ObOptColumnStat::Key check_key(tenant_id, - column_stats.at(i)->get_table_id(), + } else if (where_str.append_fmt(" %s (%lu, %ld, %ld, %lu) %s", + i != 0 ? "," : "(TENANT_ID, TABLE_ID, PARTITION_ID, COLUMN_ID) IN (", + ObSchemaUtils::get_extract_tenant_id(exec_tenant_id, tenant_id), + ObSchemaUtils::get_extract_schema_id(exec_tenant_id, column_stats.at(i)->get_table_id()), column_stats.at(i)->get_partition_id(), - column_stats.at(i)->get_column_id()); - void *ptr = NULL; - if (OB_ISNULL(ptr = allocator.alloc(sizeof(ObOptColumnStat::Key)))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("memory is not enough", K(ret), K(ptr)); - } else { - ObOptKeyColumnStat tmp_key_col_stat; - tmp_key_col_stat.key_ = new (ptr) ObOptColumnStat::Key(tenant_id, - column_stats.at(i)->get_table_id(), - column_stats.at(i)->get_partition_id(), - column_stats.at(i)->get_column_id()); - tmp_key_col_stat.stat_ = const_cast(column_stats.at(i)); - if (OB_FAIL(key_column_stats.push_back(tmp_key_col_stat))) { - LOG_WARN("failed to push back", K(ret)); - } else {/*do nothing*/} - } + column_stats.at(i)->get_column_id(), + i == column_stats.count() - 1 ? ")" : "")) { + LOG_WARN("failed to append fmt", K(ret)); } } - if (OB_SUCC(ret) && !key_column_stats.empty()) { - ObSqlString keys_list_str; - if (OB_FAIL(generate_specified_keys_list_str(tenant_id, key_column_stats, keys_list_str))) { - LOG_WARN("failed to generate specified keys list str", K(ret), K(key_column_stats)); - } else if (OB_FAIL(delete_histogram_sql.append_fmt(" %s %.*s;", DELETE_HISTOGRAM_STAT_SQL, - keys_list_str.string().length(), - keys_list_str.string().ptr()))) { - LOG_WARN("fail to append SQL where string.", K(ret)); + if (OB_SUCC(ret) && !where_str.empty()) { + if (OB_FAIL(delete_histogram_sql.append_fmt(" %s %.*s;", DELETE_HISTOGRAM_STAT_SQL, + where_str.string().length(), + where_str.string().ptr()))) { + LOG_WARN("fail to append SQL where string.", K(ret)); } else { LOG_TRACE("Succeed to construct delete column histogram sql", K(delete_histogram_sql)); } @@ -617,7 +602,7 @@ int ObOptStatSqlService::construct_histogram_insert_sql(share::schema::ObSchemaG } else if (OB_FAIL(get_histogram_stat_history_sql(tenant_id, *column_stats.at(i), allocator, - hist.get_buckets()[j], + hist.get(j), current_time, endpoint_meta, tmp))) { @@ -630,7 +615,7 @@ int ObOptStatSqlService::construct_histogram_insert_sql(share::schema::ObSchemaG } else if (!need_histogram && OB_FAIL(insert_histogram_sql.append(INSERT_HISTOGRAM_STAT_SQL))) { LOG_WARN("failed to append sql", K(ret)); } else if (OB_FAIL(get_histogram_stat_sql(tenant_id, *column_stats.at(i), - allocator, hist.get_buckets()[j], endpoint_meta, tmp))) { + allocator, hist.get(j), endpoint_meta, tmp))) { LOG_WARN("failed to get histogram sql", K(ret)); } else if (OB_FAIL(insert_histogram_sql.append_fmt("%s (%s)", (!need_histogram ? "" : ","), tmp.ptr()))) { LOG_WARN("failed to append sql", K(ret)); @@ -935,7 +920,7 @@ int ObOptStatSqlService::get_column_stat_sql(const uint64_t tenant_id, OB_FAIL(dml_splicer.add_column("distinct_cnt_synopsis_size", llc_comp_size * 2)) || OB_FAIL(dml_splicer.add_column("sample_size", stat.get_histogram().get_sample_size())) || OB_FAIL(dml_splicer.add_column("density", stat.get_histogram().get_density())) || - OB_FAIL(dml_splicer.add_column("bucket_cnt", stat.get_histogram().get_bucket_size())) || + OB_FAIL(dml_splicer.add_column("bucket_cnt", stat.get_histogram().get_bucket_cnt())) || OB_FAIL(dml_splicer.add_column("histogram_type", stat.get_histogram().get_type())) || OB_FAIL(dml_splicer.add_column("global_stats", 0)) || OB_FAIL(dml_splicer.add_column("user_stats", 0))) { @@ -1010,7 +995,7 @@ int ObOptStatSqlService::get_column_stat_history_sql(const uint64_t tenant_id, OB_FAIL(dml_splicer.add_column("distinct_cnt_synopsis_size", llc_comp_size * 2)) || OB_FAIL(dml_splicer.add_column("sample_size", stat.get_histogram().get_sample_size())) || OB_FAIL(dml_splicer.add_column("density", stat.get_histogram().get_density())) || - OB_FAIL(dml_splicer.add_column("bucket_cnt", stat.get_histogram().get_bucket_size())) || + OB_FAIL(dml_splicer.add_column("bucket_cnt", stat.get_histogram().get_bucket_cnt())) || OB_FAIL(dml_splicer.add_column("histogram_type", stat.get_histogram().get_type()))) { LOG_WARN("failed to add dml splicer column", K(ret)); } else if (OB_FAIL(dml_splicer.splice_values(sql_string))) { @@ -1356,7 +1341,7 @@ int ObOptStatSqlService::fill_column_stat(ObIAllocator &allocator, EXTRACT_INT_FIELD_MYSQL(result, "distinct_cnt_synopsis_size", llc_bitmap_size, int64_t); if (OB_SUCC(ret)) { hist.set_type(histogram_type); - if (hist.is_valid() && OB_FAIL(hist.prepare_allocate_buckets(bucket_cnt))) { + if (hist.is_valid() && OB_FAIL(hist.prepare_allocate_buckets(allocator, bucket_cnt))) { LOG_WARN("failed to prepare allocate buckets", K(ret)); } } @@ -1509,7 +1494,7 @@ int ObOptStatSqlService::fill_bucket_stat(ObIAllocator &allocator, if (OB_FAIL(hex_str_to_obj(str.ptr(), str.length(), allocator, bkt.endpoint_value_))) { LOG_WARN("deserialize object value failed.", K(stat), K(ret)); } else if (OB_FAIL(dst_key_col_stat.stat_->get_histogram().add_bucket(bkt))) { - LOG_WARN("failed to add backet", K(ret)); + LOG_WARN("failed to push back buckets", K(ret)); } else {/*do nothing*/} } } diff --git a/src/share/stat/ob_stat_item.cpp b/src/share/stat/ob_stat_item.cpp index 01fe4cdfe..65823f182 100644 --- a/src/share/stat/ob_stat_item.cpp +++ b/src/share/stat/ob_stat_item.cpp @@ -323,7 +323,7 @@ public: ObBucketCompare &compare_; }; -int ObStatTopKHist::decode(ObObj &obj) +int ObStatTopKHist::decode(ObObj &obj, ObIAllocator &allocator) { int ret = OB_SUCCESS; ObTopKFrequencyHistograms topk_hist; @@ -334,7 +334,8 @@ int ObStatTopKHist::decode(ObObj &obj) LOG_WARN("param is null", K(ret), K(bucket_num), K(col_param_)); } else if (OB_FAIL(topk_hist.read_result(obj))) { LOG_WARN("failed to read result from obj", K(ret)); - } else if (OB_FAIL(build_histogram_from_topk_items(topk_hist.get_buckets(), + } else if (OB_FAIL(build_histogram_from_topk_items(allocator, + topk_hist.get_buckets(), col_param_->bucket_num_, tab_stat_->get_row_count(), col_stat_->get_num_not_null(), @@ -345,7 +346,8 @@ int ObStatTopKHist::decode(ObObj &obj) return ret; } -int ObStatTopKHist::build_histogram_from_topk_items(const ObIArray &buckets, +int ObStatTopKHist::build_histogram_from_topk_items(ObIAllocator &allocator, + const ObIArray &buckets, int64_t max_bucket_num, int64_t total_row_count, int64_t not_null_count, @@ -377,7 +379,8 @@ int ObStatTopKHist::build_histogram_from_topk_items(const ObIArray & tmp.at(i).endpoint_num_ += tmp.at(i - 1).endpoint_num_; } if (OB_SUCC(ret)) { - if (OB_FAIL(try_build_topk_histogram(tmp, + if (OB_FAIL(try_build_topk_histogram(allocator, + tmp, max_bucket_num, total_row_count, not_null_count, @@ -400,7 +403,8 @@ int ObStatTopKHist::build_histogram_from_topk_items(const ObIArray & * @param histogram, the result histogram built from bkts * @return */ -int ObStatTopKHist::try_build_topk_histogram(const ObIArray &bkts, +int ObStatTopKHist::try_build_topk_histogram(ObIAllocator &allocator, + const ObIArray &bkts, const int64_t max_bucket_num, const int64_t total_row_count, const int64_t not_null_count, @@ -417,16 +421,22 @@ int ObStatTopKHist::try_build_topk_histogram(const ObIArray &bkts, // all vals are null, there is no need to build a histogram histogram.set_type(ObHistType::INVALID_TYPE); histogram.set_sample_size(0); + histogram.set_bucket_cnt(0); histogram.set_density(0); } else if (num > 0 && bkts.at(num - 1).endpoint_num_ == not_null_count) { histogram.set_type(ObHistType::FREQUENCY); histogram.set_sample_size(not_null_count); + histogram.set_bucket_cnt(bkts.count()); histogram.calc_density(ObHistType::FREQUENCY, not_null_count, not_null_count, num_distinct, bkts.count()); - ret = histogram.add_buckets(bkts); + if (OB_FAIL(histogram.prepare_allocate_buckets(allocator, bkts.count()))) { + LOG_WARN("failed to prepare allocate buckets", K(ret)); + } else if (OB_FAIL(histogram.assign_buckets(bkts))) { + LOG_WARN("failed to assign buckets", K(ret)); + } else {/*do nothing*/} } else if (num > 0 && bkts.at(num - 1).endpoint_num_ >= (not_null_count * (1 - 1.0 / max_bucket_num))) { histogram.set_type(ObHistType::TOP_FREQUENCY); @@ -436,7 +446,7 @@ int ObStatTopKHist::try_build_topk_histogram(const ObIArray &bkts, bkts.at(num - 1).endpoint_num_, num_distinct, num); - if (OB_FAIL(histogram.prepare_allocate_buckets(num))) { + if (OB_FAIL(histogram.prepare_allocate_buckets(allocator, num))) { LOG_WARN("failed to prepare allocate buckets", K(ret)); } else { for (int64_t i = 0; OB_SUCC(ret) && i < num; ++i) { @@ -450,7 +460,12 @@ int ObStatTopKHist::try_build_topk_histogram(const ObIArray &bkts, // if the topk histogram contains all records of the table // then we can build hybrid histogram directly from the topk result. histogram.set_sample_size(not_null_count); - ret = histogram.add_buckets(bkts); + histogram.set_bucket_cnt(bkts.count()); + if (OB_FAIL(histogram.prepare_allocate_buckets(allocator, bkts.count()))) { + LOG_WARN("failed to prepare allocate buckets", K(ret)); + } else if (OB_FAIL(histogram.assign_buckets(bkts))) { + LOG_WARN("failed to assign buckets", K(ret)); + } else {/*do nothing*/} } } return ret; @@ -688,7 +703,7 @@ int ObStatHybridHist::gen_expr(char *buf, const int64_t buf_len, int64_t &pos) return ret; } -int ObStatHybridHist::decode(ObObj &obj) +int ObStatHybridHist::decode(ObObj &obj, ObIAllocator &allocator) { int ret = OB_SUCCESS; ObHybridHistograms hybrid_hist; @@ -703,9 +718,12 @@ int ObStatHybridHist::decode(ObObj &obj) } else if (OB_FAIL(hybrid_hist.read_result(obj))) { LOG_WARN("failed to read result from obj", K(ret)); } else { - col_stat_->get_histogram().reset(); - if (OB_FAIL(col_stat_->get_histogram().add_buckets(hybrid_hist.get_buckets()))) { - LOG_WARN("failed to append hist bucket", K(ret)); + col_stat_->get_histogram().get_buckets().reset(); + col_stat_->get_histogram().set_bucket_cnt(hybrid_hist.get_buckets().count()); + if (OB_FAIL(col_stat_->get_histogram().prepare_allocate_buckets(allocator, hybrid_hist.get_buckets().count()))) { + LOG_WARN("failed to prepare allocate buckets", K(ret)); + } else if (OB_FAIL(col_stat_->get_histogram().assign_buckets(hybrid_hist.get_buckets()))) { + LOG_WARN("failed to assign buckets", K(ret)); } else { col_stat_->get_histogram().set_type(ObHistType::HYBIRD); col_stat_->get_histogram().set_sample_size(hybrid_hist.get_total_count()); diff --git a/src/share/stat/ob_stat_item.h b/src/share/stat/ob_stat_item.h index fcca2e275..7c108f3c9 100644 --- a/src/share/stat/ob_stat_item.h +++ b/src/share/stat/ob_stat_item.h @@ -45,6 +45,11 @@ public: UNUSED(obj); return OB_NOT_IMPLEMENT; } + virtual int decode(ObObj &obj, ObIAllocator &allocator) + { + UNUSED(allocator); + return decode(obj); + } TO_STRING_KV(K(is_needed())); @@ -235,14 +240,16 @@ public: tab_stat_(tab_stat) {} - static int build_histogram_from_topk_items(const ObIArray &buckets, + static int build_histogram_from_topk_items(ObIAllocator &allocator, + const ObIArray &buckets, int64_t max_bucket_num, int64_t total_row_count, int64_t not_null_count, int64_t num_distinct, ObHistogram &histogram); - static int try_build_topk_histogram(const ObIArray &bkts, + static int try_build_topk_histogram(ObIAllocator &allocator, + const ObIArray &bkts, const int64_t max_bucket_num, const int64_t total_row_count, const int64_t not_null_count, @@ -257,7 +264,7 @@ public: // const bucket_size = 256; virtual bool is_needed() const override; virtual int gen_expr(char *buf, const int64_t buf_len, int64_t &pos) override; - virtual int decode(ObObj &obj) override; + virtual int decode(ObObj &obj, ObIAllocator &allocator) override; protected: ObOptTableStat *tab_stat_; }; @@ -292,7 +299,7 @@ public: {} virtual int gen_expr(char *buf, const int64_t buf_len, int64_t &pos) override; - virtual int decode(ObObj &obj) override; + virtual int decode(ObObj &obj, ObIAllocator &allocator) override; private: bool is_null_item_; }; diff --git a/src/share/stat/ob_stats_estimator.cpp b/src/share/stat/ob_stats_estimator.cpp index 398452e89..6881eb373 100644 --- a/src/share/stat/ob_stats_estimator.cpp +++ b/src/share/stat/ob_stats_estimator.cpp @@ -375,7 +375,7 @@ int ObStatsEstimator::do_estimate(uint64_t tenant_id, } } if (OB_SUCC(ret)) { - if (OB_FAIL(decode())) { + if (OB_FAIL(decode(ctx_.get_allocator()))) { LOG_WARN("failed to decode results", K(ret)); } else if (copy_type == COPY_ALL_STAT && OB_FAIL(copy_opt_stat(src_opt_stat, dst_opt_stats))) { @@ -411,7 +411,7 @@ int ObStatsEstimator::do_estimate(uint64_t tenant_id, return ret; } -int ObStatsEstimator::decode() +int ObStatsEstimator::decode(ObIAllocator &allocator) { int ret = OB_SUCCESS; if (OB_UNLIKELY(stat_items_.count() != results_.count())) { @@ -419,7 +419,7 @@ int ObStatsEstimator::decode() LOG_WARN("size does not match", K(ret), K(stat_items_.count()), K(results_.count())); } for (int64_t i = 0; OB_SUCC(ret) && i < stat_items_.count(); ++i) { - if (OB_FAIL(stat_items_.at(i)->decode(results_.at(i)))) { + if (OB_FAIL(stat_items_.at(i)->decode(results_.at(i), allocator))) { LOG_WARN("failed to decode statistic result", K(ret)); } } @@ -511,8 +511,8 @@ int ObStatsEstimator::copy_col_stats(const int64_t cur_row_cnt, dst_col_stats.at(i)->get_histogram().set_type(src_hist.get_type()); dst_col_stats.at(i)->get_histogram().set_sample_size(src_col_stats.at(i)->get_num_not_null()); dst_col_stats.at(i)->get_histogram().set_density(src_hist.get_density()); - if (OB_FAIL(dst_col_stats.at(i)->get_histogram().assign_buckets(src_hist.get_buckets(), - src_hist.get_bucket_size()))) { + dst_col_stats.at(i)->get_histogram().set_bucket_cnt(src_hist.get_bucket_cnt()); + if (OB_FAIL(dst_col_stats.at(i)->get_histogram().get_buckets().assign(src_hist.get_buckets()))) { LOG_WARN("failed to assign buckets", K(ret)); } else { LOG_TRACE("Succeed to copy col stat", K(*dst_col_stats.at(i)), K(*src_col_stats.at(i))); @@ -563,13 +563,13 @@ int ObStatsEstimator::copy_hybrid_hist_stat(ObOptStat &src_opt_stat, ObHistogram &src_hist = src_col_stat->get_histogram(); dst_col_stat->get_histogram().set_type(src_hist.get_type()); dst_col_stat->get_histogram().set_sample_size(src_hist.get_sample_size()); + dst_col_stat->get_histogram().set_bucket_cnt(src_hist.get_bucket_cnt()); dst_col_stat->get_histogram().calc_density(ObHistType::HYBIRD, src_hist.get_sample_size(), src_hist.get_pop_frequency(), dst_col_stat->get_num_distinct(), src_hist.get_pop_count()); - if (OB_FAIL(dst_col_stat->get_histogram().assign_buckets(src_hist.get_buckets(), - src_hist.get_bucket_size()))) { + if (OB_FAIL(dst_col_stat->get_histogram().get_buckets().assign(src_hist.get_buckets()))) { LOG_WARN("failed to assign buckets", K(ret)); } else { LOG_TRACE("Succeed to copy histogram", K(*dst_col_stat), K(i), K(j)); diff --git a/src/share/stat/ob_stats_estimator.h b/src/share/stat/ob_stats_estimator.h index b4ea407a7..5edade645 100644 --- a/src/share/stat/ob_stats_estimator.h +++ b/src/share/stat/ob_stats_estimator.h @@ -43,7 +43,7 @@ protected: int64_t get_item_size() const { return stat_items_.count(); } - int decode(); + int decode(ObIAllocator &allocator); int add_result(ObObj &obj) { return results_.push_back(obj); } diff --git a/src/sql/optimizer/ob_log_plan.cpp b/src/sql/optimizer/ob_log_plan.cpp index b6c4219d7..f05ab9d1d 100644 --- a/src/sql/optimizer/ob_log_plan.cpp +++ b/src/sql/optimizer/ob_log_plan.cpp @@ -4894,18 +4894,17 @@ int ObLogPlan::get_popular_values_hash(ObIAllocator &allocator, int ret = OB_SUCCESS; if (OB_ISNULL(handle.stat_) || 0 >= handle.stat_->get_last_analyzed() - || OB_ISNULL(handle.stat_->get_histogram().get_buckets()) || handle.stat_->get_histogram().get_bucket_size() <= 0) { // no histogram info, don't use hybrid hash LOG_DEBUG("table not analyzed. disable hybrid hash DM", K(ret)); } else { const ObHistogram &histogram = handle.stat_->get_histogram(); // get total value count via last bucket by it's cumulative endpoint num - const ObHistBucket &last_bucket = histogram.get_buckets()[histogram.get_bucket_size() - 1]; + const ObHistBucket &last_bucket = histogram.get(histogram.get_bucket_size() - 1); int64_t total_cnt = std::max(1L, last_bucket.endpoint_num_); // avoid zero div int64_t min_freq = optimizer_context_.get_session_info()->get_px_join_skew_minfreq(); for (int64_t i = 0; OB_SUCC(ret) && i < histogram.get_bucket_size(); ++i) { - const ObHistBucket &bucket = histogram.get_buckets()[i]; + const ObHistBucket &bucket = histogram.get(i); int64_t freq = bucket.endpoint_repeat_count_ * 100 / total_cnt; if (freq >= min_freq) { ObObj value; diff --git a/src/sql/optimizer/ob_opt_selectivity.cpp b/src/sql/optimizer/ob_opt_selectivity.cpp index 46f2f4416..432beb4ec 100644 --- a/src/sql/optimizer/ob_opt_selectivity.cpp +++ b/src/sql/optimizer/ob_opt_selectivity.cpp @@ -3276,11 +3276,11 @@ int ObOptSelectivity::get_bucket_bound_idx(const ObHistogram &hist, int64_t right = hist.get_bucket_size() - 1; idx = -1; is_equal = false; - if (OB_LIKELY(hist.get_bucket_size() > 0 && hist.get_buckets() != NULL)) { + if (OB_LIKELY(hist.get_bucket_size() > 0)) { while (OB_SUCC(ret) && left <= right) { int64_t mid = (right + left) / 2; int eq_cmp = 0; - if (OB_FAIL(hist.get_buckets()[mid].endpoint_value_.compare(value, eq_cmp))) { + if (OB_FAIL(hist.get(mid).endpoint_value_.compare(value, eq_cmp))) { LOG_WARN("failed to compare object", K(ret)); } else if (eq_cmp > 0) { // value < bucket[mid].ev @@ -3328,7 +3328,7 @@ int ObOptSelectivity::get_equal_pred_sel(const ObHistogram &histogram, } else if (idx < 0 || idx >= histogram.get_bucket_size() || !is_equal) { density = histogram.get_density(); } else { - density = static_cast(histogram.get_buckets()[idx].endpoint_repeat_count_) + density = static_cast(histogram.get(idx).endpoint_repeat_count_) / histogram.get_sample_size(); } if (OB_SUCC(ret) && sample_size_scale > 0) { @@ -3430,8 +3430,8 @@ int ObOptSelectivity::get_less_pred_sel(const ObHistogram &histogram, } else if (idx >= histogram.get_bucket_size()) { density = 1.0; } else if (is_equal) { - double frequency = histogram.get_buckets()[idx].endpoint_num_ - - (inclusive ? 0 : histogram.get_buckets()[idx].endpoint_repeat_count_); + double frequency = histogram.get(idx).endpoint_num_ - + (inclusive ? 0 : histogram.get(idx).endpoint_repeat_count_); density = frequency / histogram.get_sample_size(); } else { double last_bucket_count = 0; @@ -3439,22 +3439,22 @@ int ObOptSelectivity::get_less_pred_sel(const ObHistogram &histogram, // b[i].ev < maxv < b[i+1].ev // estimate how many elements (smaller than maxv) in bucket[i+1] there are ObObj minscalar, maxscalar, startscalar, endscalar; - ObObj minobj(histogram.get_buckets()[idx].endpoint_value_); - ObObj maxobj(histogram.get_buckets()[idx+1].endpoint_value_); + ObObj minobj(histogram.get(idx).endpoint_value_); + ObObj maxobj(histogram.get(idx+1).endpoint_value_); ObObj startobj(minobj), endobj(maxv); if (OB_FAIL(ObOptEstObjToScalar::convert_objs_to_scalars( &minobj, &maxobj, &startobj, &endobj, &minscalar, &maxscalar, &startscalar, &endscalar))) { LOG_WARN("failed to convert objs to scalars", K(ret)); } else if (maxscalar.get_double() - minscalar.get_double() > OB_DOUBLE_EPSINON) { - last_bucket_count = histogram.get_buckets()[idx+1].endpoint_num_ - - histogram.get_buckets()[idx+1].endpoint_repeat_count_ - - histogram.get_buckets()[idx].endpoint_num_; + last_bucket_count = histogram.get(idx+1).endpoint_num_ - + histogram.get(idx+1).endpoint_repeat_count_ - + histogram.get(idx).endpoint_num_; last_bucket_count *= (endscalar.get_double() - startscalar.get_double()) / (maxscalar.get_double() - minscalar.get_double()); } } - density = static_cast(histogram.get_buckets()[idx].endpoint_num_ + last_bucket_count) + density = static_cast(histogram.get(idx).endpoint_num_ + last_bucket_count) / histogram.get_sample_size(); } LOG_TRACE("link bug", K(density), K(maxv), K(inclusive), K(idx), K(is_equal)); @@ -4076,15 +4076,15 @@ int ObOptSelectivity::get_join_pred_rows(const ObHistogram &left_hist, int64_t ridx = 0; while (OB_SUCC(ret) && lidx < left_hist.get_bucket_size() && ridx < right_hist.get_bucket_size()) { int eq_cmp = 0; - if (OB_FAIL(left_hist.get_buckets()[lidx].endpoint_value_.compare(right_hist.get_buckets()[ridx].endpoint_value_, + if (OB_FAIL(left_hist.get(lidx).endpoint_value_.compare(right_hist.get(ridx).endpoint_value_, eq_cmp))) { LOG_WARN("failed to compare histogram endpoint value", K(ret), - K(left_hist.get_buckets()[lidx].endpoint_value_), K(right_hist.get_buckets()[ridx].endpoint_value_)); + K(left_hist.get(lidx).endpoint_value_), K(right_hist.get(ridx).endpoint_value_)); } else if (0 == eq_cmp) { if (is_semi) { - rows += left_hist.get_buckets()[lidx].endpoint_repeat_count_; + rows += left_hist.get(lidx).endpoint_repeat_count_; } else { - rows += left_hist.get_buckets()[lidx].endpoint_repeat_count_ * right_hist.get_buckets()[ridx].endpoint_repeat_count_; + rows += left_hist.get(lidx).endpoint_repeat_count_ * right_hist.get(ridx).endpoint_repeat_count_; } ++lidx; ++ridx; diff --git a/src/sql/resolver/ddl/ob_analyze_stmt_resolver.cpp b/src/sql/resolver/ddl/ob_analyze_stmt_resolver.cpp index 425e2da8d..e5387e07e 100644 --- a/src/sql/resolver/ddl/ob_analyze_stmt_resolver.cpp +++ b/src/sql/resolver/ddl/ob_analyze_stmt_resolver.cpp @@ -431,5 +431,20 @@ int ObAnalyzeStmtResolver::resolve_sample_clause_info(const ParseNode *sample_cl return ret; } +int ObAnalyzeStmtResolver::get_bucket_size(const ParseNode *node, + int64_t &bucket_num) +{ + int ret = OB_SUCCESS; + if (NULL == node) { + // do nothing + } else if (T_INT != node->type_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected node type", K(ret)); + } else { + bucket_num = node->value_; + } + return ret; +} + } /* namespace sql */ } /* namespace oceanbase */ diff --git a/src/sql/resolver/ddl/ob_analyze_stmt_resolver.h b/src/sql/resolver/ddl/ob_analyze_stmt_resolver.h index 70aeb0162..1ef273018 100644 --- a/src/sql/resolver/ddl/ob_analyze_stmt_resolver.h +++ b/src/sql/resolver/ddl/ob_analyze_stmt_resolver.h @@ -50,6 +50,9 @@ private: ObAnalyzeStmt &analyze_stmt); int resolve_sample_clause_info(const ParseNode *sample_clause_node, ObAnalyzeStmt &analyze_stmt); + + int get_bucket_size(const ParseNode *node, int64_t &bucket_num); + // disallow copy DISALLOW_COPY_AND_ASSIGN(ObAnalyzeStmtResolver); }; diff --git a/unittest/sql/optimizer/test_opt_est_sel.cpp b/unittest/sql/optimizer/test_opt_est_sel.cpp index 43d26f469..3bfe3d3fc 100644 --- a/unittest/sql/optimizer/test_opt_est_sel.cpp +++ b/unittest/sql/optimizer/test_opt_est_sel.cpp @@ -680,7 +680,7 @@ TEST_F(TestOptEstSel, histogram) num_elements.push_back(infos[i][2]); } - init_histogram(ObHistType::FREQUENCY, 100, 0.0025, + init_histogram(allocator_, ObHistType::FREQUENCY, 100, 0.0025, repeat_count, value, num_elements, opt_stat_.get_histogram()); run_test(test_file, result_file, tmp_file, 10); diff --git a/unittest/sql/optimizer/test_optimizer_utils.cpp b/unittest/sql/optimizer/test_optimizer_utils.cpp index 87946c34c..6b1f49057 100644 --- a/unittest/sql/optimizer/test_optimizer_utils.cpp +++ b/unittest/sql/optimizer/test_optimizer_utils.cpp @@ -403,6 +403,7 @@ void TestOptimizerUtils::run_test(const char* test_file, } void TestOptimizerUtils::init_histogram( + common::ObIAllocator &allocator, const ObHistType type, const double sample_size, const double density, @@ -418,14 +419,13 @@ void TestOptimizerUtils::init_histogram( hist.set_sample_size(sample_size); hist.set_density(density); int64_t bucket_cnt = 0; - ObSEArray tmp_buckets; + hist.prepare_allocate_buckets(allocator, repeat_count.count()); for (int64_t i = 0; i < repeat_count.count(); i++) { - ObHistBucket bucket(repeat_count.at(i), num_elements.at(i)); - tmp_buckets.push_back(bucket); - tmp_buckets.at(tmp_buckets.count() - 1).endpoint_value_.set_int(value.at(i)); - bucket_cnt += num_elements.at(i); + hist.get(i).endpoint_num_ = repeat_count.at(i); + hist.get(i).endpoint_repeat_count_ = num_elements.at(i); + hist.get(i).endpoint_value_.set_int(value.at(i)); } - hist.add_buckets(tmp_buckets); + hist.set_bucket_cnt(repeat_count.count()); } void TestOptimizerUtils::run_fail_test(const char *test_file) diff --git a/unittest/sql/optimizer/test_optimizer_utils.h b/unittest/sql/optimizer/test_optimizer_utils.h index 676b03928..1abdf6960 100644 --- a/unittest/sql/optimizer/test_optimizer_utils.h +++ b/unittest/sql/optimizer/test_optimizer_utils.h @@ -89,6 +89,7 @@ class TestOptimizerUtils : public TestSqlUtils, public ::testing::Test { void formalize_tmp_file(const char *tmp_file); void init_histogram( + common::ObIAllocator &allocator, const ObHistType type, const double sample_size, const double density,