update histogram buckets: switch storage from a raw pointer to ObArrayWrap

wangt1xiuyi 2023-02-10 11:11:04 +00:00 committed by ob-robot
parent 0a9101d215
commit ca1fcada83
23 changed files with 270 additions and 299 deletions
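For orientation, the core data-layout change, sketched with the member names from the ObHistogram diff further down (the surrounding OceanBase headers are assumed and not shown):

// Before: buckets owned through a raw pointer plus size/capacity fields,
// grown on demand from an allocator held by the histogram itself.
ObHistBucket *buckets_;      // heap array from allocator_ / inner_allocator_
int64_t bucket_size_;        // buckets currently stored
int64_t max_bucket_size_;    // allocated capacity

// After: buckets live in a flat ObArrayWrap sized once from the caller's
// allocator; bucket_cnt_ records how many reserved slots have been filled.
typedef ObArrayWrap<ObHistBucket> Buckets;
Buckets buckets_;            // prepare_allocate_buckets(allocator, n) sizes it
int64_t bucket_cnt_;         // add_bucket() writes buckets_.at(bucket_cnt_++)

The histogram's own allocator_ / inner_allocator_ members go away; memory now comes from whatever ObIAllocator the call site passes in.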

View File

@ -172,19 +172,17 @@ int ObDbmsStatsExecutor::check_all_cols_range_skew(const ObTableStatParam &param
ObHistogram &hist = col_stats.at(j)->get_histogram();
if ((hist.get_type() == ObHistType::FREQUENCY && col_param.is_size_skewonly()) ||
hist.get_type() == ObHistType::HYBIRD) {
if (OB_ISNULL(hist.get_buckets()) ||
OB_UNLIKELY(hist.get_bucket_size() < 1 || col_param.bucket_num_ < 1)) {
if (OB_UNLIKELY(hist.get_bucket_size() < 1 || col_param.bucket_num_ < 1)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected error", K(ret), K(hist.get_buckets()), K(hist.get_bucket_size()),
LOG_WARN("get unexpected error", K(ret), K(hist.get_bucket_size()),
K(col_param.bucket_num_), K(*col_stats.at(j)));
} else {
bool is_even_dist = false;
int64_t standard_cnt = hist.get_type() == ObHistType::FREQUENCY ?
hist.get_buckets()[0].endpoint_num_ :
hist.get_buckets().at(0).endpoint_num_ :
hist.get_sample_size() / col_param.bucket_num_;
if (OB_FAIL(ObDbmsStatsUtils::check_range_skew(hist.get_type(),
hist.get_buckets(),
hist.get_bucket_size(),
standard_cnt,
is_even_dist))) {
LOG_WARN("failed to check range skew", K(ret));

View File

@ -40,7 +40,7 @@ namespace common {
#define CREATE_MYSQL_STAT_TABLE "(STATID VARCHAR(128), TYPE CHAR(1), VERSION DECIMAL,FLAGS DECIMAL,\
C1 VARCHAR(128),C2 VARCHAR(128), C3 VARCHAR(128),C4 VARCHAR(128),\
C5 VARCHAR(128), C6 VARCHAR(128), N1 DECIMAL, N2 DECIMAL,\
C5 VARCHAR(128), C6 VARCHAR(128), N1 DECIMAL, N2 DOUBLE,\
N3 DECIMAL, N4 DECIMAL, N5 DECIMAL, N6 DECIMAL, N7 DECIMAL,\
N8 DECIMAL, N9 DECIMAL, N10 DECIMAL, N11 DECIMAL, N12 DECIMAL,\
N13 DECIMAL, D1 TIMESTAMP(6), T1 TIMESTAMP, R1 TEXT(1000), \
@ -73,7 +73,7 @@ namespace common {
stat.distinct_cnt n1, stat.density n2, null n3, stat.sample_size n4, \
stat.null_cnt n5, NULL n6, NULL n7, stat.avg_len n8, 1 n9, \
hist.endpoint_num n10, hist.endpoint_normalized_value n11, \
hist.endpoint_repeat_cnt n12, null n13, last_analyzed d1, null t1, \
hist.endpoint_repeat_cnt n12, stat.bucket_cnt n13, last_analyzed d1, null t1, \
stat.b_min_value r1, stat.b_max_value r2, hist.b_endpoint_value r3, \
null ch1, null cl1, null bl1, stat.distinct_cnt_synopsis_size ob_spec1,\
null ob_spec2, stat.distinct_cnt_synopsis ob_spec3 from\
@ -600,8 +600,9 @@ int ObDbmsStatsExportImport::do_import_stats(ObExecContext &ctx,
}
if (OB_ITER_END != ret) {
LOG_WARN("failed to get result", K(ret));
} else if (OB_FAIL(check_col_stat_validity(all_cstats))) {
LOG_WARN("failed to check col stat validity", K(ret));
} else {
ret = OB_SUCCESS;
ObSEArray<ObOptTableStatHandle, 4> history_tab_handles;
ObSEArray<ObOptColumnStatHandle, 4> history_col_handles;
//before import, we need record history stats.
@ -656,7 +657,7 @@ int ObDbmsStatsExportImport::do_import_stats(ObExecContext &ctx,
* 19.N10 NUMBER <==> Endpoint number('C')
* 20.N11 NUMBER <==> Endpoint value('C')
* 21.N12 NUMBER <==> ENDPOINT_REPEAT_COUNT('C')
* 22.N13 NUMBER <==> NULL
* 22.N13 NUMBER <==> bucket_cnt('C')
* 23.D1 DATE <==> Last analyzed
* 24.T1 TIMESTAMP(6) WITH TIME ZONE <==> NULL
* 25.R1 RAW(32) <==> Lower raw value('C')
@ -830,14 +831,16 @@ int ObDbmsStatsExportImport::get_opt_stat(ObExecContext &ctx,
number::ObNumber num_val;
int64_t int_val = 0;
double double_val = 0.0;
if (!result_objs.at(i).is_null() && OB_FAIL(result_objs.at(i).get_number(num_val))) {
LOG_WARN("failed to get number", K(ret));
if (result_objs.at(i).is_number() &&
(OB_FAIL(result_objs.at(i).get_number(num_val)) ||
OB_FAIL(ObDbmsStatsUtils::cast_number_to_double(num_val, double_val)))) {
LOG_WARN("failed to get double", K(ret));
} else if (result_objs.at(i).is_double() && OB_FAIL(result_objs.at(i).get_double(double_val))) {
LOG_WARN("failed to get double", K(ret));
} else if (stat_type == TABLE_STAT || stat_type == INDEX_STAT) {
/*do nothing*/
} else if (stat_type == COLUMN_STAT) {
if (OB_FAIL(ObDbmsStatsUtils::cast_number_to_double(num_val, double_val))) {
LOG_WARN("failed to cast number to double" , K(ret));
} else if (double_val > 0.0) {
if (double_val > 0.0) {
/*do nothing*/
if (OB_UNLIKELY(hist_type == INVALID_TYPE)) {
ret = OB_ERR_DBMS_STATS_PL;
@ -972,7 +975,31 @@ int ObDbmsStatsExportImport::get_opt_stat(ObExecContext &ctx,
}
break;
}
case StatTableColumnName::N13: {//not used
case StatTableColumnName::N13: {//bucket_cnt('C')
number::ObNumber num_val;
int64_t int_val = 0;
if (stat_type != COLUMN_STAT) {
if (OB_UNLIKELY(!result_objs.at(i).is_null())) {
ret = OB_ERR_DBMS_STATS_PL;
LOG_WARN("Invalid or inconsistent input values", K(ret), K(result_objs.at(i)));
LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL, "Invalid or inconsistent input values");
}
} else if (!result_objs.at(i).is_null() &&
OB_FAIL(result_objs.at(i).get_number(num_val))) {
LOG_WARN("failed to get number", K(ret));
} else if (OB_FAIL(num_val.extract_valid_int64_with_trunc(int_val))) {
LOG_WARN("extract_valid_int64_with_trunc failed", K(ret), K(num_val));
} else if (int_val > 0) {
if (OB_UNLIKELY(col_stat->get_histogram().get_density() <= 0.0)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected error", K(result_objs), K(ret), KPC(col_stat));
} else if (col_stat->get_histogram().get_buckets().empty()) {
if (OB_FAIL(col_stat->get_histogram().prepare_allocate_buckets(ctx.get_allocator(),
int_val))) {
LOG_WARN("failed to prepare allocate buckets", K(ret));
} else {/*do nothing*/}
}
}
break;
}
case StatTableColumnName::D1: {//Last analyzed
@ -1063,8 +1090,13 @@ int ObDbmsStatsExportImport::get_opt_stat(ObExecContext &ctx,
result_objs.at(i),
hist_bucket.endpoint_value_))) {
LOG_WARN("failed to convert bin hex text to obj", K(ret));
} else if (OB_UNLIKELY(col_stat->get_histogram().get_bucket_cnt() >=
col_stat->get_histogram().get_bucket_size())) {
ret = OB_ERR_DBMS_STATS_PL;
LOG_WARN("Invalid or inconsistent input values", K(ret), K(result_objs.at(i)));
LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL, "Invalid or inconsistent input values");
} else if (OB_FAIL(col_stat->get_histogram().add_bucket(hist_bucket))) {
LOG_WARN("failed to add bucket", K(ret));
LOG_WARN("failed to push back", K(ret));
} else {/*do nothing*/}
break;
}
@ -1533,5 +1565,22 @@ int ObDbmsStatsExportImport::gen_import_column_list(const ObIArray<ObColumnStatP
return ret;
}
int ObDbmsStatsExportImport::check_col_stat_validity(ObIArray<ObOptColumnStat *> &all_cstats)
{
int ret = OB_SUCCESS;
for (int64_t i = 0; OB_SUCC(ret) && i < all_cstats.count(); ++i) {
if (OB_ISNULL(all_cstats.at(i))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected error", K(all_cstats.at(i)), K(ret));
} else if (OB_UNLIKELY(all_cstats.at(i)->get_histogram().get_bucket_cnt() !=
all_cstats.at(i)->get_histogram().get_bucket_size())) {
ret = OB_ERR_DBMS_STATS_PL;
LOG_WARN("Invalid or inconsistent input values", K(ret), KPC(all_cstats.at(i)));
LOG_USER_ERROR(OB_ERR_DBMS_STATS_PL, "Invalid or inconsistent input values");
}
}
return ret;
}
} // namespace common
} // namespace oceanbase

View File

@ -155,6 +155,8 @@ private:
static int gen_import_column_list(const ObIArray<ObColumnStatParam> &column_param,
ObSqlString &column_list);
static int check_col_stat_validity(ObIArray<ObOptColumnStat *> &all_cstats);
};
} // end of sql

View File

@ -529,7 +529,7 @@ int ObDbmsStatsHistoryManager::fill_column_stat_history(ObIAllocator &allocator,
EXTRACT_INT_FIELD_MYSQL(result, "distinct_cnt_synopsis_size", llc_bitmap_size, int64_t);
if (OB_SUCC(ret)) {
hist.set_type(histogram_type);
if (hist.is_valid() && OB_FAIL(hist.prepare_allocate_buckets(bucket_cnt))) {
if (hist.is_valid() && OB_FAIL(hist.prepare_allocate_buckets(allocator, bucket_cnt))) {
LOG_WARN("failed to prepare allocate buckets", K(ret));
}
}

View File

@ -89,29 +89,26 @@ int ObDbmsStatsUtils::init_col_stats(ObIAllocator &allocator,
* then it's even distributed, Otherwise, it's skewed.
*/
int ObDbmsStatsUtils::check_range_skew(ObHistType hist_type,
const ObHistBucket *bkts,
const int64_t bkt_size,
const ObHistogram::Buckets &bkts,
int64_t standard_cnt,
bool &is_even_distributed)
{
int ret = OB_SUCCESS;
is_even_distributed = false;
if (OB_ISNULL(bkts) || OB_UNLIKELY(bkt_size == 0)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected error", K(ret), K(bkts), K(bkt_size));
} else if (hist_type == ObHistType::FREQUENCY) {
if (hist_type == ObHistType::FREQUENCY) {
is_even_distributed = true;
for (int64_t i = 0; is_even_distributed && i < bkt_size; ++i) {
for (int64_t i = 0; is_even_distributed && i < bkts.count(); ++i) {
if (i == 0) {
is_even_distributed = standard_cnt == bkts[i].endpoint_num_;
is_even_distributed = standard_cnt == bkts.at(i).endpoint_num_;
} else {
is_even_distributed = standard_cnt == bkts[i].endpoint_num_ - bkts[i - 1].endpoint_num_;
is_even_distributed = standard_cnt == bkts.at(i).endpoint_num_ -
bkts.at(i - 1).endpoint_num_;
}
}
} else if (hist_type == ObHistType::HYBIRD) {
is_even_distributed = true;
for (int64_t i = 0; is_even_distributed && i < bkt_size; ++i) {
is_even_distributed = bkts[i].endpoint_repeat_count_ <= standard_cnt;
for (int64_t i = 0; is_even_distributed && i < bkts.count(); ++i) {
is_even_distributed = bkts.at(i).endpoint_repeat_count_ <= standard_cnt;
}
} else {/*do nothing*/}
return ret;

View File

@ -34,8 +34,7 @@ public:
ObIArray<ObOptColumnStat *> &col_stats);
static int check_range_skew(ObHistType hist_type,
const ObHistBucket *bkts,
const int64_t bkt_size,
const ObHistogram::Buckets &bkts,
int64_t standard_cnt,
bool &is_even_distributed);

View File

@ -420,7 +420,7 @@ int ObHybridHistEstimator::try_build_hybrid_hist(const ObColumnStatParam &param,
ObSEArray<BucketNode, 4> pairs;
for (int64_t i = 0; OB_SUCC(ret) && i < col_stat.get_histogram().get_bucket_size(); ++i) {
const ObHistBucket &hist_bucket = col_stat.get_histogram().get_buckets()[i];
const ObHistBucket &hist_bucket = col_stat.get_histogram().get(i);
if (OB_FAIL(pairs.push_back(BucketNode(hist_bucket.endpoint_value_,
hist_bucket.endpoint_repeat_count_)))) {
LOG_WARN("failed to push back new entry", K(ret));
@ -434,9 +434,12 @@ int ObHybridHistEstimator::try_build_hybrid_hist(const ObColumnStatParam &param,
num_distinct))) {
LOG_WARN("failed to do build hybrid hist", K(ret));
} else {
col_stat.get_histogram().reset();
if (OB_FAIL(col_stat.get_histogram().add_buckets(hybrid_hist.get_buckets()))) {
LOG_WARN("failed to append hist bucket", K(ret));
col_stat.get_histogram().get_buckets().reset();
if (OB_FAIL(col_stat.get_histogram().prepare_allocate_buckets(ctx_.get_allocator(),
hybrid_hist.get_buckets().count()))) {
LOG_WARN("failed to prepare allocate buckets", K(ret));
} else if (OB_FAIL(col_stat.get_histogram().assign_buckets(hybrid_hist.get_buckets()))) {
LOG_WARN("failed to assign buckets", K(ret));
} else {
col_stat.get_histogram().set_type(ObHistType::HYBIRD);
col_stat.get_histogram().set_sample_size(total_count);

View File

@ -607,7 +607,7 @@ int ObIncrementalStatEstimator::derive_global_col_stat(ObExecContext &ctx,
ObGlobalNotNullEval not_null_eval;
ObGlobalNdvEval ndv_eval;
ObGlobalAvglenEval avglen_eval;
ObSEArray<ObHistogram *, 4> all_part_histograms;
ObSEArray<ObHistogram, 4> all_part_histograms;
int64_t total_avg_len = 0;
int64_t max_bucket_num = param.column_params_.at(i).bucket_num_;
for (int64_t j = 0; OB_SUCC(ret) && j < part_cnt; ++j) {
@ -631,7 +631,7 @@ int ObIncrementalStatEstimator::derive_global_col_stat(ObExecContext &ctx,
} else if (opt_col_stat->get_num_distinct() == 0 && opt_col_stat->get_num_null() == 0) {
/*do nothing*/
} else if (need_drive_hist && opt_col_stat->get_histogram().is_valid() &&
OB_FAIL(all_part_histograms.push_back(&opt_col_stat->get_histogram()))) {
OB_FAIL(all_part_histograms.push_back(opt_col_stat->get_histogram()))) {
LOG_WARN("failed to push back histogram", K(ret));
} else {
need_drive_hist &= opt_col_stat->get_histogram().is_valid();
@ -706,7 +706,7 @@ int ObIncrementalStatEstimator::derive_global_col_stat(ObExecContext &ctx,
return ret;
}
int ObIncrementalStatEstimator::derive_global_histogram(ObIArray<ObHistogram*> &all_part_histograms,
int ObIncrementalStatEstimator::derive_global_histogram(ObIArray<ObHistogram> &all_part_histograms,
common::ObIAllocator &allocator,
int64_t max_bucket_num,
int64_t total_row_count,
@ -727,31 +727,26 @@ int ObIncrementalStatEstimator::derive_global_histogram(ObIArray<ObHistogram*> &
top_k_fre_hist->set_window_size(1000);
top_k_fre_hist->set_item_size(256);
for (int64_t i = 0; OB_SUCC(ret) && i < all_part_histograms.count(); ++i) {
if (all_part_histograms.at(i) != NULL && all_part_histograms.at(i)->is_valid()) {
if (all_part_histograms.at(i)->get_type() == ObHistType::FREQUENCY ||
all_part_histograms.at(i)->get_type() == ObHistType::TOP_FREQUENCY ||
all_part_histograms.at(i)->get_type() == ObHistType::HYBIRD) {
const ObHistBucket *part_bkts = all_part_histograms.at(i)->get_buckets();
const int64_t part_bkt_size = all_part_histograms.at(i)->get_bucket_size();
if (OB_ISNULL(part_bkts) || OB_UNLIKELY(part_bkt_size == 0)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected error", K(ret), K(part_bkts), K(part_bkt_size));
} else {
for (int64_t j = 0; OB_SUCC(ret) && j < part_bkt_size; ++j) {
for (int64_t k = 0; OB_SUCC(ret) && k < part_bkts[j].endpoint_repeat_count_; ++k) {
if (OB_FAIL(top_k_fre_hist->add_top_k_frequency_item(part_bkts[j].endpoint_value_))) {
LOG_WARN("failed to add topk frequency item", K(ret));
} else {/*do nothing*/}
}
if (all_part_histograms.at(i).is_valid()) {
if (all_part_histograms.at(i).get_type() == ObHistType::FREQUENCY ||
all_part_histograms.at(i).get_type() == ObHistType::TOP_FREQUENCY ||
all_part_histograms.at(i).get_type() == ObHistType::HYBIRD) {
const ObHistogram::Buckets &part_bkts = all_part_histograms.at(i).get_buckets();
for (int64_t j = 0; OB_SUCC(ret) && j < part_bkts.count(); ++j) {
for (int64_t k = 0; OB_SUCC(ret) && k < part_bkts.at(j).endpoint_repeat_count_; ++k) {
if (OB_FAIL(top_k_fre_hist->add_top_k_frequency_item(
part_bkts.at(j).endpoint_value_))) {
LOG_WARN("failed to add topk frequency item", K(ret));
} else {/*do nothing*/}
}
}
} else {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected hist type", K(ret), K(all_part_histograms.at(i)->get_type()));
LOG_WARN("get unexpected hist type", K(ret), K(all_part_histograms.at(i).get_type()));
}
} else {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected error", KPC(all_part_histograms.at(i)));
LOG_WARN("get unexpected error", K(all_part_histograms.at(i)));
}
}
if (OB_SUCC(ret)) {
@ -760,6 +755,7 @@ int ObIncrementalStatEstimator::derive_global_histogram(ObIArray<ObHistogram*> &
} else if (top_k_fre_hist->get_buckets().count() == 0) {
/*do nothing*/
} else if (OB_FAIL(ObStatTopKHist::build_histogram_from_topk_items(
allocator,
top_k_fre_hist->get_buckets(),
max_bucket_num,
total_row_count,

View File

@ -108,7 +108,7 @@ private:
bool &need_gather_hybrid_hist,
ObOptStat &global_opt_stat);
static int derive_global_histogram(ObIArray<ObHistogram*> &all_part_histogram,
static int derive_global_histogram(ObIArray<ObHistogram> &all_part_histogram,
common::ObIAllocator &allocator,
int64_t max_bucket_num,
int64_t total_row_count,

View File

@ -42,18 +42,8 @@ void ObHistogram::reset()
type_ = ObHistType::INVALID_TYPE;
sample_size_ = -1;
density_ = -1;
if (NULL != buckets_) {
for (int64_t i = 0; i < bucket_size_; ++i) {
buckets_[i].~ObHistBucket();
}
get_allocator().free(buckets_);
inner_allocator_.reset();
buckets_ = NULL;
bucket_size_ = 0;
max_bucket_size_ = 0;
}
pop_freq_ = 0;
pop_count_ = 0;
bucket_cnt_ = 0;
buckets_.reset();
}
const char *ObHistogram::get_type_name() const
@ -74,8 +64,8 @@ const char *ObHistogram::get_type_name() const
int64_t ObHistogram::deep_copy_size() const
{
int64_t size = sizeof(*this);
for (int64_t i = 0; i < bucket_size_; ++i) {
size += sizeof(ObHistBucket) + buckets_[i].deep_copy_size();
for (int64_t i = 0; i < buckets_.count(); ++i) {
size += sizeof(ObHistBucket) + buckets_.at(i).deep_copy_size();
}
return size;
}
@ -86,17 +76,17 @@ int ObHistogram::deep_copy(const ObHistogram &src, char *buf, const int64_t buf_
type_ = src.type_;
sample_size_ = src.sample_size_;
density_ = src.density_;
bucket_cnt_ = src.bucket_cnt_;
int64_t copy_size = src.deep_copy_size();
if (OB_UNLIKELY(copy_size + pos > buf_len)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("buffer size is not enough", K(ret), K(copy_size), K(pos), K(buf_len));
} else if (src.bucket_size_ > 0 && src.buckets_ != NULL) {
buckets_ = new (buf + pos) ObHistBucket[src.bucket_size_];
bucket_size_ = src.bucket_size_;
max_bucket_size_ = src.bucket_size_;
pos += sizeof(ObHistBucket) * bucket_size_;
for (int64_t i = 0; OB_SUCC(ret) && i < bucket_size_; ++i) {
if (OB_FAIL(buckets_[i].deep_copy(src.buckets_[i], buf, buf_len, pos))) {
} else if (!src.buckets_.empty()) {
ObHistBucket *new_buckets = new (buf + pos) ObHistBucket[src.buckets_.count()];
buckets_ = ObArrayWrap<ObHistBucket>(new_buckets, src.buckets_.count());
pos += sizeof(ObHistBucket) * src.buckets_.count();
for (int64_t i = 0; OB_SUCC(ret) && i < buckets_.count(); ++i) {
if (OB_FAIL(buckets_.at(i).deep_copy(src.buckets_.at(i), buf, buf_len, pos))) {
LOG_WARN("deep copy bucket failed", K(ret), K(buf_len), K(pos));
}
}
@ -104,128 +94,51 @@ int ObHistogram::deep_copy(const ObHistogram &src, char *buf, const int64_t buf_
return ret;
}
int ObHistogram::assign_buckets(const ObHistBucket *buckets, const int64_t bucket_size)
int ObHistogram::prepare_allocate_buckets(ObIAllocator &allocator, const int64_t bucket_size)
{
int ret = OB_SUCCESS;
void *buf = NULL;
if (buckets == NULL || bucket_size == 0) {
//do nothing
} else if (buckets_ != NULL || OB_UNLIKELY(bucket_size_ > 0)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected null", K(ret), K(buckets_), K(bucket_size_));
} else if (OB_ISNULL(buf = get_allocator().alloc(sizeof(ObHistBucket) * bucket_size))) {
COMMON_LOG_RET(WARN, OB_ALLOCATE_MEMORY_FAILED, "allocate memory for buckets failed.");
} else {
buckets_ = new (buf) ObHistBucket[bucket_size];
bucket_size_ = bucket_size;
max_bucket_size_ = bucket_size;
for (int64_t i = 0; i < bucket_size_; ++i) {
buckets_[i].endpoint_repeat_count_ = buckets[i].endpoint_repeat_count_;
buckets_[i].endpoint_num_ = buckets[i].endpoint_num_;
buckets_[i].endpoint_value_ = buckets[i].endpoint_value_;
}
if (OB_FAIL(buckets_.allocate_array(allocator, bucket_size))) {
LOG_WARN("failed to prepare allocate buckets", K(ret));
}
return ret;
}
//the endpoint value is shallow copy!!!!!!!!!
int ObHistogram::add_buckets(const ObIArray<ObHistBucket> &buckets)
{
int ret = OB_SUCCESS;
void *buf = NULL;
if (buckets.empty()) {
//do nothing
} else if (buckets_ != NULL || OB_UNLIKELY(bucket_size_ > 0)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected null", K(ret), K(buckets_), K(bucket_size_));
} else if (OB_ISNULL(buf = get_allocator().alloc(sizeof(ObHistBucket) * buckets.count()))) {
COMMON_LOG_RET(WARN, OB_ALLOCATE_MEMORY_FAILED, "allocate memory for buckets failed.");
} else {
buckets_ = new (buf) ObHistBucket[buckets.count()];
bucket_size_ = buckets.count();
max_bucket_size_ = buckets.count();
for (int64_t i = 0; i < bucket_size_; ++i) {
buckets_[i].endpoint_repeat_count_ = buckets.at(i).endpoint_repeat_count_;
buckets_[i].endpoint_num_ = buckets.at(i).endpoint_num_;
buckets_[i].endpoint_value_ = buckets.at(i).endpoint_value_;
}
}
return ret;
}
//the endpoint value is shallow copy!!!!!!!!!
int ObHistogram::add_bucket(const ObHistBucket &bucket)
{
int ret = OB_SUCCESS;
if (OB_ISNULL(buckets_) && bucket_size_ != 0) {
if (OB_UNLIKELY(bucket_cnt_ >= buckets_.count())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected null", K(ret), K(buckets_), K(bucket_size_), K(max_bucket_size_));
} else if (bucket_size_ < max_bucket_size_) {
if (OB_ISNULL(buckets_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected null", K(ret), K(buckets_), K(bucket_size_), K(max_bucket_size_));
} else {
buckets_[bucket_size_].endpoint_repeat_count_ = bucket.endpoint_repeat_count_;
buckets_[bucket_size_].endpoint_num_ = bucket.endpoint_num_;
buckets_[bucket_size_].endpoint_value_ = bucket.endpoint_value_;
++ bucket_size_;
}
LOG_WARN("get unexpected null", K(ret), K(bucket_cnt_), K(buckets_));
} else {
void *buf = NULL;
max_bucket_size_ = bucket_size_ == 0 ? 1 : 2 * bucket_size_;
if (OB_ISNULL(buf = get_allocator().alloc(sizeof(ObHistBucket) * max_bucket_size_))) {
COMMON_LOG_RET(WARN, OB_ALLOCATE_MEMORY_FAILED, "allocate memory for buckets failed.");
} else {
ObHistBucket *new_buckets = new (buf) ObHistBucket[max_bucket_size_];
for (int64_t i = 0; i < bucket_size_; ++i) {
new_buckets[i].endpoint_repeat_count_ = buckets_[i].endpoint_repeat_count_;
new_buckets[i].endpoint_num_ = buckets_[i].endpoint_num_;
new_buckets[i].endpoint_value_ = buckets_[i].endpoint_value_;
}
new_buckets[bucket_size_].endpoint_repeat_count_ = bucket.endpoint_repeat_count_;
new_buckets[bucket_size_].endpoint_num_ = bucket.endpoint_num_;
new_buckets[bucket_size_].endpoint_value_ = bucket.endpoint_value_;
++ bucket_size_;
get_allocator().free(buckets_);
buckets_ = new_buckets;
}
buckets_.at(bucket_cnt_++) = bucket;
}
return ret;
}
int ObHistogram::prepare_allocate_buckets(const int64_t buckets_num)
int ObHistogram::assign_buckets(const ObIArray<ObHistBucket> &buckets)
{
int ret = OB_SUCCESS;
void *buf = NULL;
if (buckets_ != NULL || OB_UNLIKELY(bucket_size_ > 0 || buckets_num <= 0)) {
if (OB_UNLIKELY(buckets_.count() != buckets.count() || bucket_cnt_ != buckets.count())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected null", K(ret), K(buckets_), K(bucket_size_), K(buckets_num));
} else if (OB_ISNULL(buf = get_allocator().alloc(sizeof(ObHistBucket) * buckets_num))) {
COMMON_LOG_RET(WARN, OB_ALLOCATE_MEMORY_FAILED, "allocate memory for buckets failed.");
LOG_WARN("get unexpected null", K(ret), K(buckets_), K(buckets), K(bucket_cnt_));
} else {
buckets_ = new (buf) ObHistBucket[buckets_num];
max_bucket_size_ = buckets_num;
for (int64_t i = 0; i < buckets.count(); ++i) {
buckets_.at(i) = buckets.at(i);
}
}
return ret;
}
int64_t ObHistogram::to_string(char *buf, const int64_t buf_len) const
int ObHistogram::assign(const ObHistogram &other)
{
int64_t pos = 0;
J_OBJ_START();
J_KV("Type", get_type_name(),
K_(sample_size),
K_(density),
K_(bucket_size),
K_(max_bucket_size),
K_(buckets));
if (buckets_ != NULL && bucket_size_ > 0 && max_bucket_size_ > 0) {
for (int64_t i = 0; i < bucket_size_; ++i) {
J_KV(K(buckets_[i]));
}
}
J_OBJ_END();
return pos;
int ret = OB_SUCCESS;
type_ = other.type_;
sample_size_ = other.sample_size_;
density_ = other.density_;
bucket_cnt_ = other.bucket_cnt_;
pop_freq_ = other.pop_freq_;
pop_count_ = other.pop_count_;
return buckets_.assign(other.buckets_);
}
ObOptColumnStat::ObOptColumnStat()
@ -266,7 +179,7 @@ ObOptColumnStat::ObOptColumnStat(ObIAllocator &allocator)
max_value_(),
llc_bitmap_size_(0),
llc_bitmap_(NULL),
histogram_(allocator),
histogram_(),
last_analyzed_(0),
cs_type_(CS_TYPE_INVALID),
inner_max_allocator_("OptColStatMax"),
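A sketch of the call-site pattern the new API expects, mirroring the implementations above (hist, allocator and src_bkts stand in for whatever the caller actually has in scope):

int ret = OB_SUCCESS;
// reserve exactly src_bkts.count() slots once, from an allocator that outlives
// the histogram; there is no grow-on-append reallocation any more
if (OB_FAIL(hist.prepare_allocate_buckets(allocator, src_bkts.count()))) {
  LOG_WARN("failed to prepare allocate buckets", K(ret));
} else {
  // append path: add_bucket() fills buckets_.at(bucket_cnt_++) and returns
  // OB_ERR_UNEXPECTED once the reserved count would be exceeded
  for (int64_t i = 0; OB_SUCC(ret) && i < src_bkts.count(); ++i) {
    if (OB_FAIL(hist.add_bucket(src_bkts.at(i)))) {
      LOG_WARN("failed to add bucket", K(ret));
    }
  }
}

The bulk-copy alternative used by the top-k and hybrid histogram builders below is set_bucket_cnt(src_bkts.count()) followed by assign_buckets(src_bkts), which checks that the reserved count and bucket_cnt_ both match the source count before copying.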

View File

@ -72,6 +72,7 @@ class ObHistogram
public:
friend class ObOptColumnStat;
typedef ObArrayWrap<ObHistBucket> Buckets;
enum class BoundType {
LOWER,
UPPER,
@ -82,35 +83,18 @@ public:
type_(ObHistType::INVALID_TYPE),
sample_size_(-1),
density_(0),
bucket_size_(0),
max_bucket_size_(0),
buckets_(NULL),
bucket_cnt_(0),
buckets_(),
pop_freq_(0),
pop_count_(0),
allocator_(NULL),
inner_allocator_("ObHistogram")
{}
ObHistogram(ObIAllocator &allocator) :
type_(ObHistType::INVALID_TYPE),
sample_size_(-1),
density_(0),
bucket_size_(0),
max_bucket_size_(0),
buckets_(NULL),
pop_freq_(0),
pop_count_(0),
allocator_(&allocator),
inner_allocator_("ObHistogram")
{}
pop_count_(0)
{}
~ObHistogram() { reset(); }
void reset();
int deep_copy(const ObHistogram &src, char *buf, const int64_t buf_len, int64_t &pos);
int assign_buckets(const ObHistBucket *buckets, const int64_t bucket_size);
int assign(const ObHistogram &other);
int64_t deep_copy_size() const;
bool is_valid() const
@ -128,43 +112,44 @@ public:
void set_sample_size(int64_t sample_size) { sample_size_ = sample_size; }
double get_density() const { return density_; }
void set_density(double density) { density_ = density; }
int64_t get_bucket_size() const { return bucket_size_; }
int64_t get_bucket_cnt() const { return bucket_cnt_; }
void set_bucket_cnt(int64_t bucket_cnt) { bucket_cnt_ = bucket_cnt; }
int add_buckets(const ObIArray<ObHistBucket> &buckets);
int64_t get_bucket_size() const { return buckets_.count(); }
int add_bucket(const ObHistBucket &bucket);
int prepare_allocate_buckets(const int64_t buckets_num);
ObHistBucket *get_buckets() { return buckets_; }
const ObHistBucket *get_buckets() const { return buckets_; }
ObHistBucket &get(int64_t i) { return buckets_.at(i); }
const ObHistBucket &get(int64_t i) const { return buckets_.at(i); }
Buckets &get_buckets() { return buckets_; }
const Buckets &get_buckets() const { return buckets_; }
int64_t get_pop_frequency() const { return pop_freq_; }
void set_pop_frequency(int64_t pop_freq) { pop_freq_ = pop_freq; }
int64_t get_pop_count() const { return pop_count_; }
void set_pop_count(int64_t pop_count) { pop_count_ = pop_count; }
int prepare_allocate_buckets(ObIAllocator &allocator, const int64_t bucket_size);
int add_bucket(const ObHistBucket &bucket);
int assign_buckets(const ObIArray<ObHistBucket> &buckets);
void calc_density(ObHistType hist_type,
const int64_t row_count,
const int64_t pop_row_count,
const int64_t ndv,
const int64_t pop_ndv);
ObIAllocator &get_allocator() { return allocator_ != NULL ? *allocator_ : inner_allocator_; }
int64_t to_string(char* buf, const int64_t buf_len) const;
TO_STRING_KV("Type", get_type_name(),
K_(sample_size),
K_(density),
K_(bucket_cnt),
K_(buckets));
protected:
ObHistType type_;
int64_t sample_size_;
double density_;
int64_t bucket_size_;
int64_t max_bucket_size_;
ObHistBucket *buckets_;
int64_t bucket_cnt_;
Buckets buckets_;
int64_t pop_freq_; // only used during gather table stats
int64_t pop_count_; // only used during gather table stats
ObIAllocator *allocator_;
ObArenaAllocator inner_allocator_;
};
class ObOptColumnStat : public common::ObIKVCacheValue
@ -279,6 +264,7 @@ public:
const ObHistogram &get_histogram() const { return histogram_; }
ObHistogram &get_histogram() { return histogram_; }
int64_t get_bucket_num() const { return histogram_.get_bucket_cnt(); }
virtual int64_t size() const override;
virtual int deep_copy(char *buf, const int64_t buf_len, ObIKVCacheValue *&value) const override;
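On the read side, callers switch from indexing the raw pointer to the new accessors; a sketch of the iteration pattern used by the SQL service and selectivity code below (total is just an illustrative accumulator):

const ObHistogram &hist = col_stat.get_histogram();
int64_t total = 0;
// get_bucket_size() is now buckets_.count(), i.e. the reserved slots;
// get_bucket_cnt() is how many of them were actually filled
for (int64_t i = 0; i < hist.get_bucket_size(); ++i) {
  const ObHistBucket &bkt = hist.get(i);   // was hist.get_buckets()[i]
  total += bkt.endpoint_repeat_count_;
}

Because an ObArrayWrap is never a dangling pointer, the old OB_ISNULL(hist.get_buckets()) guards become unnecessary, which is why the first hunk in this commit drops them.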

View File

@ -540,42 +540,27 @@ int ObOptStatSqlService::construct_delete_column_histogram_sql(const uint64_t te
ObSqlString &delete_histogram_sql)
{
int ret = OB_SUCCESS;
ObSEArray<ObOptKeyColumnStat, 4> key_column_stats;
ObArenaAllocator allocator(ObModIds::OB_BUFFER);
ObSqlString where_str;
const uint64_t exec_tenant_id = ObSchemaUtils::get_exec_tenant_id(tenant_id);
for (int64_t i = 0; OB_SUCC(ret) && i < column_stats.count(); ++i) {
if (OB_ISNULL(column_stats.at(i))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected null", K(ret), K(column_stats.at(i)));
} else {
ObOptColumnStat::Key check_key(tenant_id,
column_stats.at(i)->get_table_id(),
} else if (where_str.append_fmt(" %s (%lu, %ld, %ld, %lu) %s",
i != 0 ? "," : "(TENANT_ID, TABLE_ID, PARTITION_ID, COLUMN_ID) IN (",
ObSchemaUtils::get_extract_tenant_id(exec_tenant_id, tenant_id),
ObSchemaUtils::get_extract_schema_id(exec_tenant_id, column_stats.at(i)->get_table_id()),
column_stats.at(i)->get_partition_id(),
column_stats.at(i)->get_column_id());
void *ptr = NULL;
if (OB_ISNULL(ptr = allocator.alloc(sizeof(ObOptColumnStat::Key)))) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("memory is not enough", K(ret), K(ptr));
} else {
ObOptKeyColumnStat tmp_key_col_stat;
tmp_key_col_stat.key_ = new (ptr) ObOptColumnStat::Key(tenant_id,
column_stats.at(i)->get_table_id(),
column_stats.at(i)->get_partition_id(),
column_stats.at(i)->get_column_id());
tmp_key_col_stat.stat_ = const_cast<ObOptColumnStat*>(column_stats.at(i));
if (OB_FAIL(key_column_stats.push_back(tmp_key_col_stat))) {
LOG_WARN("failed to push back", K(ret));
} else {/*do nothing*/}
}
column_stats.at(i)->get_column_id(),
i == column_stats.count() - 1 ? ")" : "")) {
LOG_WARN("failed to append fmt", K(ret));
}
}
if (OB_SUCC(ret) && !key_column_stats.empty()) {
ObSqlString keys_list_str;
if (OB_FAIL(generate_specified_keys_list_str(tenant_id, key_column_stats, keys_list_str))) {
LOG_WARN("failed to generate specified keys list str", K(ret), K(key_column_stats));
} else if (OB_FAIL(delete_histogram_sql.append_fmt(" %s %.*s;", DELETE_HISTOGRAM_STAT_SQL,
keys_list_str.string().length(),
keys_list_str.string().ptr()))) {
LOG_WARN("fail to append SQL where string.", K(ret));
if (OB_SUCC(ret) && !where_str.empty()) {
if (OB_FAIL(delete_histogram_sql.append_fmt(" %s %.*s;", DELETE_HISTOGRAM_STAT_SQL,
where_str.string().length(),
where_str.string().ptr()))) {
LOG_WARN("fail to append SQL where string.", K(ret));
} else {
LOG_TRACE("Succeed to construct delete column histogram sql", K(delete_histogram_sql));
}
@ -617,7 +602,7 @@ int ObOptStatSqlService::construct_histogram_insert_sql(share::schema::ObSchemaG
} else if (OB_FAIL(get_histogram_stat_history_sql(tenant_id,
*column_stats.at(i),
allocator,
hist.get_buckets()[j],
hist.get(j),
current_time,
endpoint_meta,
tmp))) {
@ -630,7 +615,7 @@ int ObOptStatSqlService::construct_histogram_insert_sql(share::schema::ObSchemaG
} else if (!need_histogram && OB_FAIL(insert_histogram_sql.append(INSERT_HISTOGRAM_STAT_SQL))) {
LOG_WARN("failed to append sql", K(ret));
} else if (OB_FAIL(get_histogram_stat_sql(tenant_id, *column_stats.at(i),
allocator, hist.get_buckets()[j], endpoint_meta, tmp))) {
allocator, hist.get(j), endpoint_meta, tmp))) {
LOG_WARN("failed to get histogram sql", K(ret));
} else if (OB_FAIL(insert_histogram_sql.append_fmt("%s (%s)", (!need_histogram ? "" : ","), tmp.ptr()))) {
LOG_WARN("failed to append sql", K(ret));
@ -935,7 +920,7 @@ int ObOptStatSqlService::get_column_stat_sql(const uint64_t tenant_id,
OB_FAIL(dml_splicer.add_column("distinct_cnt_synopsis_size", llc_comp_size * 2)) ||
OB_FAIL(dml_splicer.add_column("sample_size", stat.get_histogram().get_sample_size())) ||
OB_FAIL(dml_splicer.add_column("density", stat.get_histogram().get_density())) ||
OB_FAIL(dml_splicer.add_column("bucket_cnt", stat.get_histogram().get_bucket_size())) ||
OB_FAIL(dml_splicer.add_column("bucket_cnt", stat.get_histogram().get_bucket_cnt())) ||
OB_FAIL(dml_splicer.add_column("histogram_type", stat.get_histogram().get_type())) ||
OB_FAIL(dml_splicer.add_column("global_stats", 0)) ||
OB_FAIL(dml_splicer.add_column("user_stats", 0))) {
@ -1010,7 +995,7 @@ int ObOptStatSqlService::get_column_stat_history_sql(const uint64_t tenant_id,
OB_FAIL(dml_splicer.add_column("distinct_cnt_synopsis_size", llc_comp_size * 2)) ||
OB_FAIL(dml_splicer.add_column("sample_size", stat.get_histogram().get_sample_size())) ||
OB_FAIL(dml_splicer.add_column("density", stat.get_histogram().get_density())) ||
OB_FAIL(dml_splicer.add_column("bucket_cnt", stat.get_histogram().get_bucket_size())) ||
OB_FAIL(dml_splicer.add_column("bucket_cnt", stat.get_histogram().get_bucket_cnt())) ||
OB_FAIL(dml_splicer.add_column("histogram_type", stat.get_histogram().get_type()))) {
LOG_WARN("failed to add dml splicer column", K(ret));
} else if (OB_FAIL(dml_splicer.splice_values(sql_string))) {
@ -1356,7 +1341,7 @@ int ObOptStatSqlService::fill_column_stat(ObIAllocator &allocator,
EXTRACT_INT_FIELD_MYSQL(result, "distinct_cnt_synopsis_size", llc_bitmap_size, int64_t);
if (OB_SUCC(ret)) {
hist.set_type(histogram_type);
if (hist.is_valid() && OB_FAIL(hist.prepare_allocate_buckets(bucket_cnt))) {
if (hist.is_valid() && OB_FAIL(hist.prepare_allocate_buckets(allocator, bucket_cnt))) {
LOG_WARN("failed to prepare allocate buckets", K(ret));
}
}
@ -1509,7 +1494,7 @@ int ObOptStatSqlService::fill_bucket_stat(ObIAllocator &allocator,
if (OB_FAIL(hex_str_to_obj(str.ptr(), str.length(), allocator, bkt.endpoint_value_))) {
LOG_WARN("deserialize object value failed.", K(stat), K(ret));
} else if (OB_FAIL(dst_key_col_stat.stat_->get_histogram().add_bucket(bkt))) {
LOG_WARN("failed to add backet", K(ret));
LOG_WARN("failed to push back buckets", K(ret));
} else {/*do nothing*/}
}
}

View File

@ -323,7 +323,7 @@ public:
ObBucketCompare &compare_;
};
int ObStatTopKHist::decode(ObObj &obj)
int ObStatTopKHist::decode(ObObj &obj, ObIAllocator &allocator)
{
int ret = OB_SUCCESS;
ObTopKFrequencyHistograms topk_hist;
@ -334,7 +334,8 @@ int ObStatTopKHist::decode(ObObj &obj)
LOG_WARN("param is null", K(ret), K(bucket_num), K(col_param_));
} else if (OB_FAIL(topk_hist.read_result(obj))) {
LOG_WARN("failed to read result from obj", K(ret));
} else if (OB_FAIL(build_histogram_from_topk_items(topk_hist.get_buckets(),
} else if (OB_FAIL(build_histogram_from_topk_items(allocator,
topk_hist.get_buckets(),
col_param_->bucket_num_,
tab_stat_->get_row_count(),
col_stat_->get_num_not_null(),
@ -345,7 +346,8 @@ int ObStatTopKHist::decode(ObObj &obj)
return ret;
}
int ObStatTopKHist::build_histogram_from_topk_items(const ObIArray<ObTopkItem> &buckets,
int ObStatTopKHist::build_histogram_from_topk_items(ObIAllocator &allocator,
const ObIArray<ObTopkItem> &buckets,
int64_t max_bucket_num,
int64_t total_row_count,
int64_t not_null_count,
@ -377,7 +379,8 @@ int ObStatTopKHist::build_histogram_from_topk_items(const ObIArray<ObTopkItem> &
tmp.at(i).endpoint_num_ += tmp.at(i - 1).endpoint_num_;
}
if (OB_SUCC(ret)) {
if (OB_FAIL(try_build_topk_histogram(tmp,
if (OB_FAIL(try_build_topk_histogram(allocator,
tmp,
max_bucket_num,
total_row_count,
not_null_count,
@ -400,7 +403,8 @@ int ObStatTopKHist::build_histogram_from_topk_items(const ObIArray<ObTopkItem> &
* @param histogram, the result histogram built from bkts
* @return
*/
int ObStatTopKHist::try_build_topk_histogram(const ObIArray<ObHistBucket> &bkts,
int ObStatTopKHist::try_build_topk_histogram(ObIAllocator &allocator,
const ObIArray<ObHistBucket> &bkts,
const int64_t max_bucket_num,
const int64_t total_row_count,
const int64_t not_null_count,
@ -417,16 +421,22 @@ int ObStatTopKHist::try_build_topk_histogram(const ObIArray<ObHistBucket> &bkts,
// all vals are null, there is no need to build a histogram
histogram.set_type(ObHistType::INVALID_TYPE);
histogram.set_sample_size(0);
histogram.set_bucket_cnt(0);
histogram.set_density(0);
} else if (num > 0 && bkts.at(num - 1).endpoint_num_ == not_null_count) {
histogram.set_type(ObHistType::FREQUENCY);
histogram.set_sample_size(not_null_count);
histogram.set_bucket_cnt(bkts.count());
histogram.calc_density(ObHistType::FREQUENCY,
not_null_count,
not_null_count,
num_distinct,
bkts.count());
ret = histogram.add_buckets(bkts);
if (OB_FAIL(histogram.prepare_allocate_buckets(allocator, bkts.count()))) {
LOG_WARN("failed to prepare allocate buckets", K(ret));
} else if (OB_FAIL(histogram.assign_buckets(bkts))) {
LOG_WARN("failed to assign buckets", K(ret));
} else {/*do nothing*/}
} else if (num > 0 && bkts.at(num - 1).endpoint_num_ >=
(not_null_count * (1 - 1.0 / max_bucket_num))) {
histogram.set_type(ObHistType::TOP_FREQUENCY);
@ -436,7 +446,7 @@ int ObStatTopKHist::try_build_topk_histogram(const ObIArray<ObHistBucket> &bkts,
bkts.at(num - 1).endpoint_num_,
num_distinct,
num);
if (OB_FAIL(histogram.prepare_allocate_buckets(num))) {
if (OB_FAIL(histogram.prepare_allocate_buckets(allocator, num))) {
LOG_WARN("failed to prepare allocate buckets", K(ret));
} else {
for (int64_t i = 0; OB_SUCC(ret) && i < num; ++i) {
@ -450,7 +460,12 @@ int ObStatTopKHist::try_build_topk_histogram(const ObIArray<ObHistBucket> &bkts,
// if the topk histogram contains all records of the table
// then we can build hybrid histogram directly from the topk result.
histogram.set_sample_size(not_null_count);
ret = histogram.add_buckets(bkts);
histogram.set_bucket_cnt(bkts.count());
if (OB_FAIL(histogram.prepare_allocate_buckets(allocator, bkts.count()))) {
LOG_WARN("failed to prepare allocate buckets", K(ret));
} else if (OB_FAIL(histogram.assign_buckets(bkts))) {
LOG_WARN("failed to assign buckets", K(ret));
} else {/*do nothing*/}
}
}
return ret;
@ -688,7 +703,7 @@ int ObStatHybridHist::gen_expr(char *buf, const int64_t buf_len, int64_t &pos)
return ret;
}
int ObStatHybridHist::decode(ObObj &obj)
int ObStatHybridHist::decode(ObObj &obj, ObIAllocator &allocator)
{
int ret = OB_SUCCESS;
ObHybridHistograms hybrid_hist;
@ -703,9 +718,12 @@ int ObStatHybridHist::decode(ObObj &obj)
} else if (OB_FAIL(hybrid_hist.read_result(obj))) {
LOG_WARN("failed to read result from obj", K(ret));
} else {
col_stat_->get_histogram().reset();
if (OB_FAIL(col_stat_->get_histogram().add_buckets(hybrid_hist.get_buckets()))) {
LOG_WARN("failed to append hist bucket", K(ret));
col_stat_->get_histogram().get_buckets().reset();
col_stat_->get_histogram().set_bucket_cnt(hybrid_hist.get_buckets().count());
if (OB_FAIL(col_stat_->get_histogram().prepare_allocate_buckets(allocator, hybrid_hist.get_buckets().count()))) {
LOG_WARN("failed to prepare allocate buckets", K(ret));
} else if (OB_FAIL(col_stat_->get_histogram().assign_buckets(hybrid_hist.get_buckets()))) {
LOG_WARN("failed to assign buckets", K(ret));
} else {
col_stat_->get_histogram().set_type(ObHistType::HYBIRD);
col_stat_->get_histogram().set_sample_size(hybrid_hist.get_total_count());

View File

@ -45,6 +45,11 @@ public:
UNUSED(obj);
return OB_NOT_IMPLEMENT;
}
virtual int decode(ObObj &obj, ObIAllocator &allocator)
{
UNUSED(allocator);
return decode(obj);
}
TO_STRING_KV(K(is_needed()));
@ -235,14 +240,16 @@ public:
tab_stat_(tab_stat)
{}
static int build_histogram_from_topk_items(const ObIArray<ObTopkItem> &buckets,
static int build_histogram_from_topk_items(ObIAllocator &allocator,
const ObIArray<ObTopkItem> &buckets,
int64_t max_bucket_num,
int64_t total_row_count,
int64_t not_null_count,
int64_t num_distinct,
ObHistogram &histogram);
static int try_build_topk_histogram(const ObIArray<ObHistBucket> &bkts,
static int try_build_topk_histogram(ObIAllocator &allocator,
const ObIArray<ObHistBucket> &bkts,
const int64_t max_bucket_num,
const int64_t total_row_count,
const int64_t not_null_count,
@ -257,7 +264,7 @@ public:
// const bucket_size = 256;
virtual bool is_needed() const override;
virtual int gen_expr(char *buf, const int64_t buf_len, int64_t &pos) override;
virtual int decode(ObObj &obj) override;
virtual int decode(ObObj &obj, ObIAllocator &allocator) override;
protected:
ObOptTableStat *tab_stat_;
};
@ -292,7 +299,7 @@ public:
{}
virtual int gen_expr(char *buf, const int64_t buf_len, int64_t &pos) override;
virtual int decode(ObObj &obj) override;
virtual int decode(ObObj &obj, ObIAllocator &allocator) override;
private:
bool is_null_item_;
};

View File

@ -375,7 +375,7 @@ int ObStatsEstimator::do_estimate(uint64_t tenant_id,
}
}
if (OB_SUCC(ret)) {
if (OB_FAIL(decode())) {
if (OB_FAIL(decode(ctx_.get_allocator()))) {
LOG_WARN("failed to decode results", K(ret));
} else if (copy_type == COPY_ALL_STAT &&
OB_FAIL(copy_opt_stat(src_opt_stat, dst_opt_stats))) {
@ -411,7 +411,7 @@ int ObStatsEstimator::do_estimate(uint64_t tenant_id,
return ret;
}
int ObStatsEstimator::decode()
int ObStatsEstimator::decode(ObIAllocator &allocator)
{
int ret = OB_SUCCESS;
if (OB_UNLIKELY(stat_items_.count() != results_.count())) {
@ -419,7 +419,7 @@ int ObStatsEstimator::decode()
LOG_WARN("size does not match", K(ret), K(stat_items_.count()), K(results_.count()));
}
for (int64_t i = 0; OB_SUCC(ret) && i < stat_items_.count(); ++i) {
if (OB_FAIL(stat_items_.at(i)->decode(results_.at(i)))) {
if (OB_FAIL(stat_items_.at(i)->decode(results_.at(i), allocator))) {
LOG_WARN("failed to decode statistic result", K(ret));
}
}
@ -511,8 +511,8 @@ int ObStatsEstimator::copy_col_stats(const int64_t cur_row_cnt,
dst_col_stats.at(i)->get_histogram().set_type(src_hist.get_type());
dst_col_stats.at(i)->get_histogram().set_sample_size(src_col_stats.at(i)->get_num_not_null());
dst_col_stats.at(i)->get_histogram().set_density(src_hist.get_density());
if (OB_FAIL(dst_col_stats.at(i)->get_histogram().assign_buckets(src_hist.get_buckets(),
src_hist.get_bucket_size()))) {
dst_col_stats.at(i)->get_histogram().set_bucket_cnt(src_hist.get_bucket_cnt());
if (OB_FAIL(dst_col_stats.at(i)->get_histogram().get_buckets().assign(src_hist.get_buckets()))) {
LOG_WARN("failed to assign buckets", K(ret));
} else {
LOG_TRACE("Succeed to copy col stat", K(*dst_col_stats.at(i)), K(*src_col_stats.at(i)));
@ -563,13 +563,13 @@ int ObStatsEstimator::copy_hybrid_hist_stat(ObOptStat &src_opt_stat,
ObHistogram &src_hist = src_col_stat->get_histogram();
dst_col_stat->get_histogram().set_type(src_hist.get_type());
dst_col_stat->get_histogram().set_sample_size(src_hist.get_sample_size());
dst_col_stat->get_histogram().set_bucket_cnt(src_hist.get_bucket_cnt());
dst_col_stat->get_histogram().calc_density(ObHistType::HYBIRD,
src_hist.get_sample_size(),
src_hist.get_pop_frequency(),
dst_col_stat->get_num_distinct(),
src_hist.get_pop_count());
if (OB_FAIL(dst_col_stat->get_histogram().assign_buckets(src_hist.get_buckets(),
src_hist.get_bucket_size()))) {
if (OB_FAIL(dst_col_stat->get_histogram().get_buckets().assign(src_hist.get_buckets()))) {
LOG_WARN("failed to assign buckets", K(ret));
} else {
LOG_TRACE("Succeed to copy histogram", K(*dst_col_stat), K(i), K(j));

View File

@ -43,7 +43,7 @@ protected:
int64_t get_item_size() const { return stat_items_.count(); }
int decode();
int decode(ObIAllocator &allocator);
int add_result(ObObj &obj) { return results_.push_back(obj); }

View File

@ -4894,18 +4894,17 @@ int ObLogPlan::get_popular_values_hash(ObIAllocator &allocator,
int ret = OB_SUCCESS;
if (OB_ISNULL(handle.stat_)
|| 0 >= handle.stat_->get_last_analyzed()
|| OB_ISNULL(handle.stat_->get_histogram().get_buckets())
|| handle.stat_->get_histogram().get_bucket_size() <= 0) {
// no histogram info, don't use hybrid hash
LOG_DEBUG("table not analyzed. disable hybrid hash DM", K(ret));
} else {
const ObHistogram &histogram = handle.stat_->get_histogram();
// get total value count via last bucket by it's cumulative endpoint num
const ObHistBucket &last_bucket = histogram.get_buckets()[histogram.get_bucket_size() - 1];
const ObHistBucket &last_bucket = histogram.get(histogram.get_bucket_size() - 1);
int64_t total_cnt = std::max(1L, last_bucket.endpoint_num_); // avoid zero div
int64_t min_freq = optimizer_context_.get_session_info()->get_px_join_skew_minfreq();
for (int64_t i = 0; OB_SUCC(ret) && i < histogram.get_bucket_size(); ++i) {
const ObHistBucket &bucket = histogram.get_buckets()[i];
const ObHistBucket &bucket = histogram.get(i);
int64_t freq = bucket.endpoint_repeat_count_ * 100 / total_cnt;
if (freq >= min_freq) {
ObObj value;

View File

@ -3276,11 +3276,11 @@ int ObOptSelectivity::get_bucket_bound_idx(const ObHistogram &hist,
int64_t right = hist.get_bucket_size() - 1;
idx = -1;
is_equal = false;
if (OB_LIKELY(hist.get_bucket_size() > 0 && hist.get_buckets() != NULL)) {
if (OB_LIKELY(hist.get_bucket_size() > 0)) {
while (OB_SUCC(ret) && left <= right) {
int64_t mid = (right + left) / 2;
int eq_cmp = 0;
if (OB_FAIL(hist.get_buckets()[mid].endpoint_value_.compare(value, eq_cmp))) {
if (OB_FAIL(hist.get(mid).endpoint_value_.compare(value, eq_cmp))) {
LOG_WARN("failed to compare object", K(ret));
} else if (eq_cmp > 0) {
// value < bucket[mid].ev
@ -3328,7 +3328,7 @@ int ObOptSelectivity::get_equal_pred_sel(const ObHistogram &histogram,
} else if (idx < 0 || idx >= histogram.get_bucket_size() || !is_equal) {
density = histogram.get_density();
} else {
density = static_cast<double>(histogram.get_buckets()[idx].endpoint_repeat_count_)
density = static_cast<double>(histogram.get(idx).endpoint_repeat_count_)
/ histogram.get_sample_size();
}
if (OB_SUCC(ret) && sample_size_scale > 0) {
@ -3430,8 +3430,8 @@ int ObOptSelectivity::get_less_pred_sel(const ObHistogram &histogram,
} else if (idx >= histogram.get_bucket_size()) {
density = 1.0;
} else if (is_equal) {
double frequency = histogram.get_buckets()[idx].endpoint_num_ -
(inclusive ? 0 : histogram.get_buckets()[idx].endpoint_repeat_count_);
double frequency = histogram.get(idx).endpoint_num_ -
(inclusive ? 0 : histogram.get(idx).endpoint_repeat_count_);
density = frequency / histogram.get_sample_size();
} else {
double last_bucket_count = 0;
@ -3439,22 +3439,22 @@ int ObOptSelectivity::get_less_pred_sel(const ObHistogram &histogram,
// b[i].ev < maxv < b[i+1].ev
// estimate how many elements (smaller than maxv) in bucket[i+1] there are
ObObj minscalar, maxscalar, startscalar, endscalar;
ObObj minobj(histogram.get_buckets()[idx].endpoint_value_);
ObObj maxobj(histogram.get_buckets()[idx+1].endpoint_value_);
ObObj minobj(histogram.get(idx).endpoint_value_);
ObObj maxobj(histogram.get(idx+1).endpoint_value_);
ObObj startobj(minobj), endobj(maxv);
if (OB_FAIL(ObOptEstObjToScalar::convert_objs_to_scalars(
&minobj, &maxobj, &startobj, &endobj,
&minscalar, &maxscalar, &startscalar, &endscalar))) {
LOG_WARN("failed to convert objs to scalars", K(ret));
} else if (maxscalar.get_double() - minscalar.get_double() > OB_DOUBLE_EPSINON) {
last_bucket_count = histogram.get_buckets()[idx+1].endpoint_num_ -
histogram.get_buckets()[idx+1].endpoint_repeat_count_ -
histogram.get_buckets()[idx].endpoint_num_;
last_bucket_count = histogram.get(idx+1).endpoint_num_ -
histogram.get(idx+1).endpoint_repeat_count_ -
histogram.get(idx).endpoint_num_;
last_bucket_count *= (endscalar.get_double() - startscalar.get_double()) /
(maxscalar.get_double() - minscalar.get_double());
}
}
density = static_cast<double>(histogram.get_buckets()[idx].endpoint_num_ + last_bucket_count)
density = static_cast<double>(histogram.get(idx).endpoint_num_ + last_bucket_count)
/ histogram.get_sample_size();
}
LOG_TRACE("link bug", K(density), K(maxv), K(inclusive), K(idx), K(is_equal));
@ -4076,15 +4076,15 @@ int ObOptSelectivity::get_join_pred_rows(const ObHistogram &left_hist,
int64_t ridx = 0;
while (OB_SUCC(ret) && lidx < left_hist.get_bucket_size() && ridx < right_hist.get_bucket_size()) {
int eq_cmp = 0;
if (OB_FAIL(left_hist.get_buckets()[lidx].endpoint_value_.compare(right_hist.get_buckets()[ridx].endpoint_value_,
if (OB_FAIL(left_hist.get(lidx).endpoint_value_.compare(right_hist.get(ridx).endpoint_value_,
eq_cmp))) {
LOG_WARN("failed to compare histogram endpoint value", K(ret),
K(left_hist.get_buckets()[lidx].endpoint_value_), K(right_hist.get_buckets()[ridx].endpoint_value_));
K(left_hist.get(lidx).endpoint_value_), K(right_hist.get(ridx).endpoint_value_));
} else if (0 == eq_cmp) {
if (is_semi) {
rows += left_hist.get_buckets()[lidx].endpoint_repeat_count_;
rows += left_hist.get(lidx).endpoint_repeat_count_;
} else {
rows += left_hist.get_buckets()[lidx].endpoint_repeat_count_ * right_hist.get_buckets()[ridx].endpoint_repeat_count_;
rows += left_hist.get(lidx).endpoint_repeat_count_ * right_hist.get(ridx).endpoint_repeat_count_;
}
++lidx;
++ridx;

View File

@ -431,5 +431,20 @@ int ObAnalyzeStmtResolver::resolve_sample_clause_info(const ParseNode *sample_cl
return ret;
}
int ObAnalyzeStmtResolver::get_bucket_size(const ParseNode *node,
int64_t &bucket_num)
{
int ret = OB_SUCCESS;
if (NULL == node) {
// do nothing
} else if (T_INT != node->type_) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected node type", K(ret));
} else {
bucket_num = node->value_;
}
return ret;
}
} /* namespace sql */
} /* namespace oceanbase */

View File

@ -50,6 +50,9 @@ private:
ObAnalyzeStmt &analyze_stmt);
int resolve_sample_clause_info(const ParseNode *sample_clause_node,
ObAnalyzeStmt &analyze_stmt);
int get_bucket_size(const ParseNode *node, int64_t &bucket_num);
// disallow copy
DISALLOW_COPY_AND_ASSIGN(ObAnalyzeStmtResolver);
};

View File

@ -680,7 +680,7 @@ TEST_F(TestOptEstSel, histogram)
num_elements.push_back(infos[i][2]);
}
init_histogram(ObHistType::FREQUENCY, 100, 0.0025,
init_histogram(allocator_, ObHistType::FREQUENCY, 100, 0.0025,
repeat_count, value, num_elements, opt_stat_.get_histogram());
run_test(test_file, result_file, tmp_file, 10);

View File

@ -403,6 +403,7 @@ void TestOptimizerUtils::run_test(const char* test_file,
}
void TestOptimizerUtils::init_histogram(
common::ObIAllocator &allocator,
const ObHistType type,
const double sample_size,
const double density,
@ -418,14 +419,13 @@ void TestOptimizerUtils::init_histogram(
hist.set_sample_size(sample_size);
hist.set_density(density);
int64_t bucket_cnt = 0;
ObSEArray<ObHistBucket, 4> tmp_buckets;
hist.prepare_allocate_buckets(allocator, repeat_count.count());
for (int64_t i = 0; i < repeat_count.count(); i++) {
ObHistBucket bucket(repeat_count.at(i), num_elements.at(i));
tmp_buckets.push_back(bucket);
tmp_buckets.at(tmp_buckets.count() - 1).endpoint_value_.set_int(value.at(i));
bucket_cnt += num_elements.at(i);
hist.get(i).endpoint_num_ = repeat_count.at(i);
hist.get(i).endpoint_repeat_count_ = num_elements.at(i);
hist.get(i).endpoint_value_.set_int(value.at(i));
}
hist.add_buckets(tmp_buckets);
hist.set_bucket_cnt(repeat_count.count());
}
void TestOptimizerUtils::run_fail_test(const char *test_file)

View File

@ -89,6 +89,7 @@ class TestOptimizerUtils : public TestSqlUtils, public ::testing::Test {
void formalize_tmp_file(const char *tmp_file);
void init_histogram(
common::ObIAllocator &allocator,
const ObHistType type,
const double sample_size,
const double density,