[FEAT MERGE] optimizer statistics gather enhance

Co-authored-by: Larry955 <1412857955@qq.com>
Co-authored-by: wangt1xiuyi <13547954130@163.com>
This commit is contained in:
obdev
2023-04-28 13:11:58 +00:00
committed by ob-robot
parent 35c1be5aa1
commit 642f1c7d84
130 changed files with 8572 additions and 1563 deletions

View File

@ -27,6 +27,8 @@
#include "sql/engine/sort/ob_sort_op_impl.h"
#include "sql/engine/expr/ob_expr_json_func_helper.h"
#include "sql/engine/expr/ob_expr_lob_utils.h"
#include "sql/engine/basic/ob_material_op_impl.h"
#include "share/stat/ob_hybrid_hist_estimator.h"
namespace oceanbase
{
@ -478,6 +480,203 @@ int ObAggregateProcessor::GroupConcatExtraResult::set_bool_mark(int64_t col_inde
return ret;
}
// Destroys the sort operator and then the material operator, if present.
// Each one has its destructor invoked explicitly, its storage is returned to
// alloc_, and the pointer is cleared.
ObAggregateProcessor::HybridHistExtraResult::~HybridHistExtraResult()
{
  if (nullptr != sort_op_) {
    sort_op_->~ObSortOpImpl();
    alloc_.free(sort_op_);
    sort_op_ = nullptr;
  }

  if (nullptr != mat_op_) {
    mat_op_->~ObMaterialOpImpl();
    alloc_.free(mat_op_);
    mat_op_ = nullptr;
  }
}
// Resets only the state owned by this class: both operators (when they exist)
// are reused and the two row counters go back to zero. State owned by
// ExtraResult is left alone; reuse() resets that in addition.
void ObAggregateProcessor::HybridHistExtraResult::reuse_self()
{
  if (sort_op_ != NULL) {
    sort_op_->reuse();
  }
  if (mat_op_ != NULL) {
    mat_op_->reuse();
  }
  sort_row_count_ = 0;
  material_row_count_ = 0;
}
void ObAggregateProcessor::HybridHistExtraResult::reuse()
{
reuse_self();
ExtraResult::reuse();
}
// Allocates (from alloc_) and initializes the sort operator and the material
// operator, and zeroes both row counters.
//
// @param tenant_id          tenant the operators are initialized for;
//                           OB_INVALID_ID is rejected
// @param aggr_info          provides sort_collations_ and sort_cmp_funcs_ for
//                           the sort operator
// @param eval_ctx           evaluation context (and its exec_ctx_) handed to
//                           both operators
// @param need_rewind        forwarded to the sort operator's init
// @param io_event_observer  observer handed to both operators
// @param profile            work-area profile the material operator is built with
// @param op_monitor_info    monitor node both operators are built with
// @return OB_SUCCESS on success; OB_INVALID_ARGUMENT for an invalid tenant id;
//         OB_ALLOCATE_MEMORY_FAILED when an operator cannot be allocated;
//         otherwise whatever the failing operator init returned.
//
// On any failure every operator created so far is destroyed and freed, so the
// object is left with both pointers NULL.
int ObAggregateProcessor::HybridHistExtraResult::init(const uint64_t tenant_id,
    const ObAggrInfo &aggr_info, ObEvalCtx &eval_ctx, const bool need_rewind,
    ObIOEventObserver *io_event_observer, ObSqlWorkAreaProfile &profile,
    ObMonitorNode &op_monitor_info)
{
  int ret = OB_SUCCESS;
  sort_row_count_ = 0;
  material_row_count_ = 0;
  if (OB_UNLIKELY(OB_INVALID_ID == tenant_id)) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("invalid argument", K(tenant_id));
  } else if (OB_ISNULL(sort_op_ = static_cast<ObSortOpImpl *>(
                           alloc_.alloc(sizeof(ObSortOpImpl))))) {
    ret = OB_ALLOCATE_MEMORY_FAILED;
    LOG_WARN("fall to alloc buff", "size", sizeof(ObSortOpImpl));
  } else {
    // Build the sort operator with the monitor node supplied by the caller,
    // the same one the material operator below is built with.
    new (sort_op_) ObSortOpImpl(op_monitor_info);
    if (OB_FAIL(sort_op_->init(tenant_id,
                               &aggr_info.sort_collations_,
                               &aggr_info.sort_cmp_funcs_,
                               &eval_ctx,
                               &eval_ctx.exec_ctx_,
                               false,
                               false,
                               need_rewind))) {
      LOG_WARN("init sort_op_ failed");
    } else {
      sort_op_->set_io_event_observer(io_event_observer);
    }
  }
  if (OB_SUCC(ret)) {
    if (OB_ISNULL(mat_op_ = static_cast<ObMaterialOpImpl *>(
                      alloc_.alloc(sizeof(ObMaterialOpImpl))))) {
      ret = OB_ALLOCATE_MEMORY_FAILED;
      LOG_WARN("fall to alloc buff", "size", sizeof(ObMaterialOpImpl));
    } else {
      new (mat_op_) ObMaterialOpImpl(op_monitor_info, profile);
      if (OB_FAIL(mat_op_->init(tenant_id,
                                &eval_ctx,
                                &eval_ctx.exec_ctx_,
                                io_event_observer))) {
        LOG_WARN("init mat_op_ failed");
      }
    }
  }
  if (OB_FAIL(ret)) {
    // Roll back: release whichever operators were already constructed.
    if (NULL != sort_op_) {
      sort_op_->~ObSortOpImpl();
      alloc_.free(sort_op_);
      sort_op_ = NULL;
    }
    if (NULL != mat_op_) {
      mat_op_->~ObMaterialOpImpl();
      alloc_.free(mat_op_);
      mat_op_ = NULL;
    }
  }
  return ret;
}
// Hands `expr` to sort_op_->add_row() and, on success, bumps sort_row_count_.
// When no sort operator exists the call does nothing and reports OB_SUCCESS.
// `eval_ctx` is not used by this overload.
//
// @return OB_SUCCESS, or the error returned by the sort operator.
int ObAggregateProcessor::HybridHistExtraResult::add_sort_row(
    const ObIArray<ObExpr *> &expr, ObEvalCtx &eval_ctx)
{
  int ret = OB_SUCCESS;
  if (nullptr == sort_op_) {
    // no sort operator: nothing to add
  } else if (OB_FAIL(sort_op_->add_row(expr))) {
    LOG_WARN("failed to add row to sort op", K(expr));
  } else {
    sort_row_count_ += 1;
  }
  return ret;
}
// Stored-row overload: hands `sr` to sort_op_->add_stored_row() and, on
// success, bumps sort_row_count_. When no sort operator exists the call does
// nothing and reports OB_SUCCESS.
//
// @return OB_SUCCESS, or the error returned by the sort operator.
int ObAggregateProcessor::HybridHistExtraResult::add_sort_row(
    const ObChunkDatumStore::StoredRow &sr)
{
  int ret = OB_SUCCESS;
  if (nullptr == sort_op_) {
    // no sort operator: nothing to add
  } else if (OB_FAIL(sort_op_->add_stored_row(sr))) {
    LOG_WARN("failed to add row to sort op", K(sr));
  } else {
    sort_row_count_ += 1;
  }
  return ret;
}
// Fetches the next row from the sort operator into `sr`.
// `sr` is cleared to NULL first; if there is no sort operator it stays NULL
// and OB_SUCCESS is returned.
//
// @return the result of sort_op_->get_next_row(sr), or OB_SUCCESS when no
//         sort operator exists.
int ObAggregateProcessor::HybridHistExtraResult::get_next_row_from_sort(
    const ObChunkDatumStore::StoredRow *&sr)
{
  int ret = OB_SUCCESS;
  sr = NULL;
  if (nullptr == sort_op_) {
    // nothing to read from
  } else {
    ret = sort_op_->get_next_row(sr);
  }
  return ret;
}
// Signals the end of input for the sort side by calling sort_op_->sort().
// Without a sort operator this is a no-op.
//
// @return OB_SUCCESS, or the error returned by sort().
int ObAggregateProcessor::HybridHistExtraResult::finish_add_sort_row()
{
  int ret = OB_SUCCESS;
  if (nullptr != sort_op_ && OB_FAIL(sort_op_->sort())) {
    LOG_WARN("failed to sort rows");
  }
  return ret;
}
// Forwards one row to mat_op_->add_row() and, on success, bumps
// material_row_count_. Without a material operator this is a no-op.
//
// @param src_datums  datums of the row, forwarded unchanged
// @param datum_cnt   number of datums, forwarded unchanged
// @param extra_size  extra payload size, forwarded unchanged
// @param store_row   passed by reference to add_row (presumably receives the
//                    stored copy of the row; confirm against ObMaterialOpImpl)
// @return OB_SUCCESS, or the error returned by the material operator.
int ObAggregateProcessor::HybridHistExtraResult::add_material_row(
    const ObDatum *src_datums,
    const int64_t datum_cnt,
    const int64_t extra_size,
    const ObChunkDatumStore::StoredRow *&store_row)
{
  int ret = OB_SUCCESS;
  if (OB_NOT_NULL(mat_op_)) {
    if (OB_FAIL(mat_op_->add_row(src_datums, datum_cnt, extra_size, store_row))) {
      // The failing call is on the material operator, not the sort operator.
      LOG_WARN("failed to add row to material op", K(src_datums), K(datum_cnt));
    } else {
      ++material_row_count_;
    }
  }
  return ret;
}
// Fetches the next row from the material operator into `sr`.
// `sr` is cleared to NULL first; if there is no material operator it stays
// NULL and OB_SUCCESS is returned.
//
// @return the result of mat_op_->get_next_row(sr), or OB_SUCCESS when no
//         material operator exists.
int ObAggregateProcessor::HybridHistExtraResult::get_next_row_from_material(
    const ObChunkDatumStore::StoredRow *&sr)
{
  int ret = OB_SUCCESS;
  sr = NULL;
  if (nullptr == mat_op_) {
    // nothing to read from
  } else {
    ret = mat_op_->get_next_row(sr);
  }
  return ret;
}
// Signals the end of input for the material side by calling
// mat_op_->finish_add_row(). Without a material operator this is a no-op.
//
// @return OB_SUCCESS, or the error returned by finish_add_row().
int ObAggregateProcessor::HybridHistExtraResult::finish_add_material_row()
{
  int ret = OB_SUCCESS;
  if (OB_NOT_NULL(mat_op_)) {
    if (OB_FAIL(mat_op_->finish_add_row())) {
      // No sorting happens here; report the operation that actually failed.
      LOG_WARN("failed to finish adding rows to material op");
    }
  }
  return ret;
}
// Formats this object into `buf` (capacity `buf_len`) as one J_OBJ_START /
// J_OBJ_END delimited object containing sort_row_count, material_row_count,
// sort_op and mat_op.
// Returns `pos`, which starts at 0 here (presumably advanced by the J_* macros
// as they write; the macros are not visible in this file section).
int64_t ObAggregateProcessor::HybridHistExtraResult::to_string(
char *buf, const int64_t buf_len) const
{
int64_t pos = 0;
J_OBJ_START();
// The operators go through KP_ (presumably printed as pointer values rather
// than by content -- confirm against the macro definition).
J_KV(K_(sort_row_count),
K_(material_row_count),
KP_(sort_op),
KP_(mat_op)
);
J_OBJ_END();
return pos;
}
int64_t ObAggregateProcessor::ExtraResult::to_string(char *buf,
const int64_t buf_len) const
{
@ -536,7 +735,8 @@ ObAggregateProcessor::DllUdfExtra::~DllUdfExtra()
ObAggregateProcessor::ObAggregateProcessor(ObEvalCtx &eval_ctx,
ObIArray<ObAggrInfo> &aggr_infos,
const lib::ObLabel &label)
const lib::ObLabel &label,
ObMonitorNode &op_monitor_info)
: has_distinct_(false),
has_order_by_(false),
has_group_concat_(false),
@ -569,7 +769,9 @@ ObAggregateProcessor::ObAggregateProcessor(ObEvalCtx &eval_ctx,
io_event_observer_(nullptr),
removal_info_(),
support_fast_single_row_agg_(false),
op_eval_infos_(nullptr)
op_eval_infos_(nullptr),
profile_(ObSqlWorkAreaType::HASH_WORK_AREA),
op_monitor_info_(op_monitor_info)
{
}
@ -1578,7 +1780,6 @@ int ObAggregateProcessor::generate_group_row(GroupRow *&new_group_row,
case T_FUN_KEEP_WM_CONCAT:
case T_FUN_WM_CONCAT:
case T_FUN_PL_AGG_UDF:
case T_FUN_HYBRID_HIST:
case T_FUN_JSON_ARRAYAGG:
case T_FUN_ORA_JSON_ARRAYAGG:
case T_FUN_JSON_OBJECTAGG:
@ -1627,6 +1828,27 @@ int ObAggregateProcessor::generate_group_row(GroupRow *&new_group_row,
}
break;
}
case T_FUN_HYBRID_HIST: {
void *tmp_buf = NULL;
if (OB_ISNULL(tmp_buf = aggr_alloc_.alloc(sizeof(HybridHistExtraResult)))) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("allocate memory failed", "size", sizeof(HybridHistExtraResult));
} else {
HybridHistExtraResult *result = new (tmp_buf) HybridHistExtraResult(aggr_alloc_);
aggr_cell.set_extra(result);
const bool need_rewind = (in_window_func_ || group_id > 0);
if (OB_FAIL(result->init(eval_ctx_.exec_ctx_.get_my_session()->get_effective_tenant_id(),
aggr_info,
eval_ctx_,
need_rewind,
io_event_observer_,
profile_,
op_monitor_info_))) {
LOG_WARN("init hybrid hist extra result failed");
}
}
break;
}
case T_FUN_TOP_FRE_HIST: {
void *tmp_buf = NULL;
if (OB_ISNULL(tmp_buf = aggr_alloc_.alloc(sizeof(TopKFreHistExtraResult)))) {
@ -1971,7 +2193,6 @@ int ObAggregateProcessor::rollup_aggregation(AggrCell &aggr_cell, AggrCell &roll
case T_FUN_KEEP_WM_CONCAT:
case T_FUN_WM_CONCAT:
case T_FUN_PL_AGG_UDF:
case T_FUN_HYBRID_HIST:
case T_FUN_JSON_ARRAYAGG:
case T_FUN_ORA_JSON_ARRAYAGG:
case T_FUN_JSON_OBJECTAGG:
@ -2016,6 +2237,12 @@ int ObAggregateProcessor::rollup_aggregation(AggrCell &aggr_cell, AggrCell &roll
}
break;
}
case T_FUN_HYBRID_HIST: {
ret = OB_NOT_SUPPORTED;
LOG_WARN("rollup contain agg hybrid hist still not supported", K(ret));
LOG_USER_ERROR(OB_NOT_SUPPORTED, "rollup contain hybrid hist");
break;
}
case T_FUN_AGG_UDF: {
ret = OB_NOT_SUPPORTED;
LOG_WARN("rollup contain agg udfs still not supported", K(ret));
@ -2164,7 +2391,6 @@ int ObAggregateProcessor::prepare_aggr_result(const ObChunkDatumStore::StoredRow
case T_FUN_KEEP_WM_CONCAT:
case T_FUN_WM_CONCAT:
case T_FUN_PL_AGG_UDF:
case T_FUN_HYBRID_HIST:
case T_FUN_JSON_ARRAYAGG:
case T_FUN_ORA_JSON_ARRAYAGG:
case T_FUN_JSON_OBJECTAGG:
@ -2209,6 +2435,23 @@ int ObAggregateProcessor::prepare_aggr_result(const ObChunkDatumStore::StoredRow
}
break;
}
case T_FUN_HYBRID_HIST: {
HybridHistExtraResult *extra = NULL;
if (OB_ISNULL(extra = static_cast<HybridHistExtraResult *>(aggr_cell.get_extra()))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("extra is null", K(aggr_cell));
} else {
extra->reuse_self();
if (param_exprs == NULL && OB_FAIL(extra->add_sort_row(stored_row))) {
LOG_WARN("fail to add row");
} else if (param_exprs != NULL && OB_FAIL(extra->add_sort_row(*param_exprs, eval_ctx_))) {
LOG_WARN("fail to add row");
} else {
LOG_DEBUG("succ to add row", K(stored_row), KPC(extra));
}
}
break;
}
case T_FUN_TOP_FRE_HIST: {
TopKFreHistExtraResult *extra = NULL;
if (OB_ISNULL(extra = static_cast<TopKFreHistExtraResult *>(aggr_cell.get_extra()))) {
@ -2386,7 +2629,6 @@ int ObAggregateProcessor::process_aggr_batch_result(
case T_FUN_KEEP_WM_CONCAT:
case T_FUN_WM_CONCAT:
case T_FUN_PL_AGG_UDF:
case T_FUN_HYBRID_HIST:
case T_FUN_JSON_ARRAYAGG:
case T_FUN_ORA_JSON_ARRAYAGG:
case T_FUN_JSON_OBJECTAGG:
@ -2420,6 +2662,16 @@ int ObAggregateProcessor::process_aggr_batch_result(
}
break;
}
case T_FUN_HYBRID_HIST: {
HybridHistExtraResult *extra_info = NULL;
if (OB_ISNULL(extra_info = static_cast<HybridHistExtraResult *>(aggr_cell.get_extra()))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("extra is null", K(aggr_cell));
} else if (OB_FAIL(selector.add_batch(param_exprs, extra_info, eval_ctx_))) {
LOG_WARN("add batch failed");
}
break;
}
case T_FUN_TOP_FRE_HIST: {
TopKFreHistExtraResult *extra_info = NULL;
if (OB_ISNULL(extra_info = static_cast<TopKFreHistExtraResult *>(aggr_cell.get_extra()))) {
@ -2615,7 +2867,6 @@ int ObAggregateProcessor::process_aggr_result(const ObChunkDatumStore::StoredRow
case T_FUN_KEEP_WM_CONCAT:
case T_FUN_WM_CONCAT:
case T_FUN_PL_AGG_UDF:
case T_FUN_HYBRID_HIST:
case T_FUN_JSON_ARRAYAGG:
case T_FUN_ORA_JSON_ARRAYAGG:
case T_FUN_JSON_OBJECTAGG:
@ -2635,6 +2886,20 @@ int ObAggregateProcessor::process_aggr_result(const ObChunkDatumStore::StoredRow
}
break;
}
case T_FUN_HYBRID_HIST: {
HybridHistExtraResult *extra = NULL;
if (OB_ISNULL(extra = static_cast<HybridHistExtraResult *>(aggr_cell.get_extra()))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("extra is null", K(aggr_cell));
} else if (param_exprs == NULL && OB_FAIL(extra->add_sort_row(stored_row))) {
LOG_WARN("fail to add row");
} else if (param_exprs != NULL && OB_FAIL(extra->add_sort_row(*param_exprs, eval_ctx_))) {
LOG_WARN("fail to add row");
} else {
LOG_DEBUG("succ to add row", K(stored_row), KPC(extra));
}
break;
}
case T_FUN_TOP_FRE_HIST: {
TopKFreHistExtraResult *extra = NULL;
if (OB_ISNULL(extra = static_cast<TopKFreHistExtraResult *>(aggr_cell.get_extra()))) {
@ -3486,7 +3751,7 @@ int ObAggregateProcessor::collect_aggr_result(
break;
}
case T_FUN_HYBRID_HIST: {
GroupConcatExtraResult *extra = static_cast<GroupConcatExtraResult *>(aggr_cell.get_extra());
HybridHistExtraResult *extra = static_cast<HybridHistExtraResult *>(aggr_cell.get_extra());
if (OB_FAIL(compute_hybrid_hist_result(aggr_info, extra, result))) {
LOG_WARN("failed to compute_hybrid_hist_result", K(ret));
} else {
@ -4431,6 +4696,25 @@ int ObAggregateProcessor::ObBatchRowsSlice::add_batch(
return ret;
}
int ObAggregateProcessor::ObBatchRowsSlice::add_batch(
const ObIArray<ObExpr *> *param_exprs,
HybridHistExtraResult *extra_info,
ObEvalCtx &eval_ctx
) const
{
int ret = OB_SUCCESS;
if (OB_NOT_NULL(extra_info->sort_op_)) {
int64_t stored_rows_count = 0;
if (OB_FAIL(extra_info->sort_op_->add_batch(
*param_exprs, *brs_->skip_, end_pos_, begin_pos_, &stored_rows_count))) {
LOG_WARN("failed to add batch");
} else {
extra_info->sort_row_count_ += stored_rows_count;
}
}
return ret;
}
int ObAggregateProcessor::ObSelector::add_batch(
const ObIArray<ObExpr *> *param_exprs,
ObSortOpImpl *unique_sort_op,
@ -4463,6 +4747,24 @@ int ObAggregateProcessor::ObSelector::add_batch(
return ret;
}
int ObAggregateProcessor::ObSelector::add_batch(
const ObIArray<ObExpr *> *param_exprs,
HybridHistExtraResult *extra_info,
ObEvalCtx &eval_ctx
) const
{
int ret = OB_SUCCESS;
if (OB_NOT_NULL(extra_info->sort_op_)) {
if (OB_FAIL(extra_info->sort_op_->add_batch(
*param_exprs, *brs_->skip_, brs_->size_, selector_array_, count_))) {
LOG_WARN("failed to add batch");
} else {
extra_info->sort_row_count_ += count_;
}
}
return ret;
}
template <typename T>
int ObAggregateProcessor::top_fre_hist_calc_batch(
const ObAggrInfo &aggr_info,
@ -5010,15 +5312,10 @@ int ObAggregateProcessor::llc_add_value(const uint64_t value, char *llc_bitmap_b
pmax = ObExprEstimateNdv::llc_leading_zeros(value << LLC_BUCKET_BITS, 64 - LLC_BUCKET_BITS) + 1;
}
ObString::obstr_size_t llc_num_buckets = size;
if (OB_UNLIKELY(size != get_llc_size())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("buffer size don't match", K(size), K(get_llc_size()));
} else if (OB_UNLIKELY(!ObExprEstimateNdv::llc_is_num_buckets_valid(llc_num_buckets))
|| OB_UNLIKELY(llc_num_buckets <= bucket_index)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("llc_add_value failed because number of buckets is not valid",
K(llc_num_buckets), K(bucket_index), K(ret));
} else if (pmax > static_cast<uint8_t>(llc_bitmap_buf[bucket_index])) {
OB_ASSERT(size == get_llc_size());
OB_ASSERT(ObExprEstimateNdv::llc_is_num_buckets_valid(llc_num_buckets));
OB_ASSERT(llc_num_buckets > bucket_index);
if (pmax > static_cast<uint8_t>(llc_bitmap_buf[bucket_index])) {
// 理论上pmax不会超过65.
llc_bitmap_buf[bucket_index] = static_cast<uint8_t>(pmax);
}
@ -5633,19 +5930,21 @@ int ObAggregateProcessor::convert_datum_to_obj(const ObAggrInfo &aggr_info,
}
int ObAggregateProcessor::compute_hybrid_hist_result(const ObAggrInfo &aggr_info,
GroupConcatExtraResult *&extra,
HybridHistExtraResult *&extra,
ObDatum &result)
{
int ret = OB_SUCCESS;
ObHybridHistograms hybrid_hist;
ObSEArray<BucketNode, 4> bucket_nodes;
ObDatum *bucket_num_result = NULL;
int64_t bucket_num = 0;
int64_t num_distinct = 0;
int64_t null_count = 0;
int64_t total_count = 0;
int64_t pop_count = 0;
int64_t pop_freq = 0;
if (OB_ISNULL(extra) || OB_ISNULL(aggr_info.bucket_num_param_expr_) ||
OB_UNLIKELY(extra->empty() || aggr_info.param_exprs_.count() != 1 ||
OB_UNLIKELY(extra->get_sort_row_count() == 0 ||
aggr_info.param_exprs_.count() != 1 ||
aggr_info.sort_collations_.count() != 1 ||
!aggr_info.bucket_num_param_expr_->obj_meta_.is_numeric_type())) {
ret = OB_ERR_UNEXPECTED;
@ -5662,45 +5961,63 @@ int ObAggregateProcessor::compute_hybrid_hist_result(const ObAggrInfo &aggr_info
} else if (bucket_num <= 0) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("get invalid argument", K(ret), K(bucket_num));
} else if (extra->is_iterated() && OB_FAIL(extra->rewind())) {
LOG_WARN("rewind failed", KPC(extra), K(ret));
} else if (!extra->is_iterated() && OB_FAIL(extra->finish_add_row())) {
} else if ( OB_FAIL(extra->finish_add_sort_row())) {
LOG_WARN("finish_add_row failed", KPC(extra), K(ret));
} else {
ObObj pre_obj;
pre_obj.set_null();
ObChunkDatumStore::LastStoredRow prev_row(aggr_alloc_);
const int64_t extra_size = sizeof(BucketDesc);
int64_t repeat_count = 0;
const ObChunkDatumStore::StoredRow *stored_row = NULL;
while (OB_SUCC(ret) && OB_SUCC(extra->get_next_row(stored_row))) {
ObObj cur_obj;
const ObChunkDatumStore::StoredRow *mat_stored_row = NULL;
// get null count
while (OB_SUCC(ret) && OB_SUCC(extra->get_next_row_from_sort(stored_row))) {
if (OB_ISNULL(stored_row) || OB_UNLIKELY(stored_row->cnt_ != 1)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected null", K(ret), K(stored_row));
} else if (OB_FAIL(stored_row->cells()[0].to_obj(cur_obj,
aggr_info.param_exprs_.at(0)->obj_meta_))) {
LOG_WARN("failed to obj", K(ret));
} else if (stored_row->cells()[0].is_null()) {
++ null_count;
} else if (OB_FAIL(prev_row.save_store_row(*stored_row))) {
LOG_WARN("failed to deep copy limit last rows", K(ret));
} else {
++ total_count;
if (cur_obj.is_null()) {
++ null_count;
} else if (!pre_obj.is_null() &&
pre_obj.is_equal(cur_obj, aggr_info.sort_collations_.at(0).cs_type_)) {
++ repeat_count;
} else {
// new value is found
if (repeat_count > 0 && !pre_obj.is_null()) {
if (OB_FAIL(bucket_nodes.push_back(BucketNode(pre_obj, repeat_count)))) {
LOG_WARN("failed to push back bucket node", K(ret));
} else {/*do nothing*/}
}
if (OB_FAIL(ret)) {
} else if (OB_FAIL(ob_write_obj(aggr_alloc_, cur_obj, pre_obj))) {
LOG_WARN("fail to clone cell", K(ret), K(cur_obj));
} else {
repeat_count = 1;
++ num_distinct;
}
repeat_count = 1;
++ num_distinct;
break;
}
}
total_count = extra->get_sort_row_count() - null_count;
int64_t pop_threshold = total_count / bucket_num;
// get all bucket node and store them into chunk datum store
while (OB_SUCC(ret) && OB_SUCC(extra->get_next_row_from_sort(stored_row))) {
bool is_equal = false;
if (OB_ISNULL(stored_row) || OB_UNLIKELY(stored_row->cnt_ != 1)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected null", K(stored_row));
} else if (OB_FAIL(check_rows_equal(prev_row, *stored_row, aggr_info, is_equal))) {
LOG_WARN("failed to is order by item equal with prev row", K(ret));
} else if (is_equal) {
++ repeat_count;
} else if (OB_ISNULL(prev_row.store_row_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected null", K(prev_row.store_row_));
} else if (OB_FAIL(extra->add_material_row(prev_row.store_row_->cells(),
prev_row.store_row_->cnt_,
extra_size, mat_stored_row))) {
LOG_WARN("failed to add material row");
} else if (OB_FAIL(prev_row.save_store_row(*stored_row))) {
LOG_WARN("failed to deep copy limit last rows", K(ret));
} else if (OB_ISNULL(mat_stored_row)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected null", K(mat_stored_row));
} else {
BucketDesc *desc = reinterpret_cast<BucketDesc*>(mat_stored_row->get_extra_payload());
desc->ep_count_ = repeat_count;
desc->is_pop_ = repeat_count > pop_threshold;
if (desc->is_pop_) {
pop_freq += repeat_count;
++ pop_count;
}
repeat_count = 1;
++ num_distinct;
}
}
if (ret != OB_ITER_END && ret != OB_SUCCESS) {
@ -5708,15 +6025,35 @@ int ObAggregateProcessor::compute_hybrid_hist_result(const ObAggrInfo &aggr_info
} else {
ret = OB_SUCCESS;
bool has_lob_header = aggr_info.expr_->obj_meta_.has_lob_header();
if (!pre_obj.is_null() &&
OB_FAIL(bucket_nodes.push_back(BucketNode(pre_obj, repeat_count)))) {
LOG_WARN("failed to add the last value into bucket", K(ret));
} else if (OB_FAIL(hybrid_hist.build_hybrid_hist(bucket_nodes,
bucket_num,
total_count - null_count,
num_distinct))) {
LOG_WARN("failed to build hybrid hist", K(ret));
} else if (OB_FAIL(get_hybrid_hist_result(hybrid_hist, has_lob_header, result))) {
if (prev_row.store_row_ != nullptr) {
if (OB_ISNULL(prev_row.store_row_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected null", K(prev_row.store_row_));
} else if (OB_FAIL(extra->add_material_row(prev_row.store_row_->cells(),
prev_row.store_row_->cnt_,
extra_size, mat_stored_row))) {
LOG_WARN("failed to add material row");
} else if (OB_ISNULL(mat_stored_row)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected null", K(mat_stored_row));
} else {
BucketDesc *desc = reinterpret_cast<BucketDesc*>(mat_stored_row->get_extra_payload());
desc->ep_count_ = repeat_count;
desc->is_pop_ = repeat_count > pop_threshold;
if (desc->is_pop_) {
pop_freq += repeat_count;
++ pop_count;
}
}
}
if (OB_FAIL(ret)) {
} else if (OB_FAIL(extra->finish_add_material_row())) {
LOG_WARN("failed to finish add material row", K(ret));
} else if (OB_FAIL(hybrid_hist.build_hybrid_hist(extra, &aggr_alloc_, bucket_num, total_count,
num_distinct, pop_count, pop_freq,
aggr_info.param_exprs_.at(0)->obj_meta_))) {
LOG_WARN("failed to build hybrid hist", K(ret), K(&aggr_alloc_));
} else if (OB_FAIL(get_hybrid_hist_result(&hybrid_hist, has_lob_header, result))) {
LOG_WARN("failed to get hybrid hist result", K(ret));
} else {/*do nothing*/}
}
@ -5724,21 +6061,24 @@ int ObAggregateProcessor::compute_hybrid_hist_result(const ObAggrInfo &aggr_info
return ret;
}
int ObAggregateProcessor::get_hybrid_hist_result(ObHybridHistograms &hybrid_hist,
int ObAggregateProcessor::get_hybrid_hist_result(ObHybridHistograms *hybrid_hist,
bool has_lob_header,
ObDatum &result_datum)
{
int ret = OB_SUCCESS;
if (hybrid_hist.get_buckets().count() > 0) {
if (OB_ISNULL(hybrid_hist)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get null hybrid histograms", K(ret));
} else if (hybrid_hist->get_buckets().count() > 0) {
char *buf = NULL;
int64_t buf_size = hybrid_hist.get_serialize_size();
int64_t buf_size = hybrid_hist->get_serialize_size();
int64_t buf_pos = 0;
ObTextStringResult new_tmp_lob(ObLongTextType, has_lob_header, &aggr_alloc_);
if (OB_FAIL(new_tmp_lob.init(buf_size))) {
LOG_WARN("tmp lob init failed", K(ret), K(buf_size));
} else if (OB_FAIL(new_tmp_lob.get_reserved_buffer(buf, buf_size))) {
LOG_WARN("tmp lob append failed", K(ret), K(new_tmp_lob));
} else if (OB_FAIL(hybrid_hist.serialize(buf, buf_size, buf_pos))) {
} else if (OB_FAIL(hybrid_hist->serialize(buf, buf_size, buf_pos))) {
LOG_WARN("fail serialize init task arg", KP(buf), K(buf_size), K(buf_pos), K(ret));
} else if (OB_FAIL(new_tmp_lob.lseek(buf_pos, 0))) {
LOG_WARN("temp lob lseek failed", K(ret), K(new_tmp_lob), K(buf_pos));
@ -5746,11 +6086,11 @@ int ObAggregateProcessor::get_hybrid_hist_result(ObHybridHistograms &hybrid_hist
ObString lob_loc_str;
new_tmp_lob.get_result_buffer(lob_loc_str);
result_datum.set_string(lob_loc_str);
LOG_TRACE("succeed to get topK fre hist result", K(result_datum), K(hybrid_hist));
LOG_TRACE("succeed to get hybrid hist result", K(result_datum), KPC(hybrid_hist));
}
} else {
result_datum.set_null();
LOG_TRACE("succeed to get topK fre hist result", K(result_datum), K(hybrid_hist));
LOG_TRACE("succeed to get hybrid hist result", K(result_datum), KPC(hybrid_hist));
}
return ret;
}

View File

@ -28,13 +28,18 @@
#include "sql/engine/user_defined_function/ob_pl_user_defined_agg_function.h"
#include "sql/engine/expr/ob_expr_dll_udf.h"
#include "sql/engine/expr/ob_rt_datum_arith.h"
#include "share/stat/ob_hybrid_hist_estimator.h"
namespace oceanbase
{
namespace common
{
class ObHybridHistograms;
}
namespace sql
{
class ObMaterialOpImpl;
struct RemovalInfo
{
RemovalInfo()
@ -375,6 +380,49 @@ public:
common::ObFixedArray<bool, common::ObIAllocator> bool_mark_;
};
// Extra per-aggregate state for the T_FUN_HYBRID_HIST aggregate.
// It holds two operators: a sort operator that receives the input rows and a
// material operator that receives rows through add_material_row(). Both are
// created by init() and destroyed by the destructor.
class HybridHistExtraResult : public ExtraResult
{
public:
// Starts with zero counters and no operators; init() must create them.
explicit HybridHistExtraResult(common::ObIAllocator &alloc)
: ExtraResult(alloc),
sort_row_count_(0),
material_row_count_(0),
sort_op_(nullptr),
mat_op_(nullptr)
{}
// Destroys both operators and returns their memory to the allocator.
virtual ~HybridHistExtraResult();
// Resets only this class's operators and counters.
void reuse_self();
// reuse_self() followed by ExtraResult::reuse().
virtual void reuse() override;
// Allocates and initializes both operators; on failure neither is kept.
int init(const uint64_t tenant_id, const ObAggrInfo &aggr_info,
ObEvalCtx &eval_ctx, const bool need_rewind,
ObIOEventObserver *io_event_observer, ObSqlWorkAreaProfile &profile,
ObMonitorNode &op_monitor_info);
// Sort side: add rows, finish (sorts), then read rows back one at a time.
// All of these are no-ops returning OB_SUCCESS when sort_op_ is null.
int add_sort_row(const ObIArray<ObExpr *> &expr, ObEvalCtx &eval_ctx);
int add_sort_row(const ObChunkDatumStore::StoredRow &sr);
int get_next_row_from_sort(const ObChunkDatumStore::StoredRow *&sr);
int finish_add_sort_row();
// Material side: add rows, finish, then read rows back one at a time.
// All of these are no-ops returning OB_SUCCESS when mat_op_ is null.
int add_material_row(const ObDatum *src_datums,
const int64_t datum_cnt,
const int64_t extra_size,
const ObChunkDatumStore::StoredRow *&store_row);
int get_next_row_from_material(const ObChunkDatumStore::StoredRow *&sr);
int finish_add_material_row();
// bool empty() const { return 0 == row_count_; }
int64_t get_sort_row_count() const { return sort_row_count_; }
int64_t get_material_row_count() const { return material_row_count_; }
DECLARE_VIRTUAL_TO_STRING;
public:
// Number of rows successfully added to sort_op_.
int64_t sort_row_count_;
// Number of rows successfully added to mat_op_.
int64_t material_row_count_;
// Owned; allocated from the allocator in init(), null until then.
ObSortOpImpl *sort_op_;
// Owned; allocated from the allocator in init(), null until then.
ObMaterialOpImpl *mat_op_;
};
struct DllUdfExtra : public ExtraResult
{
explicit DllUdfExtra(common::ObIAllocator &alloc)
@ -476,6 +524,9 @@ public:
uint16_t get_batch_index(uint16_t i) const { return selector_array_[i]; }
int add_batch(const ObIArray<ObExpr *> *param_exprs, ObSortOpImpl *unique_sort_op,
GroupConcatExtraResult *extra_info, ObEvalCtx &eval_ctx) const;
int add_batch(const ObIArray<ObExpr *> *param_exprs,
HybridHistExtraResult *extra_info,
ObEvalCtx &eval_ctx) const;
TO_STRING_KV(K_(count));
const ObBatchRows *brs_;
const uint16_t *selector_array_;
@ -509,6 +560,9 @@ public:
uint16_t get_batch_index(uint16_t i) const { return i; }
int add_batch(const ObIArray<ObExpr *> *param_exprs, ObSortOpImpl *unique_sort_op,
GroupConcatExtraResult *extra_info, ObEvalCtx &eval_ctx) const;
int add_batch(const ObIArray<ObExpr *> *param_exprs,
HybridHistExtraResult *extra_info,
ObEvalCtx &eval_ctx) const;
TO_STRING_KV(K_(begin_pos), K_(end_pos));
const ObBatchRows *brs_;
uint16_t begin_pos_;
@ -548,7 +602,8 @@ public:
public:
ObAggregateProcessor(ObEvalCtx &eval_ctx,
ObIArray<ObAggrInfo> &aggr_infos,
const lib::ObLabel &label);
const lib::ObLabel &label,
ObMonitorNode &op_monitor_info);
~ObAggregateProcessor() { destroy(); };
int init();
@ -855,10 +910,10 @@ private:
ObDatum &result_datum);
int compute_hybrid_hist_result(const ObAggrInfo &aggr_info,
GroupConcatExtraResult *&extra,
HybridHistExtraResult *&extra,
ObDatum &result);
int get_hybrid_hist_result(ObHybridHistograms &hybrid_hist,
int get_hybrid_hist_result(ObHybridHistograms *hybrid_hist,
bool has_lob_header,
ObDatum &result_datum);
@ -971,6 +1026,8 @@ private:
RemovalInfo removal_info_;
bool support_fast_single_row_agg_;
ObIArray<ObEvalInfo *> *op_eval_infos_;
ObSqlWorkAreaProfile profile_;
ObMonitorNode &op_monitor_info_;
};
struct ObAggregateCalcFunc

View File

@ -68,7 +68,8 @@ public:
: ObOperator(exec_ctx, spec, input),
aggr_processor_(eval_ctx_,
(static_cast<ObGroupBySpec &>(const_cast<ObOpSpec &>(spec))).aggr_infos_,
ObModIds::OB_SQL_AGGR_FUNC_ROW)
ObModIds::OB_SQL_AGGR_FUNC_ROW,
op_monitor_info_)
{
}
inline ObAggregateProcessor &get_aggr_processor() { return aggr_processor_; }