[FEAT MERGE] optimizer statistics gather enhance
Co-authored-by: Larry955 <1412857955@qq.com> Co-authored-by: wangt1xiuyi <13547954130@163.com>
This commit is contained in:
@ -27,6 +27,8 @@
|
||||
#include "sql/engine/sort/ob_sort_op_impl.h"
|
||||
#include "sql/engine/expr/ob_expr_json_func_helper.h"
|
||||
#include "sql/engine/expr/ob_expr_lob_utils.h"
|
||||
#include "sql/engine/basic/ob_material_op_impl.h"
|
||||
#include "share/stat/ob_hybrid_hist_estimator.h"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
@ -478,6 +480,203 @@ int ObAggregateProcessor::GroupConcatExtraResult::set_bool_mark(int64_t col_inde
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Releases the sort and material operators held by this result. Each operator
// is destroyed through an explicit destructor call and its buffer is handed
// back to alloc_; the pointer is then cleared.
ObAggregateProcessor::HybridHistExtraResult::~HybridHistExtraResult()
{
  // Sort operator first, material operator second.
  if (NULL != sort_op_) {
    sort_op_->~ObSortOpImpl();
    alloc_.free(sort_op_);
    sort_op_ = NULL;
  }

  if (NULL != mat_op_) {
    mat_op_->~ObMaterialOpImpl();
    alloc_.free(mat_op_);
    mat_op_ = NULL;
  }
}
|
||||
|
||||
void ObAggregateProcessor::HybridHistExtraResult::reuse_self()
|
||||
{
|
||||
if (sort_op_ != NULL) {
|
||||
sort_op_->reuse();
|
||||
}
|
||||
if (mat_op_ != NULL) {
|
||||
mat_op_->reuse();
|
||||
}
|
||||
sort_row_count_ = 0;
|
||||
material_row_count_ = 0;
|
||||
};
|
||||
|
||||
void ObAggregateProcessor::HybridHistExtraResult::reuse()
|
||||
{
|
||||
reuse_self();
|
||||
ExtraResult::reuse();
|
||||
}
|
||||
|
||||
int ObAggregateProcessor::HybridHistExtraResult::init(const uint64_t tenant_id,
|
||||
const ObAggrInfo &aggr_info, ObEvalCtx &eval_ctx, const bool need_rewind,
|
||||
ObIOEventObserver *io_event_observer, ObSqlWorkAreaProfile &profile,
|
||||
ObMonitorNode &op_monitor_info)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
sort_row_count_ = 0;
|
||||
material_row_count_ = 0;
|
||||
if (OB_UNLIKELY(OB_INVALID_ID == tenant_id)) {
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
LOG_WARN("invalid argument", K(tenant_id));
|
||||
} else {
|
||||
if (OB_ISNULL(sort_op_ = static_cast<ObSortOpImpl *>(alloc_.alloc(sizeof(ObSortOpImpl))))) {
|
||||
ret = OB_ALLOCATE_MEMORY_FAILED;
|
||||
LOG_WARN("fall to alloc buff", "size", sizeof(ObSortOpImpl));
|
||||
} else {
|
||||
new (sort_op_) ObSortOpImpl(op_monitor_info_);
|
||||
if (OB_FAIL(sort_op_->init(tenant_id,
|
||||
&aggr_info.sort_collations_,
|
||||
&aggr_info.sort_cmp_funcs_,
|
||||
&eval_ctx,
|
||||
&eval_ctx.exec_ctx_,
|
||||
false,
|
||||
false,
|
||||
need_rewind))) {
|
||||
LOG_WARN("init sort_op_ failed");
|
||||
} else {
|
||||
sort_op_->set_io_event_observer(io_event_observer);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (OB_SUCC(ret)) {
|
||||
if (OB_ISNULL(mat_op_ = static_cast<ObMaterialOpImpl *>(alloc_.alloc(sizeof(ObMaterialOpImpl))))) {
|
||||
ret = OB_ALLOCATE_MEMORY_FAILED;
|
||||
LOG_WARN("fall to alloc buff", "size", sizeof(ObMaterialOpImpl));
|
||||
} else {
|
||||
new (mat_op_) ObMaterialOpImpl(op_monitor_info, profile);
|
||||
if (OB_FAIL(mat_op_->init(tenant_id,
|
||||
&eval_ctx,
|
||||
&eval_ctx.exec_ctx_,
|
||||
io_event_observer))) {
|
||||
LOG_WARN("init mat_op_ failed");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (OB_FAIL(ret)) {
|
||||
if (NULL != sort_op_) {
|
||||
sort_op_->~ObSortOpImpl();
|
||||
alloc_.free(sort_op_);
|
||||
sort_op_ = NULL;
|
||||
}
|
||||
if (NULL != mat_op_) {
|
||||
mat_op_->~ObMaterialOpImpl();
|
||||
alloc_.free(mat_op_);
|
||||
mat_op_ = NULL;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObAggregateProcessor::HybridHistExtraResult::add_sort_row(
|
||||
const ObIArray<ObExpr *> &expr, ObEvalCtx &eval_ctx)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
if (OB_NOT_NULL(sort_op_)) {
|
||||
if (OB_FAIL(sort_op_->add_row(expr))) {
|
||||
LOG_WARN("failed to add row to sort op", K(expr));
|
||||
} else {
|
||||
++sort_row_count_;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObAggregateProcessor::HybridHistExtraResult::add_sort_row(
|
||||
const ObChunkDatumStore::StoredRow &sr)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
if (OB_NOT_NULL(sort_op_)) {
|
||||
if (OB_FAIL(sort_op_->add_stored_row(sr))) {
|
||||
LOG_WARN("failed to add row to sort op", K(sr));
|
||||
} else {
|
||||
++sort_row_count_;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObAggregateProcessor::HybridHistExtraResult::get_next_row_from_sort(
|
||||
const ObChunkDatumStore::StoredRow *&sr)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
sr = NULL;
|
||||
if (OB_NOT_NULL(sort_op_)) {
|
||||
ret = sort_op_->get_next_row(sr);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObAggregateProcessor::HybridHistExtraResult::finish_add_sort_row()
|
||||
{
|
||||
|
||||
int ret = OB_SUCCESS;
|
||||
if (OB_NOT_NULL(sort_op_)) {
|
||||
if (OB_FAIL(sort_op_->sort())) {
|
||||
LOG_WARN("failed to sort rows");
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObAggregateProcessor::HybridHistExtraResult::add_material_row(
|
||||
const ObDatum *src_datums,
|
||||
const int64_t datum_cnt,
|
||||
const int64_t extra_size,
|
||||
const ObChunkDatumStore::StoredRow *&store_row)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
if (OB_NOT_NULL(mat_op_)) {
|
||||
if (OB_FAIL(mat_op_->add_row(src_datums, datum_cnt, extra_size, store_row))) {
|
||||
LOG_WARN("failed to add row to sort op", K(src_datums), K(datum_cnt));
|
||||
} else {
|
||||
++material_row_count_;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
int ObAggregateProcessor::HybridHistExtraResult::get_next_row_from_material(
|
||||
const ObChunkDatumStore::StoredRow *&sr)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
sr = NULL;
|
||||
if (OB_NOT_NULL(mat_op_)) {
|
||||
ret = mat_op_->get_next_row(sr);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObAggregateProcessor::HybridHistExtraResult::finish_add_material_row()
|
||||
{
|
||||
|
||||
int ret = OB_SUCCESS;
|
||||
if (OB_NOT_NULL(mat_op_)) {
|
||||
if (OB_FAIL(mat_op_->finish_add_row())) {
|
||||
LOG_WARN("failed to sort rows");
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Writes a JSON-style description of this result into buf: the two row
// counters by value and the two operators by pointer. Returns pos, the
// position reached in buf.
int64_t ObAggregateProcessor::HybridHistExtraResult::to_string(
    char *buf, const int64_t buf_len) const
{
  int64_t pos = 0;
  J_OBJ_START();
  J_KV(K_(sort_row_count), K_(material_row_count), KP_(sort_op), KP_(mat_op));
  J_OBJ_END();
  return pos;
}
|
||||
|
||||
int64_t ObAggregateProcessor::ExtraResult::to_string(char *buf,
|
||||
const int64_t buf_len) const
|
||||
{
|
||||
@ -536,7 +735,8 @@ ObAggregateProcessor::DllUdfExtra::~DllUdfExtra()
|
||||
|
||||
ObAggregateProcessor::ObAggregateProcessor(ObEvalCtx &eval_ctx,
|
||||
ObIArray<ObAggrInfo> &aggr_infos,
|
||||
const lib::ObLabel &label)
|
||||
const lib::ObLabel &label,
|
||||
ObMonitorNode &op_monitor_info)
|
||||
: has_distinct_(false),
|
||||
has_order_by_(false),
|
||||
has_group_concat_(false),
|
||||
@ -569,7 +769,9 @@ ObAggregateProcessor::ObAggregateProcessor(ObEvalCtx &eval_ctx,
|
||||
io_event_observer_(nullptr),
|
||||
removal_info_(),
|
||||
support_fast_single_row_agg_(false),
|
||||
op_eval_infos_(nullptr)
|
||||
op_eval_infos_(nullptr),
|
||||
profile_(ObSqlWorkAreaType::HASH_WORK_AREA),
|
||||
op_monitor_info_(op_monitor_info)
|
||||
{
|
||||
}
|
||||
|
||||
@ -1578,7 +1780,6 @@ int ObAggregateProcessor::generate_group_row(GroupRow *&new_group_row,
|
||||
case T_FUN_KEEP_WM_CONCAT:
|
||||
case T_FUN_WM_CONCAT:
|
||||
case T_FUN_PL_AGG_UDF:
|
||||
case T_FUN_HYBRID_HIST:
|
||||
case T_FUN_JSON_ARRAYAGG:
|
||||
case T_FUN_ORA_JSON_ARRAYAGG:
|
||||
case T_FUN_JSON_OBJECTAGG:
|
||||
@ -1627,6 +1828,27 @@ int ObAggregateProcessor::generate_group_row(GroupRow *&new_group_row,
|
||||
}
|
||||
break;
|
||||
}
|
||||
case T_FUN_HYBRID_HIST: {
|
||||
void *tmp_buf = NULL;
|
||||
if (OB_ISNULL(tmp_buf = aggr_alloc_.alloc(sizeof(HybridHistExtraResult)))) {
|
||||
ret = OB_ALLOCATE_MEMORY_FAILED;
|
||||
LOG_WARN("allocate memory failed", "size", sizeof(HybridHistExtraResult));
|
||||
} else {
|
||||
HybridHistExtraResult *result = new (tmp_buf) HybridHistExtraResult(aggr_alloc_);
|
||||
aggr_cell.set_extra(result);
|
||||
const bool need_rewind = (in_window_func_ || group_id > 0);
|
||||
if (OB_FAIL(result->init(eval_ctx_.exec_ctx_.get_my_session()->get_effective_tenant_id(),
|
||||
aggr_info,
|
||||
eval_ctx_,
|
||||
need_rewind,
|
||||
io_event_observer_,
|
||||
profile_,
|
||||
op_monitor_info_))) {
|
||||
LOG_WARN("init hybrid hist extra result failed");
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case T_FUN_TOP_FRE_HIST: {
|
||||
void *tmp_buf = NULL;
|
||||
if (OB_ISNULL(tmp_buf = aggr_alloc_.alloc(sizeof(TopKFreHistExtraResult)))) {
|
||||
@ -1971,7 +2193,6 @@ int ObAggregateProcessor::rollup_aggregation(AggrCell &aggr_cell, AggrCell &roll
|
||||
case T_FUN_KEEP_WM_CONCAT:
|
||||
case T_FUN_WM_CONCAT:
|
||||
case T_FUN_PL_AGG_UDF:
|
||||
case T_FUN_HYBRID_HIST:
|
||||
case T_FUN_JSON_ARRAYAGG:
|
||||
case T_FUN_ORA_JSON_ARRAYAGG:
|
||||
case T_FUN_JSON_OBJECTAGG:
|
||||
@ -2016,6 +2237,12 @@ int ObAggregateProcessor::rollup_aggregation(AggrCell &aggr_cell, AggrCell &roll
|
||||
}
|
||||
break;
|
||||
}
|
||||
case T_FUN_HYBRID_HIST: {
|
||||
ret = OB_NOT_SUPPORTED;
|
||||
LOG_WARN("rollup contain agg hybrid hist still not supported", K(ret));
|
||||
LOG_USER_ERROR(OB_NOT_SUPPORTED, "rollup contain hybrid hist");
|
||||
break;
|
||||
}
|
||||
case T_FUN_AGG_UDF: {
|
||||
ret = OB_NOT_SUPPORTED;
|
||||
LOG_WARN("rollup contain agg udfs still not supported", K(ret));
|
||||
@ -2164,7 +2391,6 @@ int ObAggregateProcessor::prepare_aggr_result(const ObChunkDatumStore::StoredRow
|
||||
case T_FUN_KEEP_WM_CONCAT:
|
||||
case T_FUN_WM_CONCAT:
|
||||
case T_FUN_PL_AGG_UDF:
|
||||
case T_FUN_HYBRID_HIST:
|
||||
case T_FUN_JSON_ARRAYAGG:
|
||||
case T_FUN_ORA_JSON_ARRAYAGG:
|
||||
case T_FUN_JSON_OBJECTAGG:
|
||||
@ -2209,6 +2435,23 @@ int ObAggregateProcessor::prepare_aggr_result(const ObChunkDatumStore::StoredRow
|
||||
}
|
||||
break;
|
||||
}
|
||||
case T_FUN_HYBRID_HIST: {
|
||||
HybridHistExtraResult *extra = NULL;
|
||||
if (OB_ISNULL(extra = static_cast<HybridHistExtraResult *>(aggr_cell.get_extra()))) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("extra is null", K(aggr_cell));
|
||||
} else {
|
||||
extra->reuse_self();
|
||||
if (param_exprs == NULL && OB_FAIL(extra->add_sort_row(stored_row))) {
|
||||
LOG_WARN("fail to add row");
|
||||
} else if (param_exprs != NULL && OB_FAIL(extra->add_sort_row(*param_exprs, eval_ctx_))) {
|
||||
LOG_WARN("fail to add row");
|
||||
} else {
|
||||
LOG_DEBUG("succ to add row", K(stored_row), KPC(extra));
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case T_FUN_TOP_FRE_HIST: {
|
||||
TopKFreHistExtraResult *extra = NULL;
|
||||
if (OB_ISNULL(extra = static_cast<TopKFreHistExtraResult *>(aggr_cell.get_extra()))) {
|
||||
@ -2386,7 +2629,6 @@ int ObAggregateProcessor::process_aggr_batch_result(
|
||||
case T_FUN_KEEP_WM_CONCAT:
|
||||
case T_FUN_WM_CONCAT:
|
||||
case T_FUN_PL_AGG_UDF:
|
||||
case T_FUN_HYBRID_HIST:
|
||||
case T_FUN_JSON_ARRAYAGG:
|
||||
case T_FUN_ORA_JSON_ARRAYAGG:
|
||||
case T_FUN_JSON_OBJECTAGG:
|
||||
@ -2420,6 +2662,16 @@ int ObAggregateProcessor::process_aggr_batch_result(
|
||||
}
|
||||
break;
|
||||
}
|
||||
case T_FUN_HYBRID_HIST: {
|
||||
HybridHistExtraResult *extra_info = NULL;
|
||||
if (OB_ISNULL(extra_info = static_cast<HybridHistExtraResult *>(aggr_cell.get_extra()))) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("extra is null", K(aggr_cell));
|
||||
} else if (OB_FAIL(selector.add_batch(param_exprs, extra_info, eval_ctx_))) {
|
||||
LOG_WARN("add batch failed");
|
||||
}
|
||||
break;
|
||||
}
|
||||
case T_FUN_TOP_FRE_HIST: {
|
||||
TopKFreHistExtraResult *extra_info = NULL;
|
||||
if (OB_ISNULL(extra_info = static_cast<TopKFreHistExtraResult *>(aggr_cell.get_extra()))) {
|
||||
@ -2615,7 +2867,6 @@ int ObAggregateProcessor::process_aggr_result(const ObChunkDatumStore::StoredRow
|
||||
case T_FUN_KEEP_WM_CONCAT:
|
||||
case T_FUN_WM_CONCAT:
|
||||
case T_FUN_PL_AGG_UDF:
|
||||
case T_FUN_HYBRID_HIST:
|
||||
case T_FUN_JSON_ARRAYAGG:
|
||||
case T_FUN_ORA_JSON_ARRAYAGG:
|
||||
case T_FUN_JSON_OBJECTAGG:
|
||||
@ -2635,6 +2886,20 @@ int ObAggregateProcessor::process_aggr_result(const ObChunkDatumStore::StoredRow
|
||||
}
|
||||
break;
|
||||
}
|
||||
case T_FUN_HYBRID_HIST: {
|
||||
HybridHistExtraResult *extra = NULL;
|
||||
if (OB_ISNULL(extra = static_cast<HybridHistExtraResult *>(aggr_cell.get_extra()))) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("extra is null", K(aggr_cell));
|
||||
} else if (param_exprs == NULL && OB_FAIL(extra->add_sort_row(stored_row))) {
|
||||
LOG_WARN("fail to add row");
|
||||
} else if (param_exprs != NULL && OB_FAIL(extra->add_sort_row(*param_exprs, eval_ctx_))) {
|
||||
LOG_WARN("fail to add row");
|
||||
} else {
|
||||
LOG_DEBUG("succ to add row", K(stored_row), KPC(extra));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case T_FUN_TOP_FRE_HIST: {
|
||||
TopKFreHistExtraResult *extra = NULL;
|
||||
if (OB_ISNULL(extra = static_cast<TopKFreHistExtraResult *>(aggr_cell.get_extra()))) {
|
||||
@ -3486,7 +3751,7 @@ int ObAggregateProcessor::collect_aggr_result(
|
||||
break;
|
||||
}
|
||||
case T_FUN_HYBRID_HIST: {
|
||||
GroupConcatExtraResult *extra = static_cast<GroupConcatExtraResult *>(aggr_cell.get_extra());
|
||||
HybridHistExtraResult *extra = static_cast<HybridHistExtraResult *>(aggr_cell.get_extra());
|
||||
if (OB_FAIL(compute_hybrid_hist_result(aggr_info, extra, result))) {
|
||||
LOG_WARN("failed to compute_hybrid_hist_result", K(ret));
|
||||
} else {
|
||||
@ -4431,6 +4696,25 @@ int ObAggregateProcessor::ObBatchRowsSlice::add_batch(
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObAggregateProcessor::ObBatchRowsSlice::add_batch(
|
||||
const ObIArray<ObExpr *> *param_exprs,
|
||||
HybridHistExtraResult *extra_info,
|
||||
ObEvalCtx &eval_ctx
|
||||
) const
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
if (OB_NOT_NULL(extra_info->sort_op_)) {
|
||||
int64_t stored_rows_count = 0;
|
||||
if (OB_FAIL(extra_info->sort_op_->add_batch(
|
||||
*param_exprs, *brs_->skip_, end_pos_, begin_pos_, &stored_rows_count))) {
|
||||
LOG_WARN("failed to add batch");
|
||||
} else {
|
||||
extra_info->sort_row_count_ += stored_rows_count;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObAggregateProcessor::ObSelector::add_batch(
|
||||
const ObIArray<ObExpr *> *param_exprs,
|
||||
ObSortOpImpl *unique_sort_op,
|
||||
@ -4463,6 +4747,24 @@ int ObAggregateProcessor::ObSelector::add_batch(
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObAggregateProcessor::ObSelector::add_batch(
|
||||
const ObIArray<ObExpr *> *param_exprs,
|
||||
HybridHistExtraResult *extra_info,
|
||||
ObEvalCtx &eval_ctx
|
||||
) const
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
if (OB_NOT_NULL(extra_info->sort_op_)) {
|
||||
if (OB_FAIL(extra_info->sort_op_->add_batch(
|
||||
*param_exprs, *brs_->skip_, brs_->size_, selector_array_, count_))) {
|
||||
LOG_WARN("failed to add batch");
|
||||
} else {
|
||||
extra_info->sort_row_count_ += count_;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
int ObAggregateProcessor::top_fre_hist_calc_batch(
|
||||
const ObAggrInfo &aggr_info,
|
||||
@ -5010,15 +5312,10 @@ int ObAggregateProcessor::llc_add_value(const uint64_t value, char *llc_bitmap_b
|
||||
pmax = ObExprEstimateNdv::llc_leading_zeros(value << LLC_BUCKET_BITS, 64 - LLC_BUCKET_BITS) + 1;
|
||||
}
|
||||
ObString::obstr_size_t llc_num_buckets = size;
|
||||
if (OB_UNLIKELY(size != get_llc_size())) {
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
LOG_WARN("buffer size don't match", K(size), K(get_llc_size()));
|
||||
} else if (OB_UNLIKELY(!ObExprEstimateNdv::llc_is_num_buckets_valid(llc_num_buckets))
|
||||
|| OB_UNLIKELY(llc_num_buckets <= bucket_index)) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("llc_add_value failed because number of buckets is not valid",
|
||||
K(llc_num_buckets), K(bucket_index), K(ret));
|
||||
} else if (pmax > static_cast<uint8_t>(llc_bitmap_buf[bucket_index])) {
|
||||
OB_ASSERT(size == get_llc_size());
|
||||
OB_ASSERT(ObExprEstimateNdv::llc_is_num_buckets_valid(llc_num_buckets));
|
||||
OB_ASSERT(llc_num_buckets > bucket_index);
|
||||
if (pmax > static_cast<uint8_t>(llc_bitmap_buf[bucket_index])) {
|
||||
// 理论上pmax不会超过65.
|
||||
llc_bitmap_buf[bucket_index] = static_cast<uint8_t>(pmax);
|
||||
}
|
||||
@ -5633,19 +5930,21 @@ int ObAggregateProcessor::convert_datum_to_obj(const ObAggrInfo &aggr_info,
|
||||
}
|
||||
|
||||
int ObAggregateProcessor::compute_hybrid_hist_result(const ObAggrInfo &aggr_info,
|
||||
GroupConcatExtraResult *&extra,
|
||||
HybridHistExtraResult *&extra,
|
||||
ObDatum &result)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
ObHybridHistograms hybrid_hist;
|
||||
ObSEArray<BucketNode, 4> bucket_nodes;
|
||||
ObDatum *bucket_num_result = NULL;
|
||||
int64_t bucket_num = 0;
|
||||
int64_t num_distinct = 0;
|
||||
int64_t null_count = 0;
|
||||
int64_t total_count = 0;
|
||||
int64_t pop_count = 0;
|
||||
int64_t pop_freq = 0;
|
||||
if (OB_ISNULL(extra) || OB_ISNULL(aggr_info.bucket_num_param_expr_) ||
|
||||
OB_UNLIKELY(extra->empty() || aggr_info.param_exprs_.count() != 1 ||
|
||||
OB_UNLIKELY(extra->get_sort_row_count() == 0 ||
|
||||
aggr_info.param_exprs_.count() != 1 ||
|
||||
aggr_info.sort_collations_.count() != 1 ||
|
||||
!aggr_info.bucket_num_param_expr_->obj_meta_.is_numeric_type())) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
@ -5662,45 +5961,63 @@ int ObAggregateProcessor::compute_hybrid_hist_result(const ObAggrInfo &aggr_info
|
||||
} else if (bucket_num <= 0) {
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
LOG_WARN("get invalid argument", K(ret), K(bucket_num));
|
||||
} else if (extra->is_iterated() && OB_FAIL(extra->rewind())) {
|
||||
LOG_WARN("rewind failed", KPC(extra), K(ret));
|
||||
} else if (!extra->is_iterated() && OB_FAIL(extra->finish_add_row())) {
|
||||
} else if ( OB_FAIL(extra->finish_add_sort_row())) {
|
||||
LOG_WARN("finish_add_row failed", KPC(extra), K(ret));
|
||||
} else {
|
||||
ObObj pre_obj;
|
||||
pre_obj.set_null();
|
||||
ObChunkDatumStore::LastStoredRow prev_row(aggr_alloc_);
|
||||
const int64_t extra_size = sizeof(BucketDesc);
|
||||
int64_t repeat_count = 0;
|
||||
const ObChunkDatumStore::StoredRow *stored_row = NULL;
|
||||
while (OB_SUCC(ret) && OB_SUCC(extra->get_next_row(stored_row))) {
|
||||
ObObj cur_obj;
|
||||
const ObChunkDatumStore::StoredRow *mat_stored_row = NULL;
|
||||
// get null count
|
||||
while (OB_SUCC(ret) && OB_SUCC(extra->get_next_row_from_sort(stored_row))) {
|
||||
if (OB_ISNULL(stored_row) || OB_UNLIKELY(stored_row->cnt_ != 1)) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("get unexpected null", K(ret), K(stored_row));
|
||||
} else if (OB_FAIL(stored_row->cells()[0].to_obj(cur_obj,
|
||||
aggr_info.param_exprs_.at(0)->obj_meta_))) {
|
||||
LOG_WARN("failed to obj", K(ret));
|
||||
} else if (stored_row->cells()[0].is_null()) {
|
||||
++ null_count;
|
||||
} else if (OB_FAIL(prev_row.save_store_row(*stored_row))) {
|
||||
LOG_WARN("failed to deep copy limit last rows", K(ret));
|
||||
} else {
|
||||
++ total_count;
|
||||
if (cur_obj.is_null()) {
|
||||
++ null_count;
|
||||
} else if (!pre_obj.is_null() &&
|
||||
pre_obj.is_equal(cur_obj, aggr_info.sort_collations_.at(0).cs_type_)) {
|
||||
++ repeat_count;
|
||||
} else {
|
||||
// new value is found
|
||||
if (repeat_count > 0 && !pre_obj.is_null()) {
|
||||
if (OB_FAIL(bucket_nodes.push_back(BucketNode(pre_obj, repeat_count)))) {
|
||||
LOG_WARN("failed to push back bucket node", K(ret));
|
||||
} else {/*do nothing*/}
|
||||
}
|
||||
if (OB_FAIL(ret)) {
|
||||
} else if (OB_FAIL(ob_write_obj(aggr_alloc_, cur_obj, pre_obj))) {
|
||||
LOG_WARN("fail to clone cell", K(ret), K(cur_obj));
|
||||
} else {
|
||||
repeat_count = 1;
|
||||
++ num_distinct;
|
||||
}
|
||||
repeat_count = 1;
|
||||
++ num_distinct;
|
||||
break;
|
||||
}
|
||||
}
|
||||
total_count = extra->get_sort_row_count() - null_count;
|
||||
int64_t pop_threshold = total_count / bucket_num;
|
||||
// get all bucket node and store them into chunk datum store
|
||||
while (OB_SUCC(ret) && OB_SUCC(extra->get_next_row_from_sort(stored_row))) {
|
||||
bool is_equal = false;
|
||||
if (OB_ISNULL(stored_row) || OB_UNLIKELY(stored_row->cnt_ != 1)) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("get unexpected null", K(stored_row));
|
||||
} else if (OB_FAIL(check_rows_equal(prev_row, *stored_row, aggr_info, is_equal))) {
|
||||
LOG_WARN("failed to is order by item equal with prev row", K(ret));
|
||||
} else if (is_equal) {
|
||||
++ repeat_count;
|
||||
} else if (OB_ISNULL(prev_row.store_row_)) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("get unexpected null", K(prev_row.store_row_));
|
||||
} else if (OB_FAIL(extra->add_material_row(prev_row.store_row_->cells(),
|
||||
prev_row.store_row_->cnt_,
|
||||
extra_size, mat_stored_row))) {
|
||||
LOG_WARN("failed to add material row");
|
||||
} else if (OB_FAIL(prev_row.save_store_row(*stored_row))) {
|
||||
LOG_WARN("failed to deep copy limit last rows", K(ret));
|
||||
} else if (OB_ISNULL(mat_stored_row)) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("get unexpected null", K(mat_stored_row));
|
||||
} else {
|
||||
BucketDesc *desc = reinterpret_cast<BucketDesc*>(mat_stored_row->get_extra_payload());
|
||||
desc->ep_count_ = repeat_count;
|
||||
desc->is_pop_ = repeat_count > pop_threshold;
|
||||
if (desc->is_pop_) {
|
||||
pop_freq += repeat_count;
|
||||
++ pop_count;
|
||||
}
|
||||
repeat_count = 1;
|
||||
++ num_distinct;
|
||||
}
|
||||
}
|
||||
if (ret != OB_ITER_END && ret != OB_SUCCESS) {
|
||||
@ -5708,15 +6025,35 @@ int ObAggregateProcessor::compute_hybrid_hist_result(const ObAggrInfo &aggr_info
|
||||
} else {
|
||||
ret = OB_SUCCESS;
|
||||
bool has_lob_header = aggr_info.expr_->obj_meta_.has_lob_header();
|
||||
if (!pre_obj.is_null() &&
|
||||
OB_FAIL(bucket_nodes.push_back(BucketNode(pre_obj, repeat_count)))) {
|
||||
LOG_WARN("failed to add the last value into bucket", K(ret));
|
||||
} else if (OB_FAIL(hybrid_hist.build_hybrid_hist(bucket_nodes,
|
||||
bucket_num,
|
||||
total_count - null_count,
|
||||
num_distinct))) {
|
||||
LOG_WARN("failed to build hybrid hist", K(ret));
|
||||
} else if (OB_FAIL(get_hybrid_hist_result(hybrid_hist, has_lob_header, result))) {
|
||||
if (prev_row.store_row_ != nullptr) {
|
||||
if (OB_ISNULL(prev_row.store_row_)) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("get unexpected null", K(prev_row.store_row_));
|
||||
} else if (OB_FAIL(extra->add_material_row(prev_row.store_row_->cells(),
|
||||
prev_row.store_row_->cnt_,
|
||||
extra_size, mat_stored_row))) {
|
||||
LOG_WARN("failed to add material row");
|
||||
} else if (OB_ISNULL(mat_stored_row)) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("get unexpected null", K(mat_stored_row));
|
||||
} else {
|
||||
BucketDesc *desc = reinterpret_cast<BucketDesc*>(mat_stored_row->get_extra_payload());
|
||||
desc->ep_count_ = repeat_count;
|
||||
desc->is_pop_ = repeat_count > pop_threshold;
|
||||
if (desc->is_pop_) {
|
||||
pop_freq += repeat_count;
|
||||
++ pop_count;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (OB_FAIL(ret)) {
|
||||
} else if (OB_FAIL(extra->finish_add_material_row())) {
|
||||
LOG_WARN("failed to finish add material row", K(ret));
|
||||
} else if (OB_FAIL(hybrid_hist.build_hybrid_hist(extra, &aggr_alloc_, bucket_num, total_count,
|
||||
num_distinct, pop_count, pop_freq,
|
||||
aggr_info.param_exprs_.at(0)->obj_meta_))) {
|
||||
LOG_WARN("failed to build hybrid hist", K(ret), K(&aggr_alloc_));
|
||||
} else if (OB_FAIL(get_hybrid_hist_result(&hybrid_hist, has_lob_header, result))) {
|
||||
LOG_WARN("failed to get hybrid hist result", K(ret));
|
||||
} else {/*do nothing*/}
|
||||
}
|
||||
@ -5724,21 +6061,24 @@ int ObAggregateProcessor::compute_hybrid_hist_result(const ObAggrInfo &aggr_info
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObAggregateProcessor::get_hybrid_hist_result(ObHybridHistograms &hybrid_hist,
|
||||
int ObAggregateProcessor::get_hybrid_hist_result(ObHybridHistograms *hybrid_hist,
|
||||
bool has_lob_header,
|
||||
ObDatum &result_datum)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
if (hybrid_hist.get_buckets().count() > 0) {
|
||||
if (OB_ISNULL(hybrid_hist)) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("get null hybrid histograms", K(ret));
|
||||
} else if (hybrid_hist->get_buckets().count() > 0) {
|
||||
char *buf = NULL;
|
||||
int64_t buf_size = hybrid_hist.get_serialize_size();
|
||||
int64_t buf_size = hybrid_hist->get_serialize_size();
|
||||
int64_t buf_pos = 0;
|
||||
ObTextStringResult new_tmp_lob(ObLongTextType, has_lob_header, &aggr_alloc_);
|
||||
if (OB_FAIL(new_tmp_lob.init(buf_size))) {
|
||||
LOG_WARN("tmp lob init failed", K(ret), K(buf_size));
|
||||
} else if (OB_FAIL(new_tmp_lob.get_reserved_buffer(buf, buf_size))) {
|
||||
LOG_WARN("tmp lob append failed", K(ret), K(new_tmp_lob));
|
||||
} else if (OB_FAIL(hybrid_hist.serialize(buf, buf_size, buf_pos))) {
|
||||
} else if (OB_FAIL(hybrid_hist->serialize(buf, buf_size, buf_pos))) {
|
||||
LOG_WARN("fail serialize init task arg", KP(buf), K(buf_size), K(buf_pos), K(ret));
|
||||
} else if (OB_FAIL(new_tmp_lob.lseek(buf_pos, 0))) {
|
||||
LOG_WARN("temp lob lseek failed", K(ret), K(new_tmp_lob), K(buf_pos));
|
||||
@ -5746,11 +6086,11 @@ int ObAggregateProcessor::get_hybrid_hist_result(ObHybridHistograms &hybrid_hist
|
||||
ObString lob_loc_str;
|
||||
new_tmp_lob.get_result_buffer(lob_loc_str);
|
||||
result_datum.set_string(lob_loc_str);
|
||||
LOG_TRACE("succeed to get topK fre hist result", K(result_datum), K(hybrid_hist));
|
||||
LOG_TRACE("succeed to get hybrid hist result", K(result_datum), KPC(hybrid_hist));
|
||||
}
|
||||
} else {
|
||||
result_datum.set_null();
|
||||
LOG_TRACE("succeed to get topK fre hist result", K(result_datum), K(hybrid_hist));
|
||||
LOG_TRACE("succeed to get hybrid hist result", K(result_datum), KPC(hybrid_hist));
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -28,13 +28,18 @@
|
||||
#include "sql/engine/user_defined_function/ob_pl_user_defined_agg_function.h"
|
||||
#include "sql/engine/expr/ob_expr_dll_udf.h"
|
||||
#include "sql/engine/expr/ob_rt_datum_arith.h"
|
||||
#include "share/stat/ob_hybrid_hist_estimator.h"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
namespace common
|
||||
{
|
||||
class ObHybridHistograms;
|
||||
}
|
||||
namespace sql
|
||||
{
|
||||
|
||||
class ObMaterialOpImpl;
|
||||
|
||||
struct RemovalInfo
|
||||
{
|
||||
RemovalInfo()
|
||||
@ -375,6 +380,49 @@ public:
|
||||
common::ObFixedArray<bool, common::ObIAllocator> bool_mark_;
|
||||
};
|
||||
|
||||
class HybridHistExtraResult : public ExtraResult
|
||||
{
|
||||
public:
|
||||
explicit HybridHistExtraResult(common::ObIAllocator &alloc)
|
||||
: ExtraResult(alloc),
|
||||
sort_row_count_(0),
|
||||
material_row_count_(0),
|
||||
sort_op_(nullptr),
|
||||
mat_op_(nullptr)
|
||||
{}
|
||||
virtual ~HybridHistExtraResult();
|
||||
void reuse_self();
|
||||
virtual void reuse() override;
|
||||
|
||||
int init(const uint64_t tenant_id, const ObAggrInfo &aggr_info,
|
||||
ObEvalCtx &eval_ctx, const bool need_rewind,
|
||||
ObIOEventObserver *io_event_observer, ObSqlWorkAreaProfile &profile,
|
||||
ObMonitorNode &op_monitor_info);
|
||||
|
||||
int add_sort_row(const ObIArray<ObExpr *> &expr, ObEvalCtx &eval_ctx);
|
||||
int add_sort_row(const ObChunkDatumStore::StoredRow &sr);
|
||||
int get_next_row_from_sort(const ObChunkDatumStore::StoredRow *&sr);
|
||||
int finish_add_sort_row();
|
||||
int add_material_row(const ObDatum *src_datums,
|
||||
const int64_t datum_cnt,
|
||||
const int64_t extra_size,
|
||||
const ObChunkDatumStore::StoredRow *&store_row);
|
||||
int get_next_row_from_material(const ObChunkDatumStore::StoredRow *&sr);
|
||||
int finish_add_material_row();
|
||||
|
||||
// bool empty() const { return 0 == row_count_; }
|
||||
int64_t get_sort_row_count() const { return sort_row_count_; }
|
||||
int64_t get_material_row_count() const { return material_row_count_; }
|
||||
DECLARE_VIRTUAL_TO_STRING;
|
||||
|
||||
public:
|
||||
int64_t sort_row_count_;
|
||||
int64_t material_row_count_;
|
||||
|
||||
ObSortOpImpl *sort_op_;
|
||||
ObMaterialOpImpl *mat_op_;
|
||||
};
|
||||
|
||||
struct DllUdfExtra : public ExtraResult
|
||||
{
|
||||
explicit DllUdfExtra(common::ObIAllocator &alloc)
|
||||
@ -476,6 +524,9 @@ public:
|
||||
uint16_t get_batch_index(uint16_t i) const { return selector_array_[i]; }
|
||||
int add_batch(const ObIArray<ObExpr *> *param_exprs, ObSortOpImpl *unique_sort_op,
|
||||
GroupConcatExtraResult *extra_info, ObEvalCtx &eval_ctx) const;
|
||||
int add_batch(const ObIArray<ObExpr *> *param_exprs,
|
||||
HybridHistExtraResult *extra_info,
|
||||
ObEvalCtx &eval_ctx) const;
|
||||
TO_STRING_KV(K_(count));
|
||||
const ObBatchRows *brs_;
|
||||
const uint16_t *selector_array_;
|
||||
@ -509,6 +560,9 @@ public:
|
||||
uint16_t get_batch_index(uint16_t i) const { return i; }
|
||||
int add_batch(const ObIArray<ObExpr *> *param_exprs, ObSortOpImpl *unique_sort_op,
|
||||
GroupConcatExtraResult *extra_info, ObEvalCtx &eval_ctx) const;
|
||||
int add_batch(const ObIArray<ObExpr *> *param_exprs,
|
||||
HybridHistExtraResult *extra_info,
|
||||
ObEvalCtx &eval_ctx) const;
|
||||
TO_STRING_KV(K_(begin_pos), K_(end_pos));
|
||||
const ObBatchRows *brs_;
|
||||
uint16_t begin_pos_;
|
||||
@ -548,7 +602,8 @@ public:
|
||||
public:
|
||||
ObAggregateProcessor(ObEvalCtx &eval_ctx,
|
||||
ObIArray<ObAggrInfo> &aggr_infos,
|
||||
const lib::ObLabel &label);
|
||||
const lib::ObLabel &label,
|
||||
ObMonitorNode &op_monitor_info);
|
||||
~ObAggregateProcessor() { destroy(); };
|
||||
|
||||
int init();
|
||||
@ -855,10 +910,10 @@ private:
|
||||
ObDatum &result_datum);
|
||||
|
||||
int compute_hybrid_hist_result(const ObAggrInfo &aggr_info,
|
||||
GroupConcatExtraResult *&extra,
|
||||
HybridHistExtraResult *&extra,
|
||||
ObDatum &result);
|
||||
|
||||
int get_hybrid_hist_result(ObHybridHistograms &hybrid_hist,
|
||||
int get_hybrid_hist_result(ObHybridHistograms *hybrid_hist,
|
||||
bool has_lob_header,
|
||||
ObDatum &result_datum);
|
||||
|
||||
@ -971,6 +1026,8 @@ private:
|
||||
RemovalInfo removal_info_;
|
||||
bool support_fast_single_row_agg_;
|
||||
ObIArray<ObEvalInfo *> *op_eval_infos_;
|
||||
ObSqlWorkAreaProfile profile_;
|
||||
ObMonitorNode &op_monitor_info_;
|
||||
};
|
||||
|
||||
struct ObAggregateCalcFunc
|
||||
|
||||
@ -68,7 +68,8 @@ public:
|
||||
: ObOperator(exec_ctx, spec, input),
|
||||
aggr_processor_(eval_ctx_,
|
||||
(static_cast<ObGroupBySpec &>(const_cast<ObOpSpec &>(spec))).aggr_infos_,
|
||||
ObModIds::OB_SQL_AGGR_FUNC_ROW)
|
||||
ObModIds::OB_SQL_AGGR_FUNC_ROW,
|
||||
op_monitor_info_)
|
||||
{
|
||||
}
|
||||
inline ObAggregateProcessor &get_aggr_processor() { return aggr_processor_; }
|
||||
|
||||
Reference in New Issue
Block a user