/** * Copyright (c) 2021 OceanBase * OceanBase CE is licensed under Mulan PubL v2. * You can use this software according to the terms and conditions of the Mulan PubL v2. * You may obtain a copy of Mulan PubL v2 at: * http://license.coscl.org.cn/MulanPubL-2.0 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. * See the Mulan PubL v2 for more details. */ #define USING_LOG_PREFIX SQL_ENG #include "sql/engine/aggregate/ob_merge_groupby_op.h" #include "lib/number/ob_number_v2.h" #include "sql/engine/px/ob_px_sqc_proxy.h" #include "lib/utility/ob_hyperloglog.h" #include "sql/engine/px/ob_px_util.h" #include "sql/engine/px/ob_px_sqc_handler.h" namespace oceanbase { using namespace common; namespace sql { OB_SERIALIZE_MEMBER((ObMergeGroupBySpec, ObGroupBySpec), group_exprs_, rollup_exprs_, is_duplicate_rollup_expr_, has_rollup_, distinct_exprs_, is_parallel_, rollup_status_, rollup_id_expr_, sort_exprs_, sort_collations_, sort_cmp_funcs_, enable_encode_sort_ ); DEF_TO_STRING(ObMergeGroupBySpec) { int64_t pos = 0; J_OBJ_START(); J_NAME("groupby_spec"); J_COLON(); pos += ObGroupBySpec::to_string(buf + pos, buf_len - pos); J_COMMA(); J_KV(K_(group_exprs), K_(rollup_exprs), K_(is_duplicate_rollup_expr), K_(has_rollup)); J_OBJ_END(); return pos; } int ObMergeGroupBySpec::add_group_expr(ObExpr *expr) { int ret = OB_SUCCESS; if (OB_ISNULL(expr)) { ret = OB_INVALID_ARGUMENT; OB_LOG(WARN, "invalid argument", K(ret)); } else if (OB_FAIL(group_exprs_.push_back(expr))) { LOG_ERROR("failed to push_back expr"); } return ret; } int ObMergeGroupBySpec::add_rollup_expr(ObExpr *expr) { int ret = OB_SUCCESS; if (OB_ISNULL(expr)) { ret = OB_INVALID_ARGUMENT; OB_LOG(WARN, "invalid argument", K(ret)); } else if (OB_FAIL(rollup_exprs_.push_back(expr))) { LOG_ERROR("failed to push_back expr"); } return ret; } void ObMergeGroupByOp::reset() { 
is_end_ = false; cur_output_group_id_= OB_INVALID_INDEX; first_output_group_id_ = 0; last_child_output_.reset(); curr_group_rowid_ = common::OB_INVALID_INDEX; output_queue_cnt_ = 0; for(auto i = 0; i < output_groupby_rows_.count(); i++) { if (OB_NOT_NULL(output_groupby_rows_.at(i))) { output_groupby_rows_.at(i)->reset(); } } brs_holder_.reset(); output_groupby_rows_.reset(); cur_group_row_ = nullptr; is_first_calc_ = true; cur_group_last_row_idx_ = -1; first_batch_from_sort_ = false; partial_rollup_idx_ = INT64_MAX; cur_grouping_id_ = INT64_MAX; use_sort_data_ = false; inner_sort_.reset(); global_rollup_key_.reset(); } int ObMergeGroupByOp::init_rollup_distributor() { int ret = OB_SUCCESS; if (ObRollupStatus::ROLLUP_DISTRIBUTOR == MY_SPEC.rollup_status_) { // init hyperloglog calculator to calculate ndv char *buf = (char*)ctx_.get_allocator().alloc( sizeof(ObHyperLogLogCalculator) * (MY_SPEC.rollup_exprs_.count() + 1)); if (OB_ISNULL(buf)) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("failed to allocate memory", K(ret)); } else { // init ndv calculator ndv_calculator_ = reinterpret_cast(buf); for (int64_t i = 0; i < MY_SPEC.rollup_exprs_.count() + 1 && OB_SUCC(ret); ++i) { new (&ndv_calculator_[i]) ObHyperLogLogCalculator(); if (OB_FAIL(ndv_calculator_[i].init(&ctx_.get_allocator(), N_HYPERLOGLOG_BIT))) { LOG_WARN("failed to initialize ndv calculator", K(ret)); } } // init sort if (OB_FAIL(ret)) { } else if (0 == all_groupby_exprs_.count()) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected status: all_groupby_exprs is empty", K(ret)); } else if (OB_FAIL((append(inner_sort_exprs_, MY_SPEC.sort_exprs_)))) { LOG_WARN("failed to append exprs", K(ret)); } else { for (int64_t i = 0; i < child_->get_spec().output_.count() && OB_SUCC(ret); ++i) { ObExpr *expr = child_->get_spec().output_.at(i); if (!is_contain(inner_sort_exprs_, expr)) { if (OB_FAIL(inner_sort_exprs_.push_back(expr))) { LOG_WARN("failed to push back expr", K(ret)); } } } } int64_t row_count = 
child_->get_spec().rows_; if (OB_FAIL(ret)) { } else if (OB_FAIL(inner_sort_.init(ctx_.get_my_session()->get_effective_tenant_id(), &MY_SPEC.sort_collations_, &MY_SPEC.sort_cmp_funcs_, &eval_ctx_, &ctx_, MY_SPEC.enable_encode_sort_, false, false /* need_rewind */))) { LOG_WARN("failed to init sort", K(ret)); } else if (OB_FAIL(ObPxEstimateSizeUtil::get_px_size(&ctx_, MY_SPEC.px_est_size_factor_, row_count, row_count))) { LOG_WARN("failed to get px size", K(ret)); } else { inner_sort_.set_input_rows(row_count); inner_sort_.set_input_width(MY_SPEC.width_); inner_sort_.set_operator_type(MY_SPEC.type_); inner_sort_.set_operator_id(MY_SPEC.id_); inner_sort_.set_io_event_observer(&io_event_observer_); } // init hash values if (OB_SUCC(ret) && is_vectorized()) { int64_t max_size = MY_SPEC.max_batch_size_; int64_t rollup_hash_vals_pos = 0; int64_t sort_batch_skip_pos = rollup_hash_vals_pos + sizeof(uint64_t) * max_size; int64_t max_mem_size = sort_batch_skip_pos + ObBitVector::memory_size(max_size); char *buf = (char*)ctx_.get_allocator().alloc(max_mem_size); if (OB_ISNULL(buf)) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("failed to allocate memory", K(ret), K(max_size)); } else { MEMSET(buf, 0, max_mem_size); rollup_hash_vals_ = reinterpret_cast(buf); sort_batch_rows_.skip_ = to_bit_vector(buf + sort_batch_skip_pos); } } } } return ret; } // it's need call for rescan int ObMergeGroupByOp::init_group_rows() { int ret = OB_SUCCESS; const int64_t col_count = (MY_SPEC.has_rollup_ ? 
      (MY_SPEC.group_exprs_.count() + MY_SPEC.rollup_exprs_.count() + 1) : 0);
  // for vectorization, from 0 and col_count - 2 are the rollup group row
  // and the col_col - 1 is the last group row
  // it's not init firstly
  all_groupby_exprs_.reset();
  if (OB_FAIL(append(all_groupby_exprs_, MY_SPEC.group_exprs_))) {
    LOG_WARN("failed to append group exprs", K(ret));
  } else if (MY_SPEC.has_rollup_ &&
      OB_FAIL(append(all_groupby_exprs_, MY_SPEC.rollup_exprs_))) {
    LOG_WARN("failed to append group exprs", K(ret));
  } else if (!is_vectorized()) {
    // row-at-a-time mode: one group row per rollup level (or 0 without rollup)
    if (OB_FAIL(aggr_processor_.init_group_rows(col_count))) {
      LOG_WARN("failed to initialize init_group_rows", K(ret));
    }
  } else {
    // vectorized mode: the last group row is managed separately, hence -1
    // NOTE(review): with has_rollup_ == false this passes col_count - 1 == -1;
    // presumably init_group_rows treats negatives as "none" — confirm.
    if (OB_FAIL(aggr_processor_.init_group_rows(col_count - 1))) {
      LOG_WARN("failed to initialize init_group_rows", K(ret));
    } else if (MY_SPEC.has_rollup_) {
      curr_group_rowid_ = all_groupby_exprs_.count();
      cur_output_group_id_ = all_groupby_exprs_.count();
    } else {
      curr_group_rowid_ = 0;
      cur_output_group_id_ = 0;
    }
  }
  aggr_processor_.set_rollup_info(MY_SPEC.rollup_status_, MY_SPEC.rollup_id_expr_);
  aggr_processor_.set_partial_rollup_idx(MY_SPEC.group_exprs_.count(),
      all_groupby_exprs_.count());
  return ret;
}

// only init in inner_open
int ObMergeGroupByOp::init()
{
  int ret = OB_SUCCESS;
  if (OB_FAIL(ObChunkStoreUtil::alloc_dir_id(dir_id_))) {
    LOG_WARN("failed to alloc dir id", K(ret));
  } else if (FALSE_IT(aggr_processor_.set_dir_id(dir_id_))) {
  } else if (FALSE_IT(aggr_processor_.set_io_event_observer(&io_event_observer_))) {
  } else if (OB_FAIL(init_group_rows())) {
    LOG_WARN("failed to init group rows", K(ret));
  } else if (!is_vectorized()) {
    if (OB_FAIL(init_rollup_distributor())) {
      LOG_WARN("failed to init rollup distributor", K(ret));
    }
  } else {
    if (OB_FAIL(brs_holder_.init(child_->get_spec().output_, eval_ctx_))) {
      LOG_WARN("failed to initialize brs_holder_", K(ret));
    } else if (OB_FAIL(init_rollup_distributor())) {
      LOG_WARN("failed to init rollup distributor", K(ret));
    } else {
      // prepare initial group
      if
(MY_SPEC.has_rollup_) {
        // remember whether any rollup expr duplicates a group expr;
        // used later when NULL-ing rollup columns
        for (int64_t i = 0;
            !has_dup_group_expr_ && i < MY_SPEC.is_duplicate_rollup_expr_.count();
            ++i) {
          has_dup_group_expr_ = MY_SPEC.is_duplicate_rollup_expr_.at(i);
        }
      }
    }
  }
  if (OB_SUCC(ret)) {
    // map each distinct expr to its position in the child output
    // (-1 when the distinct expr is a const and not produced by the child)
    int64_t idx = -1;
    for (int64_t i = 0; i < MY_SPEC.distinct_exprs_.count() && OB_SUCC(ret); ++i) {
      if (has_exist_in_array(child_->get_spec().output_, MY_SPEC.distinct_exprs_.at(i), &idx)) {
        OZ(distinct_col_idx_in_output_.push_back(idx));
      } else {
        // is_const
        distinct_col_idx_in_output_.push_back(-1);
        LOG_DEBUG("distinct expr is const and is not in the output of child", K(i), K(ret));
      }
    }
    LOG_DEBUG("debug distinct exprs", K(ret), K(MY_SPEC.distinct_exprs_.count()));
  }
  if (OB_SUCC(ret) && aggr_processor_.has_distinct()) {
    // set group_batch_factor_ to 1 avoid out of memory error
    group_batch_factor_ = 1;
  }
  // group by c1,c2, rollup(c3,c4)
  // then rollup will generate new group row that c3 is different
  last_child_output_.reuse_ = true;
  return ret;
}

// Operator open: base-class open, then one-time init().
int ObMergeGroupByOp::inner_open()
{
  int ret = OB_SUCCESS;
  reset();
  if (OB_FAIL(ObGroupByOp::inner_open())) {
    LOG_WARN("failed to inner_open", K(ret));
  } else if (OB_FAIL(init())) {
    LOG_WARN("failed to init", K(ret));
  }
  return ret;
}

// Operator close: drop per-iteration state, then base-class close.
int ObMergeGroupByOp::inner_close()
{
  reset();
  return ObGroupByOp::inner_close();
}

// Final teardown; releases expr arrays before destroying the base operator.
void ObMergeGroupByOp::destroy()
{
  all_groupby_exprs_.reset();
  distinct_col_idx_in_output_.reset();
  inner_sort_exprs_.reset();
  reset();
  ObGroupByOp::destroy();
}

// Switch to the next parameter-set iteration: reset state and re-init group rows.
int ObMergeGroupByOp::inner_switch_iterator()
{
  int ret = OB_SUCCESS;
  reset();
  if (OB_FAIL(ObGroupByOp::inner_switch_iterator())) {
    LOG_WARN("failed to switch_iterator", K(ret));
  } else if (OB_FAIL(init_group_rows())) {
    LOG_WARN("failed to init group rows", K(ret));
  }
  return ret;
}

// Rescan: reset state and re-init group rows.
int ObMergeGroupByOp::inner_rescan()
{
  int ret = OB_SUCCESS;
  reset();
  if (OB_FAIL(ObGroupByOp::inner_rescan())) {
    LOG_WARN("failed to rescan", K(ret));
  } else if (OB_FAIL(init_group_rows())) {
    LOG_WARN("failed to init group rows", K(ret));
  }
  return ret;
}

void
ObMergeGroupByOp::set_rollup_expr_null(int64_t group_id)
{
  // Overwrite the group-by/rollup expr at `group_id` with NULL for the rollup
  // output row — unless it duplicates a group expr, in which case the value
  // must be kept.
  const bool is_dup_expr = (group_id < MY_SPEC.group_exprs_.count() ?
      true :
      MY_SPEC.is_duplicate_rollup_expr_.at(group_id - MY_SPEC.group_exprs_.count()));
  if (!is_dup_expr) {
    // NOTE(review): template argument was lost here (`const_cast(...)`);
    // restored to ObExpr *.
    ObExpr *null_expr = const_cast<ObExpr *>(group_id < MY_SPEC.group_exprs_.count() ?
        MY_SPEC.group_exprs_[group_id] :
        MY_SPEC.rollup_exprs_[group_id - MY_SPEC.group_exprs_.count()]);
    null_expr->locate_expr_datum(eval_ctx_).set_null();
    null_expr->set_evaluated_projected(eval_ctx_);
  }
}

// Prepare the current rollup output row: NULL out the rolled-up columns and
// report which expr differs from the previous output (used for GROUPING()).
// `diff_expr` is set to NULL when the differing column also appears in the
// plain GROUP BY list or is a duplicate rollup expr.
int ObMergeGroupByOp::rewrite_rollup_column(ObExpr *&diff_expr)
{
  int ret = OB_SUCCESS;
  // output rollup results here
  if (max_output_group_id_ == cur_output_group_id_
      && cur_output_group_id_ < all_groupby_exprs_.count() - 1) {
    // set null for partial rollup exprs
    // eg: c1,c2,rollup(c3,c4,c5)
    // then
    // group_id
    //  5        (c1, c2, c3, c4, c5)
    //  4        (c1, c2, c3, c4, null)
    //  3        (c1, c2, c3, null, null)
    //  2        (c1, c2, null, null, null)
    // if partial rollup key is (c1,c2,c3,c4), cur_output_group_id_=4-1=3 then
    // set (c5) to null, (c4) set null below
    for (int64_t i = cur_output_group_id_ + 1; i < all_groupby_exprs_.count(); ++i) {
      set_rollup_expr_null(i);
    }
  }
  set_rollup_expr_null(cur_output_group_id_);
  // NOTE(review): template argument was lost here (`const_cast(...)`);
  // restored to ObExpr *.
  diff_expr = const_cast<ObExpr *>(cur_output_group_id_ < MY_SPEC.group_exprs_.count() ?
      MY_SPEC.group_exprs_[cur_output_group_id_] :
      MY_SPEC.rollup_exprs_[cur_output_group_id_ - MY_SPEC.group_exprs_.count()]);
  LOG_DEBUG("debug write rollup column 1", KP(diff_expr), K(cur_output_group_id_));
  // for SELECT GROUPING(z0_test0) FROM Z0CASE GROUP BY z0_test0, ROLLUP(z0_test0);
  // issue:https://work.aone.alibaba-inc.com/issue/33780805
  if (cur_output_group_id_ >= MY_SPEC.group_exprs_.count()) {
    for (int64_t i = 0; diff_expr != NULL && i < MY_SPEC.group_exprs_.count(); ++i) {
      if (MY_SPEC.group_exprs_[i] == diff_expr) {
        diff_expr = NULL;
      }
    }
    const bool is_dup_expr = (cur_output_group_id_ < MY_SPEC.group_exprs_.count() ?
        true :
        MY_SPEC.is_duplicate_rollup_expr_.at(
            cur_output_group_id_ - MY_SPEC.group_exprs_.count()));
    if (is_dup_expr) {
      diff_expr = nullptr;
    }
  }
  LOG_DEBUG("debug write rollup column", KP(diff_expr), K(cur_output_group_id_));
  return ret;
}

// Fetch the next input row: from the internal sort when the rollup
// distributor has buffered/sorted the child, otherwise straight from the
// child operator. Returns OB_ITER_END when input is exhausted.
int ObMergeGroupByOp::get_child_next_row()
{
  int ret = OB_SUCCESS;
  if (use_sort_data_) {
    if (OB_FAIL(inner_sort_.get_next_row(inner_sort_exprs_))) {
      if (OB_ITER_END != ret) {
        LOG_WARN("failed to get sorted row", K(ret));
      }
    } else {
      LOG_DEBUG("debug get sort row", K(ROWEXPR2STR(eval_ctx_, child_->get_spec().output_)));
    }
  } else {
    if (OB_FAIL(child_->get_next_row())) {
      if (OB_ITER_END != ret) {
        LOG_WARN("failed to get child row", K(ret));
      }
    }
  }
  return ret;
}

// Pick the shortest rollup key prefix whose estimated NDV is large enough
// (>= FAR_GREATER_THAN_RATIO * dop) to distribute work evenly across the dop.
// Falls back to all group-by keys when no prefix qualifies.
int ObMergeGroupByOp::find_candidate_key(ObRollupNDVInfo &ndv_info)
{
  int ret = OB_SUCCESS;
  int64_t n_group = MY_SPEC.group_exprs_.count();
  uint64_t candicate_ndv = 0;
  ObPxSqcHandler *sqc_handle = ctx_.get_sqc_handler();
  ndv_info.dop_ = 1;
  // ndv_info.max_keys_ = 0;
  // TODO: Three stage can't process rollup level
  ndv_info.max_keys_ = ObThreeStageAggrStage::SECOND_STAGE == MY_SPEC.aggr_stage_ ?
      all_groupby_exprs_.count() : 0;
  if (OB_NOT_NULL(sqc_handle)) {
    ObPxRpcInitSqcArgs &sqc_args = sqc_handle->get_sqc_init_arg();
    ndv_info.dop_ = sqc_args.sqc_.get_total_task_count();
  }
  for (int64_t i = 0; i < MY_SPEC.rollup_exprs_.count() + 1 && OB_SUCC(ret); ++i) {
    if (0 == n_group && i == MY_SPEC.rollup_exprs_.count()) {
      break;
    }
    candicate_ndv = ndv_calculator_[i].estimate();
    if (candicate_ndv >= ObRollupKeyPieceMsgCtx::FAR_GREATER_THAN_RATIO * ndv_info.dop_) {
      ndv_info.ndv_ = candicate_ndv;
      ndv_info.n_keys_ = 0 == n_group ?
          i + 1 : i + n_group;
      break;
    }
  }
  if (0 == ndv_info.n_keys_) {
    // can't found, use all groupby keys
    ndv_info.ndv_ = candicate_ndv;
    ndv_info.n_keys_ = all_groupby_exprs_.count();
  }
  return ret;
}

// Row-mode NDV collection: hash all group-by/rollup exprs of the current row
// incrementally and feed each rollup-level prefix hash into its HLL calculator.
int ObMergeGroupByOp::collect_local_ndvs()
{
  int ret = OB_SUCCESS;
  int64_t n_group = MY_SPEC.group_exprs_.count();
  // same as hash groupby
  uint64_t hash_value = 99194853094755497L;
  ObDatum *datum = nullptr;
  for (int64_t i = 0; i < all_groupby_exprs_.count() && OB_SUCC(ret); ++i) {
    ObExpr *expr = all_groupby_exprs_.at(i);
    if (OB_FAIL(expr->eval(eval_ctx_, datum))) {
      LOG_WARN("failed to eval expr", K(ret));
    } else {
      // chain the hash: value i includes all exprs [0, i]
      hash_value = expr->basic_funcs_->murmur_hash_(*datum, hash_value);
      // record a prefix once it covers all group exprs (or from the start
      // when there are no group exprs)
      if ((0 < n_group && i == n_group - 1) || i >= n_group) {
        if (0 < n_group) {
          ndv_calculator_[i - n_group + 1].set(hash_value);
        } else {
          ndv_calculator_[i - n_group].set(hash_value);
        }
      }
    }
  }
  return ret;
}

// Exchange local NDV info with the QC via the datahub and adopt the globally
// chosen partial-rollup key count.
int ObMergeGroupByOp::process_parallel_rollup_key(ObRollupNDVInfo &ndv_info)
{
  int ret = OB_SUCCESS;
  ObRollupKeyWholeMsg whole_msg;
  const ObRollupKeyWholeMsg *temp_whole_msg = NULL;
  ObPxSqcHandler *handler = ctx_.get_sqc_handler();
  if (OB_ISNULL(handler)) {
    ret = OB_NOT_SUPPORTED;
    LOG_WARN("parallel merge groupby only supported in parallel execution mode",
        K(MY_SPEC.is_parallel_));
    LOG_USER_ERROR(OB_NOT_SUPPORTED, "parallel winbuf in non-px mode");
  } else {
    ObPxSQCProxy &proxy = handler->get_sqc_proxy();
    ObRollupKeyPieceMsg piece;
    piece.op_id_ = MY_SPEC.id_;
    piece.thread_id_ = GETTID();
    piece.dfo_id_ = proxy.get_dfo_id();
    piece.rollup_ndv_ = ndv_info;
    if (OB_FAIL(proxy.get_dh_msg(MY_SPEC.id_,
                                 piece,
                                 temp_whole_msg,
                                 ctx_.get_physical_plan_ctx()->get_timeout_timestamp()))) {
      LOG_WARN("fail get rollup key msg", K(ret));
    } else if (OB_ISNULL(temp_whole_msg)) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("whole msg is unexpected", K(ret));
    } else if (OB_FAIL(whole_msg.assign(*temp_whole_msg))) {
      LOG_WARN("fail to assign msg", K(ret));
    } else {
      global_rollup_key_ = whole_msg.rollup_ndv_;
      if (global_rollup_key_.n_keys_
          > MY_SPEC.group_exprs_.count()) {
        partial_rollup_idx_ = global_rollup_key_.n_keys_;
        if (global_rollup_key_.n_keys_ > all_groupby_exprs_.count()) {
          // clamp a bogus global answer to the local key count
          LOG_ERROR("unexpected number of partial rollup keys", K(global_rollup_key_.n_keys_));
          global_rollup_key_.n_keys_ = all_groupby_exprs_.count();
          partial_rollup_idx_ = all_groupby_exprs_.count();
        }
      } else {
        // never roll up below the plain group-by prefix
        partial_rollup_idx_ = MY_SPEC.group_exprs_.count();
      }
      aggr_processor_.set_partial_rollup_idx(MY_SPEC.group_exprs_.count(),
          partial_rollup_idx_);
    }
    LOG_DEBUG("debug partial rollup keys", K(partial_rollup_idx_));
  }
  return ret;
}

// Register the rollup-key whole-message provider on the SQC so the rollup
// distributor can exchange NDV info through the datahub. No-op when the plan
// is not parallel.
int ObMergeGroupBySpec::register_to_datahub(ObExecContext &ctx) const
{
  int ret = OB_SUCCESS;
  if (is_parallel_) {
    if (OB_ISNULL(ctx.get_sqc_handler())) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("null unexpected", K(ret));
    } else {
      void *buf = ctx.get_allocator().alloc(sizeof(ObRollupKeyWholeMsg::WholeMsgProvider));
      if (OB_ISNULL(buf)) {
        ret = OB_ALLOCATE_MEMORY_FAILED;
      } else {
        ObRollupKeyWholeMsg::WholeMsgProvider *provider =
            new (buf)ObRollupKeyWholeMsg::WholeMsgProvider();
        ObSqcCtx &sqc_ctx = ctx.get_sqc_handler()->get_sqc_ctx();
        if (OB_FAIL(sqc_ctx.add_whole_msg_provider(get_id(), *provider))) {
          LOG_WARN("fail add whole msg provider", K(ret));
        }
      }
    }
  }
  return ret;
}

// 1. add row to sort for sort data in gropuby operator instead of separate sort operator
// 2. calculate the NDV of group_exprs and rollup(exprs)
// 3. send to QC and get the optimal rollup exprs
// 4. rollup data
int ObMergeGroupByOp::process_rollup_distributor()
{
  int ret = OB_SUCCESS;
  if (!use_sort_data_ && MY_SPEC.is_parallel_) {
    // 1.
get all data and calculate ndv and sort const ObIArray &child_expr = child_->get_spec().output_; while (OB_SUCC(ret)) { clear_evaluated_flag(); if (OB_FAIL(child_->get_next_row())) { if (OB_ITER_END == ret) { ret = OB_SUCCESS; break; } } else if (OB_FAIL(try_check_status())) { LOG_WARN("check status failed", K(ret)); } else if (OB_FAIL(collect_local_ndvs())) { LOG_WARN("failed to calculate ndvs", K(ret)); } else if (OB_FAIL(inner_sort_.add_row(inner_sort_exprs_))) { LOG_WARN("failed to add row", K(ret)); } } // set true and get data from inner_sort_ // it should calculate periodically, eg: per 2^n use_sort_data_ = true; if (OB_FAIL(ret)) { } else if (OB_FAIL(inner_sort_.sort())) { LOG_WARN("failed to sort rows", K(ret)); } else if (OB_FAIL(find_candidate_key(global_rollup_key_))) { LOG_WARN("failed to find candidate key", K(ret)); } else if (OB_FAIL(process_parallel_rollup_key(global_rollup_key_))) { // 2. wait QC to get the distribution keys LOG_WARN("failed to process parallel", K(ret)); } } return ret; } int ObMergeGroupByOp::inner_get_next_row() { int ret = OB_SUCCESS; const int64_t stop_output_group_id = MY_SPEC.group_exprs_.count(); const int64_t col_count = MY_SPEC.group_exprs_.count() + MY_SPEC.rollup_exprs_.count(); const int64_t group_id = MY_SPEC.has_rollup_ ? 
      col_count : 0;
  // distinct-dedup shortcut only applies without rollup
  bool need_dup_data = 0 < MY_SPEC.distinct_exprs_.count()
      && 0 == MY_SPEC.rollup_exprs_.count();
  LOG_DEBUG("before inner_get_next_row",
      "aggr_hold_size", aggr_processor_.get_aggr_hold_size(),
      "aggr_used_size", aggr_processor_.get_aggr_used_size());
  if (MY_SPEC.has_rollup_
      && cur_output_group_id_ >= first_output_group_id_
      && cur_output_group_id_ >= stop_output_group_id) {
    // still have pending rollup rows to emit from the previous group
    ObExpr *diff_expr = NULL;
    if (OB_FAIL(rewrite_rollup_column(diff_expr))) {
      LOG_WARN("failed to rewrite_rollup_column", K(ret));
    } else if (OB_FAIL(rollup_and_calc_results(cur_output_group_id_, diff_expr))) {
      LOG_WARN("failed to rollup and calculate results", K(cur_output_group_id_), K(ret));
    } else {
      --cur_output_group_id_;
      LOG_DEBUG("finish ouput rollup row", K(cur_output_group_id_),
          K(first_output_group_id_), K(ret));
    }
  } else if (is_end_) {
    ret = OB_ITER_END;
  } else if (ObRollupStatus::ROLLUP_DISTRIBUTOR == MY_SPEC.rollup_status_
      && OB_FAIL(process_rollup_distributor())) {
    LOG_WARN("failed to process rollup distributor", K(ret));
  } else {
    // output group results here
    bool is_break = false;
    int64_t first_diff_pos = OB_INVALID_INDEX;
    ObAggregateProcessor::GroupRow *group_row = NULL;
    if (OB_FAIL(aggr_processor_.get_group_row(group_id, group_row))) {
      LOG_WARN("failed to get_group_row", K(ret));
    } else if (OB_ISNULL(group_row)) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("group_row is null", K(ret));
    } else if (NULL != last_child_output_.store_row_) {
      // replay the first row of the new group saved at the end of the
      // previous call
      if (OB_FAIL(last_child_output_.store_row_->to_expr(child_->get_spec().output_,
                                                         eval_ctx_))) {
        LOG_WARN("Failed to get next row", K(ret));
      }
    } else {
      if (OB_FAIL(get_child_next_row())) {
        // get 1st iteration
        if (ret != OB_ITER_END) {
          LOG_WARN("failed to get next row", K(ret));
        }
      } else if (0 < MY_SPEC.distinct_exprs_.count()) {
        // save the first row
        if (OB_FAIL(last_child_output_.save_store_row(child_->get_spec().output_,
                                                      eval_ctx_, 0))) {
          LOG_WARN("failed to store child output", K(ret));
        }
      }
    }
    LOG_DEBUG("finish merge prepare 1",
K(child_->get_spec().output_), KPC(last_child_output_.store_row_)); if (OB_SUCC(ret)) { clear_evaluated_flag(); if (OB_FAIL(prepare_and_save_curr_groupby_datums(group_id, group_row, all_groupby_exprs_, all_groupby_exprs_.count(), 0))) { LOG_WARN("failed to prepare and save groupby store row", K(ret)); } else if (OB_FAIL(get_grouping_id())) { LOG_WARN("failed to get grouping id", K(ret)); } else if (OB_FAIL(aggr_processor_.prepare(*group_row))) { LOG_WARN("failed to prepare", K(ret)); } while (OB_SUCC(ret) && !is_break && OB_SUCC(get_child_next_row())) { clear_evaluated_flag(); if (OB_FAIL(try_check_status())) { LOG_WARN("check status failed", K(ret)); } else if (0 < col_count && OB_FAIL(check_same_group(group_row, first_diff_pos))) { LOG_WARN("failed to check group", K(ret)); } else if (OB_INVALID_INDEX == first_diff_pos) { //same group bool no_need_process = false; if (need_dup_data && check_unique_distinct_columns(group_row, no_need_process)) { LOG_WARN("failed to check unique distinct columns", K(ret)); } else if (!no_need_process && OB_FAIL(aggr_processor_.process(*group_row))) { LOG_WARN("failed to calc aggr", K(ret)); } else { LOG_DEBUG("process row", K(no_need_process), K(ROWEXPR2STR(eval_ctx_, child_->get_spec().output_))); } } else { //different group if (OB_FAIL(last_child_output_.save_store_row(child_->get_spec().output_, eval_ctx_, 0))) { LOG_WARN("failed to store child output", K(ret)); } else if (OB_FAIL(restore_groupby_datum(group_row, first_diff_pos))) { LOG_WARN("failed to restore_groupby_datum", K(ret)); } else if (OB_FAIL(rollup_and_calc_results(group_id))) { LOG_WARN("failed to rollup and calculate results", K(group_id), K(ret)); } else { is_break = true; if(MY_SPEC.has_rollup_) { if (ROLLUP_COLLECTOR == MY_SPEC.rollup_status_) { if (0 > cur_grouping_id_) { // if grouping_id is less than 0, then it's base row for rollup collector // +1 is added for grouping_id that is added to group_exprs_ if (ObThreeStageAggrStage::THIRD_STAGE == 
                      MY_SPEC.aggr_stage_) {
                    partial_rollup_idx_ = -cur_grouping_id_;
                  } else {
                    partial_rollup_idx_ = -cur_grouping_id_ + 1;
                  }
                  first_output_group_id_ = max(partial_rollup_idx_, first_diff_pos + 1);
                } else {
                  // don't rollup if grouping is actual value, it's already partial rollup row
                  first_output_group_id_ = all_groupby_exprs_.count();
                  // partial_rollup_idx_ = all_groupby_exprs_.count();
                }
                cur_output_group_id_ = group_id - 1;
              } else {
                first_output_group_id_ = first_diff_pos + 1;
                cur_output_group_id_ = INT64_MAX != partial_rollup_idx_ ?
                    partial_rollup_idx_ - 1: group_id - 1;
                max_output_group_id_ = cur_output_group_id_;
              }
            }
          }
        }
      } // end while
      if (OB_ITER_END == ret) {
        // the last group
        is_end_ = true;
        if (OB_FAIL(restore_groupby_datum(group_row, 0))) {
          LOG_WARN("failed to restore_groupby_datum", K(ret));
        } else if (OB_FAIL(rollup_and_calc_results(group_id))) {
          LOG_WARN("failed to rollup and calculate results", K(group_id), K(ret));
        } else {
          if (MY_SPEC.has_rollup_) {
            if (ROLLUP_COLLECTOR == MY_SPEC.rollup_status_) {
              if (0 > cur_grouping_id_) {
                // has base row, we will calculate rollup_row
                if (ObThreeStageAggrStage::THIRD_STAGE == MY_SPEC.aggr_stage_) {
                  partial_rollup_idx_ = -cur_grouping_id_;
                } else {
                  partial_rollup_idx_ = -cur_grouping_id_ + 1;
                }
                first_output_group_id_ = partial_rollup_idx_;
              } else {
                // if last group is non-base, then different pos must be rollup_id_expr,
                // and it must rollup all rows already
                first_output_group_id_ = all_groupby_exprs_.count();
              }
              cur_output_group_id_ = group_id - 1;
            } else {
              first_output_group_id_ = 0;
              cur_output_group_id_ = INT64_MAX != partial_rollup_idx_ ?
partial_rollup_idx_ - 1: group_id - 1; max_output_group_id_ = cur_output_group_id_; } } } LOG_DEBUG("finish iter end", K(first_output_group_id_), K(cur_output_group_id_), K(MY_SPEC.rollup_status_), K(MY_SPEC.id_)); } } } LOG_TRACE("after inner_get_next_row", "aggr_hold_size", aggr_processor_.get_aggr_hold_size(), "aggr_used_size", aggr_processor_.get_aggr_used_size()); return ret; } int ObMergeGroupByOp::get_child_next_batch_row( const int64_t max_row_cnt, const ObBatchRows *&batch_rows) { int ret = OB_SUCCESS; if (use_sort_data_) { int64_t read_rows = 0; batch_rows = &sort_batch_rows_; if (OB_FAIL(inner_sort_.get_next_batch( inner_sort_exprs_, max_row_cnt, read_rows))) { if (OB_ITER_END == ret) { ret = OB_SUCCESS; const_cast(batch_rows)->size_ = 0; const_cast(batch_rows)->end_ = true; LOG_DEBUG("debug to get sorted row", K(ret), K(max_row_cnt), K(const_cast(batch_rows)->size_), K(ret)); } else { LOG_WARN("failed to get sorted row", K(ret)); } } else { const_cast(batch_rows)->size_ = read_rows; const_cast(batch_rows)->end_ = false; if (first_batch_from_sort_) { int64_t max_size = MY_SPEC.max_batch_size_; const_cast(batch_rows)->skip_->reset(max_size); first_batch_from_sort_ = false; } else { // if has rollup, then don't duplicate data in get_next_batch/row // use unique_sort_op_ to duplicate data // so skip_ don'e reset [ batch_rows->skip_->reset(max_row_cnt); ] } LOG_DEBUG("debug to get sorted row", K(ret), K(max_row_cnt), K(const_cast(batch_rows)->size_)); } } else { if (OB_FAIL(child_->get_next_batch(max_row_cnt, batch_rows))) { LOG_WARN("failed to get child row", K(ret)); } } return ret; } void ObMergeGroupByOp::sets( ObHyperLogLogCalculator &ndv_calculator, uint64_t *hash_vals, ObBitVector *skip, int64_t count) { int ret = OB_SUCCESS; for (int64_t i = 0; i < count; i++) { if (OB_NOT_NULL(skip) && skip->at(i)) { continue; } ndv_calculator.set(hash_vals[i]); } } int ObMergeGroupByOp::batch_collect_local_ndvs(const ObBatchRows *child_brs) { int ret = OB_SUCCESS; 
  int64_t n_group = MY_SPEC.group_exprs_.count();
  // same as hash groupby
  uint64_t hash_value_seed = 99194853094755497L;
  ObDatum *datum = nullptr;
  for (int64_t i = 0; i < all_groupby_exprs_.count() && OB_SUCC(ret); ++i) {
    ObExpr *expr = all_groupby_exprs_.at(i);
    if (OB_FAIL(expr->eval_batch(eval_ctx_, *child_brs->skip_, child_brs->size_))) {
      LOG_WARN("failed to eval expr", K(ret));
    } else {
      // the first expr hashes from the fixed seed, subsequent exprs chain the
      // per-row hashes accumulated in rollup_hash_vals_
      bool is_batch_seed = (0 != i);
      ObDatum &curr_datum = expr->locate_batch_datums(eval_ctx_)[0];
      if (0 == i) {
        expr->basic_funcs_->murmur_hash_batch_(rollup_hash_vals_, &curr_datum,
            expr->is_batch_result(), *child_brs->skip_, child_brs->size_,
            &hash_value_seed, is_batch_seed);
      } else {
        expr->basic_funcs_->murmur_hash_batch_(rollup_hash_vals_, &curr_datum,
            expr->is_batch_result(), *child_brs->skip_, child_brs->size_,
            rollup_hash_vals_, is_batch_seed);
      }
      // whether it need skip???
      // record a prefix once it covers all group exprs (or from the start
      // when there are no group exprs)
      if ((0 < n_group && i == n_group - 1) || i >= n_group) {
        if (0 < n_group) {
          sets(ndv_calculator_[i - n_group + 1], rollup_hash_vals_,
              child_brs->skip_, child_brs->size_);
        } else {
          sets(ndv_calculator_[i - n_group], rollup_hash_vals_,
              child_brs->skip_, child_brs->size_);
        }
      }
    }
  }
  LOG_DEBUG("debug batch collect local ndvs", K(ret));
  return ret;
}

// Batch-mode rollup distributor: buffer + sort the whole child input,
// collect NDVs, then negotiate the partial rollup keys with the QC.
int ObMergeGroupByOp::batch_process_rollup_distributor(const int64_t max_row_cnt)
{
  int ret = OB_SUCCESS;
  if (!use_sort_data_ && MY_SPEC.is_parallel_) {
    int64_t child_batch_cnt = common::max(max_row_cnt, MY_SPEC.max_batch_size_);
    const ObBatchRows *child_brs = nullptr;
    // 1.
    // get all data and calculate ndv and sort
    while (OB_SUCC(ret)) {
      clear_evaluated_flag();
      if (OB_FAIL(child_->get_next_batch(child_batch_cnt, child_brs))) {
        // batch interface signals exhaustion via child_brs->end_,
        // never via OB_ITER_END
        if (OB_ITER_END == ret) {
          ret = OB_ERR_UNEXPECTED;
          LOG_WARN("unexpected status: return error code iter_end", K(ret));
        }
        LOG_WARN("failed to get child batch", K(ret));
      } else if (child_brs->end_ && child_brs->size_ == 0) {
        LOG_DEBUG("reach iterating end with empty result, do nothing");
        break;
      } else if (OB_FAIL(try_check_status())) {
        LOG_WARN("check status failed", K(ret));
      } else if (OB_FAIL(batch_collect_local_ndvs(child_brs))) {
        LOG_WARN("failed to calculate ndvs", K(ret));
      } else if (OB_FAIL(inner_sort_.add_batch(inner_sort_exprs_, *child_brs->skip_,
          child_brs->size_, 0, nullptr))) {
        LOG_WARN("failed to add row", K(ret));
      }
    }
    // set true and get data from inner_sort_
    // 2. wait QC to get the distribution keys
    use_sort_data_ = true;
    if (OB_FAIL(ret)) {
    } else if (OB_FAIL(inner_sort_.sort())) {
      LOG_WARN("failed to sort rows", K(ret));
    } else if (OB_FAIL(find_candidate_key(global_rollup_key_))) {
      LOG_WARN("failed to find candidate key", K(ret));
    } else if (OB_FAIL(process_parallel_rollup_key(global_rollup_key_))) {
      LOG_WARN("failed to process parallel", K(ret));
    } else {
      clear_evaluated_flag();
      LOG_DEBUG("debug batch process distributor", K(ret));
    }
  }
  return ret;
}

// TODO qubin.qb: support rollup in next release
// Batch-mode iteration: drains queued aggregation results first, otherwise
// consumes child batches until enough output groups are accumulated.
int ObMergeGroupByOp::inner_get_next_batch(const int64_t max_row_cnt)
{
  int ret = OB_SUCCESS;
  int64_t output_batch_cnt = common::min(max_row_cnt, MY_SPEC.max_batch_size_);
  int64_t child_batch_cnt = common::max(max_row_cnt, MY_SPEC.max_batch_size_);
  const ObBatchRows *child_brs = nullptr;
  LOG_DEBUG("before inner_get_next_batch",
      "aggr_hold_size", aggr_processor_.get_aggr_hold_size(),
      "aggr_used_size", aggr_processor_.get_aggr_used_size(),
      K(output_batch_cnt), K(max_row_cnt));
  if (is_output_queue_not_empty()) {
    // consume aggr results generated in previous round
    if (OB_FAIL(calc_batch_results(is_end_,
        output_batch_cnt))) {
      LOG_WARN("failed to calc output results", K(ret));
    }
  } else {
    LOG_DEBUG("begin to get_next_batch rows from child", K(child_batch_cnt));
    set_output_queue_cnt(0);
    if (curr_group_rowid_ > common::OB_INVALID_INDEX && OB_FAIL(brs_holder_.restore())) {
      LOG_WARN("failed to restore previous exprs", K(ret));
    } else if (ObRollupStatus::ROLLUP_DISTRIBUTOR == MY_SPEC.rollup_status_
        && OB_FAIL(batch_process_rollup_distributor(child_batch_cnt))) {
      LOG_WARN("failed to process rollup distributor", K(ret));
    } else {
      // do nothing: 1st iteration, no previous aggregation
    }
    if (OB_SUCC(ret)) {
      while (OB_SUCC(ret) && OB_SUCC(get_child_next_batch_row(child_batch_cnt, child_brs))) {
        if (child_brs->end_ && child_brs->size_ == 0) {
          LOG_DEBUG("reach iterating end with empty result, do nothing");
          break;
        }
        clear_evaluated_flag();
        if (OB_FAIL(try_check_status())) {
          LOG_WARN("check status failed", K(ret));
        } else if (OB_FAIL(groupby_datums_eval_batch(*(child_brs->skip_),
            child_brs->size_))) {
          LOG_WARN("failed to calc_groupby_datums", K(ret));
        } else if (OB_FAIL(process_batch(*child_brs))) {
          LOG_WARN("failed to process_batch_result", K(ret));
        } else if (stop_batch_iterating(*child_brs, output_batch_cnt)) {
          // backup child exprs for this round
          OZ(brs_holder_.save(std::min(MY_SPEC.max_batch_size_, get_output_queue_cnt())));
          LOG_DEBUG("break out of iteratation", K(child_brs->end_),
              K(output_batch_cnt), K(output_queue_cnt_));
          break;
        } else {
          // do nothing
        }
      }
      if (OB_SUCC(ret) && child_brs->end_ && !OB_ISNULL(cur_group_row_)) {
        // add last unfinised grouprow into output group
        inc_output_queue_cnt();
        if (MY_SPEC.has_rollup_) {
          int64_t start_rollup_id = MY_SPEC.group_exprs_.count() - 1;
          int64_t end_rollup_id = all_groupby_exprs_.count() - 1;
          int64_t max_group_idx = MY_SPEC.group_exprs_.count() - 1;
          if (ROLLUP_DISTRIBUTOR == MY_SPEC.rollup_status_) {
            end_rollup_id = partial_rollup_idx_ - 1;
          } else if (ROLLUP_COLLECTOR == MY_SPEC.rollup_status_) {
            if (0 <= cur_grouping_id_) {
              // if
              // grouping_id is equal or greater than 0, then it's not base row, don't rollup row
              start_rollup_id = all_groupby_exprs_.count();
            } else {
              // if grouping_id is less than 0, then it's base row for rollup collector
              // +1 is added for grouping_id that is added to group_exprs_
              if (ObThreeStageAggrStage::THIRD_STAGE == MY_SPEC.aggr_stage_) {
                partial_rollup_idx_ = -cur_grouping_id_ - 1;
              } else {
                partial_rollup_idx_ = -cur_grouping_id_;
              }
              start_rollup_id = max(partial_rollup_idx_, MY_SPEC.group_exprs_.count() - 1);
              max_group_idx = start_rollup_id;
            }
          }
          LOG_DEBUG("debug grouping_id", K(end_rollup_id), K(start_rollup_id),
              K(max_group_idx));
          if (end_rollup_id >= start_rollup_id && OB_FAIL(gen_rollup_group_rows(
              start_rollup_id, end_rollup_id, max_group_idx, curr_group_rowid_))) {
            LOG_WARN("failed to genereate rollup group row", K(ret));
          }
        }
      }
      if (OB_SUCC(ret) && OB_FAIL(calc_batch_results(child_brs->end_, output_batch_cnt))) {
        LOG_WARN("failed to calc output results", K(ret));
      }
    }
  }
  LOG_DEBUG("after inner_get_next_batch",
      "aggr_hold_size", aggr_processor_.get_aggr_hold_size(),
      "aggr_used_size", aggr_processor_.get_aggr_used_size(),
      K(output_batch_cnt), K(ret));
  return ret;
}

// Set column `idx` of a stored rollup row to NULL; when a rollup expr is
// duplicated, also NULL every later rollup column sharing the same expr.
// Negative idx is a no-op.
int ObMergeGroupByOp::set_null(int64_t idx, ObChunkDatumStore::StoredRow *rollup_store_row)
{
  int ret = OB_SUCCESS;
  if (0 > idx) {
  } else {
    OZ(rollup_store_row->set_null(idx));
    LOG_DEBUG("set null", K(idx), K(MY_SPEC.rollup_exprs_.count()));
    if (has_dup_group_expr_) {
      int64_t start_idx = idx - MY_SPEC.group_exprs_.count();
      ObExpr *base_expr = MY_SPEC.rollup_exprs_.at(start_idx);
      for (int i = start_idx + 1; i < MY_SPEC.rollup_exprs_.count() && OB_SUCC(ret); ++i) {
        if (base_expr == MY_SPEC.rollup_exprs_.at(i)) {
          // set null to the same expr
          OZ(rollup_store_row->set_null(i + MY_SPEC.group_exprs_.count()));
          LOG_DEBUG("set null", K(i + MY_SPEC.group_exprs_.count()), K(start_idx),
              K(MY_SPEC.rollup_exprs_.count()));
        }
      }
    }
  }
  return ret;
}

int ObMergeGroupByOp::get_rollup_row(
    int64_t prev_group_row_id, int64_t
group_row_id, ObAggregateProcessor::GroupRow *&curr_group_row, bool &need_set_null, int64_t idx) { int ret = OB_SUCCESS; curr_group_row = nullptr; need_set_null = false; ObAggregateProcessor::GroupRow *prev_group_row = nullptr; (void) aggr_processor_.get_group_row(prev_group_row_id, prev_group_row); if (OB_ISNULL(prev_group_row)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("failed to get prev group row", K(ret), K(prev_group_row_id)); } else if (group_row_id < aggr_processor_.get_group_rows_count()) { // critical path: reuse grouprow directly no defensive check (void) aggr_processor_.get_group_row(group_row_id, curr_group_row); } if (OB_FAIL(ret)) { } else if (nullptr == curr_group_row) { if (OB_FAIL(aggr_processor_.init_one_group(group_row_id, true))) { LOG_WARN("failed to init_one_group", K(ret)); } else if (OB_FAIL(aggr_processor_.get_group_row(group_row_id, curr_group_row))) { LOG_WARN("failed to get_group_row", K(ret)); // performance critical: use curr_group_row directly, no defensive check } else if (OB_FAIL(aggr_processor_.prepare_in_batch_mode(curr_group_row))) { LOG_WARN("fail to prepare the aggr func", K(ret)); } else { // deep copy from prev group row ObChunkDatumStore::LastStoredRow *groupby_store_row = nullptr; if (OB_FAIL(get_groupby_store_row(group_row_id, &groupby_store_row))) { LOG_WARN("failed to get_groupby_store_row", K(ret)); } else if (OB_FAIL(groupby_store_row->save_store_row( *prev_group_row->groupby_store_row_, ROLLUP_DISTRIBUTOR == MY_SPEC.rollup_status_ ? 
ROLLUP_BASE_ROW_EXTRA_SIZE : 0))) { LOG_WARN("failed to store group row", K(ret)); } else { curr_group_row->groupby_store_row_ = groupby_store_row->store_row_; if (ROLLUP_DISTRIBUTOR == MY_SPEC.rollup_status_) { *reinterpret_cast(curr_group_row->groupby_store_row_->get_extra_payload()) = group_row_id; } need_set_null = true; if (0 <= idx) { // if the expr in rollup in group by or the expr exists more than one tiem, // then only the first expr need to set null // eg: group by c1, rollup(c1,c1) // then don't reset null OZ(set_null(idx, curr_group_row->groupby_store_row_)); } } } } else if (nullptr == curr_group_row->groupby_store_row_) { // deep copy from prev group row ObChunkDatumStore::LastStoredRow *groupby_store_row = nullptr; if (OB_FAIL(get_groupby_store_row(group_row_id, &groupby_store_row))) { LOG_WARN("failed to get_groupby_store_row", K(ret)); } else if (OB_FAIL(groupby_store_row->save_store_row( *prev_group_row->groupby_store_row_, ROLLUP_DISTRIBUTOR == MY_SPEC.rollup_status_ ? 
ROLLUP_BASE_ROW_EXTRA_SIZE : 0))) { LOG_WARN("failed to store group row", K(ret)); } else { curr_group_row->groupby_store_row_ = groupby_store_row->store_row_; if (ROLLUP_DISTRIBUTOR == MY_SPEC.rollup_status_) { *reinterpret_cast(curr_group_row->groupby_store_row_->get_extra_payload()) = group_row_id; } need_set_null = true; if (0 <= idx) { OZ(set_null(idx, curr_group_row->groupby_store_row_)); } } } return ret; } int ObMergeGroupByOp::get_empty_rollup_row( int64_t group_row_id, ObAggregateProcessor::GroupRow *&curr_group_row) { int ret = OB_SUCCESS; curr_group_row = nullptr; ObChunkDatumStore::LastStoredRow *store_row = nullptr; if (group_row_id < aggr_processor_.get_group_rows_count()) { // critical path: reuse grouprow directly no defensive check (void) aggr_processor_.get_group_row(group_row_id, curr_group_row); if (OB_ISNULL(curr_group_row)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("failed to get group row", K(ret)); } } if (OB_FAIL(ret)) { } else if (nullptr == curr_group_row) { if (OB_FAIL(aggr_processor_.init_one_group(group_row_id))) { LOG_WARN("failed to init_one_group", K(ret)); } else if (OB_FAIL(aggr_processor_.get_group_row(group_row_id, curr_group_row))) { LOG_WARN("failed to get_group_row", K(ret)); // performance critical: use curr_group_row directly, no defensive check } else if (OB_FAIL(aggr_processor_.prepare_in_batch_mode(curr_group_row))) { LOG_WARN("fail to prepare the aggr func", K(ret)); } else if (OB_FAIL(get_groupby_store_row(group_row_id, &store_row))) { LOG_WARN("failed to get groupby store row", K(ret), K(group_row_id)); } else if (OB_ISNULL(store_row)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected status: store row is null", K(ret)); } else if (output_groupby_rows_.count() != aggr_processor_.get_group_rows_count()) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected status: store row is null", K(ret)); } } return ret; } int ObMergeGroupByOp::get_grouping_id() { int ret = OB_SUCCESS; if (ROLLUP_COLLECTOR == MY_SPEC.rollup_status_) { ObDatum 
*datum = nullptr; if (OB_FAIL(MY_SPEC.rollup_id_expr_->eval(eval_ctx_, datum))) { LOG_WARN("failed to eval aggr_code_expr", K(ret)); } else { cur_grouping_id_ = datum->get_int(); LOG_DEBUG("debug partial rollup idx", K(cur_grouping_id_)); } } return ret; } int ObMergeGroupByOp::get_cur_group_row( int64_t group_row_id, ObAggregateProcessor::GroupRow *&curr_group_row, ObIArray &group_exprs, const int64_t group_count) { int ret = OB_SUCCESS; curr_group_row = nullptr; if (group_row_id < aggr_processor_.get_group_rows_count()) { // critical path: reuse grouprow directly no defensive check (void) aggr_processor_.get_group_row(group_row_id, curr_group_row); if (OB_ISNULL(curr_group_row)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("failed to get group row", K(ret), K(group_row_id)); } else if (OB_FAIL(prepare_and_save_curr_groupby_datums( curr_group_rowid_, curr_group_row, group_exprs, group_count, ROLLUP_DISTRIBUTOR == MY_SPEC.rollup_status_ ? ROLLUP_BASE_ROW_EXTRA_SIZE : 0))) { LOG_WARN("failed to prepare group datums", K(ret)); } else if (OB_FAIL(get_grouping_id())) { LOG_WARN("failed to get grouping id", K(ret)); } } else { if (OB_FAIL(aggr_processor_.init_one_group(group_row_id))) { LOG_WARN("failed to init_one_group", K(ret)); } else if (OB_FAIL(aggr_processor_.get_group_row(group_row_id, curr_group_row))) { LOG_WARN("failed to get_group_row", K(ret)); // performance critical: use curr_group_row directly, no defensive check } else if (OB_FAIL(aggr_processor_.prepare_in_batch_mode(curr_group_row))) { LOG_WARN("fail to prepare the aggr func", K(ret)); } else if (OB_FAIL(prepare_and_save_curr_groupby_datums( curr_group_rowid_, curr_group_row, group_exprs, group_count, ROLLUP_DISTRIBUTOR == MY_SPEC.rollup_status_ ? 
ROLLUP_BASE_ROW_EXTRA_SIZE : 0))) { LOG_WARN("failed to eval_aggr_param_batch"); } else if (OB_FAIL(get_grouping_id())) { LOG_WARN("failed to get grouping id", K(ret)); } } return ret; } int ObMergeGroupByOp::set_all_null(int64_t start, int64_t end, int64_t max_group_idx, ObChunkDatumStore::StoredRow *rollup_store_row) { int ret = OB_SUCCESS; for (int64_t i = end - 1; i >= start && OB_SUCC(ret); --i) { if (!MY_SPEC.is_duplicate_rollup_expr_.at(i - MY_SPEC.group_exprs_.count())) { if (OB_FAIL(set_null(i, rollup_store_row))) { LOG_WARN("failed to set null", K(ret), K(i)); } } } return ret; } /* * generate rollup group row by start_diff_group_idx and cur_rollup_idx * eg: count(*) group by c1, rollup(c2,c3,c4) * c1 c2 c3 c4 count(*) rollup_group_row output * 1 1 1 1 * 1 1 1 2 -> (1,1,1,null, 1) N * 1 1 3 2 -> (1,1,1,null, 2) Y * (1,1,null,null, 2) N * 2 2 3 2 -> (1,1,3,null, 1) Y * (1,1,null,null, 3) Y * (1,null,null,null, 3) Y * iter end * (2,2,3,null, 1) Y * (2,2,null,null, 1) Y * (2,null,null,null, 1) Y */ int ObMergeGroupByOp::gen_rollup_group_rows( int64_t start_diff_group_idx, int64_t end_group_idx, int64_t max_group_idx, int64_t cur_group_row_id) { int ret = OB_SUCCESS; int64_t cur_rollup_group_id = cur_group_row_id; int64_t prev_group_row_id = cur_rollup_group_id; int64_t cur_rollup_idx = end_group_idx; int64_t start_rollup_idx = start_diff_group_idx; int64_t null_idx = all_groupby_exprs_.count(); const int64_t group_exprs_cnt = MY_SPEC.group_exprs_.count(); ObAggregateProcessor::GroupRow *curr_group_row = nullptr; bool need_set_null = false; for (int64_t idx = cur_rollup_idx; OB_SUCC(ret) && idx >= start_rollup_idx; --idx) { cur_rollup_group_id = idx; if (idx <= max_group_idx) { } else if (OB_FAIL(get_rollup_row(prev_group_row_id, cur_rollup_group_id, curr_group_row, need_set_null, MY_SPEC.is_duplicate_rollup_expr_.at(idx - group_exprs_cnt) ? 
-1 : idx))) { LOG_WARN("failed to get one new group row", K(ret)); } else if (idx == end_group_idx && need_set_null && end_group_idx < all_groupby_exprs_.count() - 1 && OB_FAIL(set_all_null(end_group_idx + 1, all_groupby_exprs_.count(), max_group_idx, curr_group_row->groupby_store_row_))) { LOG_WARN("failed to set all null", K(ret)); } else if (OB_FAIL(aggr_processor_.rollup_batch_process( prev_group_row_id, cur_rollup_group_id, MY_SPEC.is_duplicate_rollup_expr_.at(idx - group_exprs_cnt) ? null_idx : idx, all_groupby_exprs_.count()))) { LOG_WARN("failed to rollup process", K(ret)); } if (OB_FAIL(ret)) { } else if (idx != cur_rollup_idx) { // output prev rollup group row ++curr_group_rowid_; curr_group_row = nullptr; inc_output_queue_cnt(); if (OB_FAIL(get_empty_rollup_row(curr_group_rowid_, curr_group_row))) { LOG_WARN("failed to get one new group row", K(ret)); } else if (OB_FAIL(aggr_processor_.swap_group_row(prev_group_row_id, curr_group_rowid_))) { LOG_WARN("failed to swap group row", K(ret)); } else { // It must be same as swap group row std::swap(output_groupby_rows_[prev_group_row_id], output_groupby_rows_[curr_group_rowid_]); LOG_DEBUG("debug gen rollup group row", K(prev_group_row_id), K(curr_group_rowid_), K(cur_rollup_group_id), K(output_queue_cnt_), K(start_diff_group_idx), K(idx), K(max_group_idx)); } } prev_group_row_id = idx; } return ret; } int ObMergeGroupByOp::process_batch(const ObBatchRows &brs) { int ret = OB_SUCCESS; uint32_t group_start_idx = 0; uint32_t group_end_idx = 0; ObDatum *prev_cells = nullptr; bool found_new_group = false; int64_t diff_group_idx = -1; int64_t group_count = MY_SPEC.group_exprs_.count(); int64_t all_group_cnt = all_groupby_exprs_.count(); bool no_need_process = false; bool need_dup_data = 0 < MY_SPEC.distinct_exprs_.count() && 0 == MY_SPEC.rollup_exprs_.count(); LOG_DEBUG("begin process_batch_results", K(brs.size_), K(group_start_idx), K(group_end_idx), K(curr_group_rowid_)); if 
(OB_FAIL(aggr_processor_.eval_aggr_param_batch(brs))) { LOG_WARN("failed to eval_aggr_param_batch"); } ObEvalCtx::BatchInfoScopeGuard batch_info_guard(eval_ctx_); batch_info_guard.set_batch_size(brs.size_); if (OB_FAIL(ret)) { } else if (all_group_cnt > 0) { cur_group_last_row_idx_ = -1; for (int idx = 0; idx < brs.size_ && OB_SUCC(ret); idx++) { if (brs.skip_->at(idx)) { continue; } batch_info_guard.set_batch_idx(idx); // check new group found_new_group = false; no_need_process = false; if (nullptr == cur_group_row_) { // only first group if (OB_FAIL(get_cur_group_row(curr_group_rowid_, cur_group_row_, all_groupby_exprs_, all_groupby_exprs_.count()))) { LOG_WARN("failed to get one new group row", K(ret)); } else { is_first_calc_ = true; cur_group_last_row_idx_ = idx; } } if (OB_FAIL(ret)) { } else if (OB_ISNULL(cur_group_row_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected status: cur_group_row_ is null", K(ret)); } else { prev_cells = cur_group_row_->groupby_store_row_->cells(); } for (int64_t i = 0; OB_SUCC(ret) && !found_new_group && i < all_group_cnt; ++i) { const ObDatum &last_datum = prev_cells[i]; ObExpr *expr = all_groupby_exprs_.at(i); // performance critical: use expr directly NO defensive check ObDatum &result = expr->locate_expr_datum(eval_ctx_); if (0 != expr->basic_funcs_->null_first_cmp_(last_datum, result)) { found_new_group = true; if (i < group_count) { diff_group_idx = std::max(i, MY_SPEC.group_exprs_.count() - 1); } else { diff_group_idx = i; } } } if (OB_FAIL(ret)) { } else if (!found_new_group && need_dup_data && check_unique_distinct_columns_for_batch(no_need_process, idx)) { LOG_WARN("failed to check unique distinct columns", K(ret)); } else if (no_need_process) { // set the currunt row should not be processed brs.skip_->set(idx); } else if (found_new_group) { // calc last group result group_end_idx = idx; cur_group_last_row_idx_ = idx; LOG_DEBUG("new group found, calc ast group result", K(brs.size_), K(group_start_idx), K(group_end_idx), 
K(idx), K(curr_group_rowid_), K(output_queue_cnt_)); inc_output_queue_cnt(); if (OB_FAIL(aggregate_group_rows(curr_group_rowid_, brs, group_start_idx, group_end_idx))) { LOG_WARN("failed to aggregate_group_rows", K(curr_group_rowid_), K(ret), K(group_start_idx), K(group_end_idx)); } else { if (MY_SPEC.has_rollup_) { int64_t start_rollup_id = diff_group_idx; int64_t end_rollup_id = all_groupby_exprs_.count() - 1; int64_t max_group_idx = MY_SPEC.group_exprs_.count() - 1; if (ROLLUP_DISTRIBUTOR == MY_SPEC.rollup_status_) { start_rollup_id = min(partial_rollup_idx_ - 1, start_rollup_id); end_rollup_id = partial_rollup_idx_ - 1; } else if (ROLLUP_COLLECTOR == MY_SPEC.rollup_status_) { if (0 <= cur_grouping_id_) { // if grouping_id is not greater than 0, then it's not base row, don't rollup row start_rollup_id = all_groupby_exprs_.count(); } else { // if grouping_id is less than 0, then it's base row for rollup collector // +1 is added for grouping_id that is added to group_exprs_ if (ObThreeStageAggrStage::THIRD_STAGE == MY_SPEC.aggr_stage_) { partial_rollup_idx_ = -cur_grouping_id_ - 1; } else { partial_rollup_idx_ = -cur_grouping_id_; } start_rollup_id = max(start_rollup_id, partial_rollup_idx_); max_group_idx = max(start_rollup_id - 1, partial_rollup_idx_); } } LOG_DEBUG("debug grouping_id", K(end_rollup_id), K(start_rollup_id), K(max_group_idx)); if (end_rollup_id >= start_rollup_id && OB_FAIL(gen_rollup_group_rows( start_rollup_id, end_rollup_id, max_group_idx, curr_group_rowid_))) { LOG_WARN("failed to genereate rollup group row", K(ret)); } } ++curr_group_rowid_; // create new group if (OB_FAIL(ret)) { } else if (OB_FAIL(get_cur_group_row(curr_group_rowid_, cur_group_row_, all_groupby_exprs_, all_groupby_exprs_.count()))) { LOG_WARN("failed to get one new group row", K(ret)); } else { group_start_idx = idx; // record new start idx in next round } } } } } else { // no groupby column, equals to scalar group by if (nullptr == cur_group_row_) { if 
(OB_FAIL(get_cur_group_row(curr_group_rowid_, cur_group_row_, all_groupby_exprs_, all_groupby_exprs_.count()))) { LOG_WARN("failed to get new group row", K(ret)); } } } group_end_idx = brs.size_; LOG_DEBUG("calc last unfinished group row", K(brs.size_), K(found_new_group), K(curr_group_rowid_), K(group_start_idx), K(group_end_idx)); // curr_group_rowid_ is common::OB_INVALID_INDEX means all rows are skipped // therefore, do nothing when all rows are skipped if (OB_SUCC(ret) && curr_group_rowid_ != common::OB_INVALID_INDEX && OB_FAIL(aggregate_group_rows(curr_group_rowid_, brs, group_start_idx, group_end_idx))) { LOG_WARN("failed to aggregate_group_rows", K(ret), K(curr_group_rowid_), K(group_start_idx), K(group_end_idx)); } if (OB_SUCC(ret) && 0 < MY_SPEC.distinct_exprs_.count() && -1 != cur_group_last_row_idx_) { // the current row is not same as before row, then save the current row ObEvalCtx::BatchInfoScopeGuard batch_info_guard(eval_ctx_); batch_info_guard.set_batch_size(brs.size_); batch_info_guard.set_batch_idx(cur_group_last_row_idx_); if (OB_FAIL(last_child_output_.save_store_row(child_->get_spec().output_, eval_ctx_, 0))) { LOG_WARN("failed to store child output", K(ret)); } } return ret; } int ObMergeGroupByOp::groupby_datums_eval_batch(const ObBitVector &skip, const int64_t size) { int ret = OB_SUCCESS; int64_t all_count = all_groupby_exprs_.count(); for (int64_t i = 0; OB_SUCC(ret) && i < all_count; i++) { ObExpr *expr = all_groupby_exprs_.at(i); if (OB_FAIL(expr->eval_batch(eval_ctx_, skip, size))) { LOG_WARN("eval failed", K(ret)); } } return ret; } // In batch mode, extra_size save the grouping_id for Rollup Distributor // others extra_size is 0 int ObMergeGroupByOp::prepare_and_save_curr_groupby_datums( int64_t curr_group_rowid, ObAggregateProcessor::GroupRow *group_row, ObIArray &group_exprs, const int64_t group_count, int64_t extra_size) { int ret = OB_SUCCESS; UNUSED(group_count); // save current groupby row ObChunkDatumStore::LastStoredRow 
*groupby_store_row = nullptr; if (OB_FAIL(get_groupby_store_row(curr_group_rowid, &groupby_store_row))) { LOG_WARN("failed to get_groupby_store_row", K(ret)); } else if (OB_FAIL(groupby_store_row->save_store_row(group_exprs, eval_ctx_, extra_size))) { LOG_WARN("failed to store group row", K(ret)); } else { group_row->groupby_store_row_ = groupby_store_row->store_row_; if (0 < extra_size) { *reinterpret_cast(group_row->groupby_store_row_->get_extra_payload()) = -partial_rollup_idx_; } } LOG_DEBUG("finish prepare and save groupby store row", K(group_exprs), K(ret), K(ROWEXPR2STR(eval_ctx_, group_exprs))); return ret; } int ObMergeGroupByOp::check_same_group( ObAggregateProcessor::GroupRow *cur_group_row, int64_t &diff_pos) { int ret = OB_SUCCESS; diff_pos = OB_INVALID_INDEX; int64_t all_group_cnt = all_groupby_exprs_.count(); if (0 >= all_group_cnt) { } else if (OB_ISNULL(cur_group_row->groupby_store_row_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected status: groupby store row is NULL", K(ret)); } else { ObDatum *prev_cells = nullptr; int64_t group_count = MY_SPEC.group_exprs_.count(); bool found_new_group = false; ObDatum *result = nullptr; prev_cells = cur_group_row->groupby_store_row_->cells(); for (int64_t i = 0; OB_SUCC(ret) && !found_new_group && i < all_group_cnt; ++i) { const ObDatum &last_datum = prev_cells[i]; ObExpr *expr = all_groupby_exprs_.at(i); // performance critical: use expr directly NO defensive check if (OB_FAIL(expr->eval(eval_ctx_, result))) { LOG_WARN("eval failed", K(ret)); } else { if (0 != expr->basic_funcs_->null_first_cmp_(last_datum, *result)) { found_new_group = true; diff_pos = i; } // end if } // end if } // end for LOG_DEBUG("finish check same group", K(diff_pos), K(ret), K(group_count), K(all_group_cnt), "row" , ROWEXPR2STR(eval_ctx_, all_groupby_exprs_), K(found_new_group)); } return ret; } int ObMergeGroupByOp::check_unique_distinct_columns( ObAggregateProcessor::GroupRow *cur_group_row, bool &is_same_before_row) { int ret = 
OB_SUCCESS; ObDatum *prev_cells = nullptr; ObDatum *cur_datum = nullptr; is_same_before_row = true; if (OB_ISNULL(last_child_output_.store_row_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected status: last child output is null", K(ret)); } else if (OB_INVALID_INDEX_INT64 == MY_SPEC.aggr_code_idx_) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected status: invalid aggr_code_idx", K(ret), K(MY_SPEC.aggr_code_idx_)); } else { prev_cells = last_child_output_.store_row_->cells(); ObDatum &aggr_code_datum = cur_group_row->groupby_store_row_->cells()[MY_SPEC.aggr_code_idx_]; if (aggr_code_datum.get_int() >= MY_SPEC.dist_aggr_group_idxes_.count()) { // non-distinct aggregate function is_same_before_row = false; LOG_DEBUG("debug non-distinct aggregate function", K(ret), K(aggr_code_datum.get_int()), K(MY_SPEC.dist_aggr_group_idxes_.count())); } else { for (int64_t i = 0; is_same_before_row && i < distinct_col_idx_in_output_.count() && OB_SUCC(ret); ++i) { if (-1 == distinct_col_idx_in_output_.at(i)) { continue; } const ObDatum &last_datum = prev_cells[distinct_col_idx_in_output_.at(i)]; ObExpr *expr = MY_SPEC.distinct_exprs_.at(i); // performance critical: use expr directly NO defensive check if (OB_FAIL(expr->eval(eval_ctx_, cur_datum))) { LOG_WARN("eval failed", K(ret)); } else { if (0 != expr->basic_funcs_->null_first_cmp_(last_datum, *cur_datum)) { is_same_before_row = false; } // end if } // end if } // end for if (OB_SUCC(ret) && !is_same_before_row) { // it's not same as prev row, then save the new distinct row if (OB_FAIL(last_child_output_.save_store_row(child_->get_spec().output_, eval_ctx_, 0))) { LOG_WARN("failed to store child output", K(ret)); } } } LOG_DEBUG("finish check unique distinct columns", K(ret), "row" , ROWEXPR2STR(eval_ctx_, MY_SPEC.distinct_exprs_), K(is_same_before_row), K(is_first_calc_), K(aggr_code_datum.get_int()), K(MY_SPEC.dist_aggr_group_idxes_.count())); } LOG_DEBUG("finish check unique distinct columns", K(ret), "row" , 
ROWEXPR2STR(eval_ctx_, MY_SPEC.distinct_exprs_), K(is_same_before_row)); return ret; } int ObMergeGroupByOp::check_unique_distinct_columns_for_batch( bool &is_same_before_row, int64_t cur_row_idx) { int ret = OB_SUCCESS; if (is_first_calc_) { is_same_before_row = false; LOG_DEBUG("debug is_first_calc", K(ret), K(is_same_before_row)); } else { is_same_before_row = true; ObDatum &aggr_code_datum = cur_group_row_->groupby_store_row_->cells()[MY_SPEC.aggr_code_idx_]; if (aggr_code_datum.get_int() >= MY_SPEC.dist_aggr_group_idxes_.count()) { // non-distinct aggregate function is_same_before_row = false; LOG_DEBUG("debug non-distinct aggregate function", K(ret), K(aggr_code_datum.get_int()), K(MY_SPEC.dist_aggr_group_idxes_.count())); } else if (-1 != cur_group_last_row_idx_) { // non first batch for (int64_t i = 0; is_same_before_row && i < distinct_col_idx_in_output_.count() && OB_SUCC(ret); ++i) { if (-1 == distinct_col_idx_in_output_.at(i)) { continue; } ObExpr *expr = MY_SPEC.distinct_exprs_.at(i); ObDatumVector datums = expr->locate_expr_datumvector(eval_ctx_); if (0 != expr->basic_funcs_->null_first_cmp_(*datums.at(cur_group_last_row_idx_), *datums.at(cur_row_idx))) { is_same_before_row = false; } // end if } // end for LOG_DEBUG("debug non-distinct aggregate function", K(ret), K(aggr_code_datum.get_int()), K(MY_SPEC.dist_aggr_group_idxes_.count())); } else if (nullptr != last_child_output_.store_row_) { ObDatum *prev_cells = nullptr; prev_cells = last_child_output_.store_row_->cells(); for (int64_t i = 0; is_same_before_row && i < distinct_col_idx_in_output_.count() && OB_SUCC(ret); ++i) { if (-1 == distinct_col_idx_in_output_.at(i)) { continue; } const ObDatum &last_datum = prev_cells[distinct_col_idx_in_output_.at(i)]; ObExpr *expr = MY_SPEC.distinct_exprs_.at(i); ObDatum &result = expr->locate_expr_datum(eval_ctx_); if (0 != expr->basic_funcs_->null_first_cmp_(last_datum, result)) { is_same_before_row = false; } // end if } // end for LOG_DEBUG("finish check 
unique distinct columns", K(ret), K(aggr_code_datum.get_int()), "row" , ROWEXPR2STR(eval_ctx_, MY_SPEC.distinct_exprs_), K(is_same_before_row)); } else { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected status: invalid last group row", K(ret)); } } LOG_DEBUG("finish check unique distinct columns", K(ret), "row" , ROWEXPR2STR(eval_ctx_, MY_SPEC.distinct_exprs_), K(is_same_before_row)); cur_group_last_row_idx_ = cur_row_idx; is_first_calc_ = false; return ret; } int ObMergeGroupByOp::restore_groupby_datum( ObAggregateProcessor::GroupRow *cur_group_row, const int64_t diff_pos) { int ret = OB_SUCCESS; UNUSED(diff_pos); ObDatum *groupby_cells = cur_group_row->groupby_store_row_->cells(); // it must use the groupby datum // bug#32738630, if the column is case insensitive // then the different position is not the case insensitive column, // and we set the previous groupby value from the case insensitive value of different groupby, // so the case insensitive is not same if the current group use group values of the current row // case: merge_gby_return_prev_gby_col_bug.test for (int64_t i = 0; OB_SUCC(ret) && i < all_groupby_exprs_.count(); ++i) { ObDatum &last_datum = groupby_cells[i]; ObExpr *expr = all_groupby_exprs_.at(i); ObDatum &result = expr->locate_expr_datum(eval_ctx_); result.set_datum(last_datum); expr->set_evaluated_projected(eval_ctx_); LOG_DEBUG("succ to restore", K(i), KPC(expr), K(result), K(last_datum)); } LOG_DEBUG("finish restore groupby datum", K(diff_pos), K(ret)); return ret; } int ObMergeGroupByOp::fill_groupby_id_expr(const int64_t group_id) { int ret = OB_SUCCESS; ObExpr *grouping_expr = MY_SPEC.rollup_id_expr_; if (OB_ISNULL(grouping_expr)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected status: rollup_id_expr_ is null", K(ret)); } else if (ObRollupStatus::ROLLUP_DISTRIBUTOR == MY_SPEC.rollup_status_) { // fill grouping_id to distinguish the base row and partial rollup row if (group_id == all_groupby_exprs_.count()) { // base row ObDatum &datum = 
grouping_expr->locate_datum_for_write(eval_ctx_); // fill the negitive n_keys as base row datum.set_int(-partial_rollup_idx_); grouping_expr->set_evaluated_projected(eval_ctx_); LOG_DEBUG("debug grouping_id expr", K(ret), K(group_id)); } else { ObDatum &datum = grouping_expr->locate_datum_for_write(eval_ctx_); datum.set_int(group_id); grouping_expr->set_evaluated_projected(eval_ctx_); LOG_DEBUG("debug grouping_id expr", K(ret), K(group_id)); } } return ret; } int ObMergeGroupByOp::rollup_and_calc_results(const int64_t group_id, const ObExpr *diff_expr /*= NULL*/) { int ret = OB_SUCCESS; const int64_t col_count = MY_SPEC.group_exprs_.count() + MY_SPEC.rollup_exprs_.count(); if (OB_UNLIKELY(group_id < 0 || group_id > col_count)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(group_id), K(col_count), K(ret)); } else if (MY_SPEC.has_rollup_ && group_id > 0) { int64_t rollup_group_id = group_id - 1; // Firstly process rollup, it need process partial rollup if (ROLLUP_DISTRIBUTOR == MY_SPEC.rollup_status_ && INT64_MAX != partial_rollup_idx_ && group_id == all_groupby_exprs_.count()) { // firstly, it has partial rollup and get the first rollup group_id // eg: c1,c2,rollup(c3,c4,c5) // then // group_id // 5 (c1, c2, c3, c4, c5) // 4 (c1, c2, c3, c4, null) // 3 (c1, c2, c3, null, null) // 2 (c1, c2, null, null, null) // if partial rollup key is (c1,c2,c3,c4), // then we should calculate group_id 3 startly when partial rollup rollup_group_id = partial_rollup_idx_ - 1; } else if (ROLLUP_COLLECTOR == MY_SPEC.rollup_status_) { // base row need rollup row if (0 <= cur_grouping_id_) { rollup_group_id = INT64_MAX; } } // if group_id is last rollup row, then rollup_process // don't calcuate grouping of group_id and rollup_group_id // and collect will calcuate grouping of group_id if (INT64_MAX != rollup_group_id && OB_FAIL(aggr_processor_.rollup_process(group_id, rollup_group_id, MY_SPEC.group_exprs_.count(), diff_expr))) { LOG_WARN("failed to rollup aggregation 
results", K(ret)); } } if (OB_SUCC(ret)) { clear_evaluated_flag(); if (ObRollupStatus::ROLLUP_DISTRIBUTOR == MY_SPEC.rollup_status_ && OB_FAIL(fill_groupby_id_expr(group_id))) { LOG_WARN("failed to fill rollup_id_expr", K(ret), K(group_id)); } else if (OB_FAIL(aggr_processor_.collect(group_id, diff_expr, MY_SPEC.group_exprs_.count()))) { LOG_WARN("failed to collect aggr result", K(group_id), K(ret)); } else if (OB_FAIL(aggr_processor_.reuse_group(group_id))) { LOG_WARN("failed to reuse group", K(group_id), K(ret)); } else { LOG_DEBUG("finish rollup and calc results", K(group_id), K(is_end_), "row", ROWEXPR2STR(eval_ctx_, MY_SPEC.output_)); } } return ret; } int ObMergeGroupByOp::calc_batch_results(const bool is_iter_end, const int64_t max_output_size) { // TODO: support rollup logic int ret = OB_SUCCESS; if (nullptr == cur_group_row_) { // get empty result from child, just return empty brs_.size_ = 0; brs_.end_ = is_iter_end; LOG_DEBUG("debug cur_group_row_ is null", K(ret)); } else { if (is_iter_end) { is_end_ = true; } // make sure output size is less than aggr group_rows int64_t output_size = common::min(get_output_queue_cnt(), max_output_size); clear_evaluated_flag(); // note: brs_.size_ is set in collect_result_batch if (OB_FAIL(aggr_processor_.collect_result_batch( all_groupby_exprs_, output_size, brs_, cur_output_group_id_))) { LOG_WARN("failed to collect batch result", K(ret)); } else { LOG_DEBUG("collect result done", K(cur_output_group_id_), K(get_output_queue_cnt()), K(is_iter_end), K(output_size), K(aggr_processor_.get_group_rows_count())); set_output_queue_cnt(get_output_queue_cnt() - brs_.size_); if (!is_output_queue_not_empty()) { LOG_DEBUG("all output row are consumed, do the cleanup", K(cur_output_group_id_), K(get_output_queue_cnt()), K(is_iter_end), K(output_size), K(aggr_processor_.get_group_rows_count()), K(curr_group_rowid_)); // aggregation rows cleanup and reuse int64_t start_pos = MY_SPEC.has_rollup_ ? 
all_groupby_exprs_.count(): 0; for (auto i = start_pos; OB_SUCC(ret) && i < curr_group_rowid_; i++) { if (OB_FAIL(aggr_processor_.reuse_group(i, false))) { LOG_WARN("failed to collect result ", K(ret), K(i), K(eval_ctx_.get_batch_idx())); } } if (is_end_) {// all output rows are consumed, child operator eached end. mark batch end brs_.end_ = true; } else { // move last unfinished grouprow // curr_group_rowid(last group row) calculation is NOT done, move it // to group 0 ObAggregateProcessor::GroupRow *group_row = nullptr; if (OB_FAIL(aggr_processor_.swap_group_row(start_pos, curr_group_rowid_))) { LOG_WARN("failed to swap aggregation group rows", K(ret), K(curr_group_rowid_)); } else if (OB_FAIL(aggr_processor_.get_group_row(start_pos, group_row))) { LOG_WARN("failed to get group row", K(ret)); } else if (OB_ISNULL(group_row) || OB_ISNULL(group_row->groupby_store_row_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("group row is empty", K(ret), KP(group_row), K(curr_group_rowid_)); } else { std::swap(output_groupby_rows_[start_pos], output_groupby_rows_[curr_group_rowid_]); } curr_group_rowid_ = start_pos; cur_output_group_id_ = start_pos; } } } } return ret; } inline int ObMergeGroupByOp::get_groupby_store_row( int i, ObChunkDatumStore::LastStoredRow **store_row) { int ret = OB_SUCCESS; if (i < output_groupby_rows_.count()) { *store_row = output_groupby_rows_.at(i); } else if (i == output_groupby_rows_.count()) { if (OB_FAIL(create_groupby_store_row(store_row))) { LOG_WARN("failed create groupby store row", K(ret)); } } else { for (int64_t cur = output_groupby_rows_.count(); OB_SUCC(ret) && cur <= i; ++cur) { if (OB_FAIL(create_groupby_store_row(store_row))) { LOG_WARN("failed create groupby store row", K(ret)); } } } return ret; } int ObMergeGroupByOp::get_n_shuffle_keys_for_exchange(int64_t &shuffle_n_keys) { int ret = OB_SUCCESS; shuffle_n_keys = 0; if (INT64_MAX == partial_rollup_idx_ || 0 >= partial_rollup_idx_) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected status: 
invalid partial rollup idx", K(ret), K(partial_rollup_idx_)); } else { // the keys of exchange contains group_exprs, [aggr_code], grouping_id and rollup_exprs // grouping_id is append shuffle expr if (MY_SPEC.group_exprs_.count() >= partial_rollup_idx_) { shuffle_n_keys = partial_rollup_idx_; } else { if (ObThreeStageAggrStage::SECOND_STAGE == MY_SPEC.aggr_stage_) { // aggr_code is groupby exprs in second stage, // but it's not groupby exprs in third stage shuffle_n_keys = partial_rollup_idx_; } else { shuffle_n_keys = partial_rollup_idx_ + 1; } LOG_TRACE("debug merge groupby shuffle keys", K(shuffle_n_keys)); } } return ret; } inline int ObMergeGroupByOp::create_groupby_store_row( ObChunkDatumStore::LastStoredRow **store_row) { int ret = OB_SUCCESS; void *buf = aggr_processor_.get_aggr_alloc().alloc( sizeof(ObChunkDatumStore::LastStoredRow)); if (OB_ISNULL(buf)) { LOG_WARN("failed alloc memory", K(ret)); ret = OB_ALLOCATE_MEMORY_FAILED; } else { *store_row = new (buf) ObChunkDatumStore::LastStoredRow(aggr_processor_.get_aggr_alloc()); if (OB_FAIL(output_groupby_rows_.push_back(*store_row))) { LOG_WARN("failed push back", K(ret)); } (*store_row)->reuse_ = true; } return ret; } } // end namespace sql } // end namespace oceanbase