/** * Copyright (c) 2021 OceanBase * OceanBase CE is licensed under Mulan PubL v2. * You can use this software according to the terms and conditions of the Mulan PubL v2. * You may obtain a copy of Mulan PubL v2 at: * http://license.coscl.org.cn/MulanPubL-2.0 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. * See the Mulan PubL v2 for more details. */ #define USING_LOG_PREFIX SQL_ENG #include "ob_limit_op.h" #include "ob_limit_vec_op.h" #include "sql/engine/ob_physical_plan.h" #include "sql/engine/ob_exec_context.h" #include "sql/engine/basic/ob_material_vec_op.h" #include "sql/engine/sort/ob_sort_op.h" #include "sql/engine/sort/ob_sort_vec_op.h" #include "sql/engine/basic/ob_material_op.h" namespace oceanbase { using namespace common; namespace sql { ObLimitVecSpec::ObLimitVecSpec(ObIAllocator &alloc, const ObPhyOperatorType type) : ObOpSpec(alloc, type), limit_expr_(NULL), offset_expr_(NULL), percent_expr_(NULL), calc_found_rows_(false), is_top_limit_(false), is_fetch_with_ties_(false), sort_columns_(alloc) { } ObLimitVecSpec::~ObLimitVecSpec() {} OB_SERIALIZE_MEMBER((ObLimitVecSpec, ObOpSpec), limit_expr_, offset_expr_, percent_expr_, calc_found_rows_, is_top_limit_, is_fetch_with_ties_, sort_columns_); ObLimitVecOp::ObLimitVecOp(ObExecContext &exec_ctx, const ObOpSpec &spec, ObOpInput *input) : ObOperator(exec_ctx, spec, input), limit_(-1), offset_(0), input_cnt_(0), output_cnt_(0), total_cnt_(0), is_percent_first_(false), pre_sort_columns_(exec_ctx.get_allocator()) { } ObLimitVecOp::~ObLimitVecOp() { destroy(); } int ObLimitVecOp::inner_open() { int ret = OB_SUCCESS; bool is_null_value = false; if (OB_ISNULL(child_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("limit vector operator has no child", K(ret)); } else if (OB_FAIL(ObLimitVecOp::get_int_val(MY_SPEC.limit_expr_, eval_ctx_, limit_, is_null_value))) { LOG_WARN("get limit values failed", K(ret)); } else if (!is_null_value && OB_FAIL(ObLimitVecOp::get_int_val(MY_SPEC.offset_expr_, eval_ctx_, offset_, is_null_value))) { LOG_WARN("get offset values failed", K(ret)); } else if (is_null_value) { offset_ = 0; limit_ = 0; } else { is_percent_first_ = NULL != MY_SPEC.percent_expr_; // revise limit, offset because rownum < -1 is rewritten as limit -1 // if *limit* < 0, then *offset_* is meaningless, so check both here together //offset 2 rows fetch next -3 rows only --> is meaningless offset_ = offset_ < 0 ? 0 : offset_; if (MY_SPEC.limit_expr_ != NULL) {//can not just set to 0, since there maybe only *offset* but not *limit* in a sql query limit_ = limit_ < 0 ? 0 : limit_; } pre_sort_columns_.reuse_ = true; } return ret; } int ObLimitVecOp::inner_rescan() { input_cnt_ = 0; output_cnt_ = 0; return ObOperator::inner_rescan(); } int ObLimitVecOp::inner_get_next_row() { return OB_ERR_UNEXPECTED; } // Batch version of ObLimitOp::inner_get_next_row int ObLimitVecOp::inner_get_next_batch(const int64_t max_row_cnt) { int ret = OB_SUCCESS; int64_t batch_cnt = min(max_row_cnt, MY_SPEC.max_batch_size_); LOG_DEBUG("ObLimitVecOp[3] get_next_batch start", K(limit_), K(offset_), K(batch_cnt), K(is_percent_first_), K(output_cnt_), K(MY_SPEC.calc_found_rows_)); const ObBatchRows *child_brs = nullptr; clear_evaluated_flag(); while (OB_SUCC(ret) && input_cnt_ < offset_) { // Note: batch_cnt is NEVER bigger than offset if (input_cnt_ + batch_cnt > offset_) { batch_cnt = offset_ - input_cnt_; } if (OB_FAIL(child_->get_next_batch(batch_cnt, child_brs))) { LOG_WARN("child_op failed to get next row", K(input_cnt_), K(offset_), K(ret)); } else if (is_percent_first_ && OB_FAIL(convert_limit_percent())) { LOG_WARN("failed to convert limit percent", K(ret)); } else { input_cnt_ += (child_brs->size_ - child_brs->skip_->accumulate_bit_cnt(child_brs->size_)); } if (child_brs->end_) { brs_.end_ = true; break; } } // end while LOG_DEBUG("ObLimitVecOp get_next_batch", K(brs_), K(input_cnt_), K(batch_cnt), K(output_cnt_)); auto skip_fetch_rows = false; if (input_cnt_ > offset_ && output_cnt_ == 0) { // offset error handling: child operator return more rows than expected // mark offset rows as skipped and continue limit logic brs_.copy(child_brs); input_cnt_ -= (brs_.size_ - brs_.skip_->accumulate_bit_cnt(brs_.size_)); for (auto i = 0; i < child_brs->size_; i++) { if (brs_.skip_->at(i)) { continue; } else { ++input_cnt_; if (input_cnt_ <= offset_) { brs_.skip_->set(i); } else { break; } } } // stop getting rows from child and setting brs_ when skip_fetch_rows is true skip_fetch_rows = true; } // now the limit part int64_t left_count = 0; batch_cnt = min(max_row_cnt, MY_SPEC.max_batch_size_); if (OB_UNLIKELY(brs_.end_) && !skip_fetch_rows) { brs_.size_ = 0; LOG_DEBUG("Offset num is bigger than child output num, return empty rows", K(offset_), K(input_cnt_), K(child_brs->size_), K(child_brs->end_)); } else if (OB_SUCC(ret)) { if (is_percent_first_ || output_cnt_ < limit_ || limit_ < 0) { // adjust iterating count for last batch if (output_cnt_ + batch_cnt > limit_ && limit_ >= 0) { batch_cnt = limit_ - output_cnt_; } if (is_percent_first_) { // Fetch one row for percent first fetch, make sure %output_cnt_ never exceed %limit_ // in this round. batch_cnt = 1; } if (!skip_fetch_rows && OB_FAIL(child_->get_next_batch(batch_cnt, child_brs))) { LOG_WARN("child_op failed to get next row", K(ret), K(limit_), K(batch_cnt)); } else if (is_percent_first_ && OB_FAIL(convert_limit_percent())) { LOG_WARN("failed to convert limit percent", K(ret)); } else if (limit_ == 0) { brs_.size_ = 0; brs_.end_ = true; } else { if (!skip_fetch_rows) { // skip copy brs_ from child as it is already copied in offset error // handing branch brs_.copy(child_brs); } output_cnt_ += (brs_.size_ - brs_.skip_->accumulate_bit_cnt(brs_.size_)); if (output_cnt_ == limit_) { // Don't mark brs_.end_, end iterating in next round if (MY_SPEC.is_fetch_with_ties_) { ObEvalCtx::BatchInfoScopeGuard batch_info_guard(eval_ctx_); batch_info_guard.set_batch_size(brs_.size_); batch_info_guard.set_batch_idx(find_last_available_row_cnt(*(brs_.skip_), brs_.size_)); if (OB_FAIL(pre_sort_columns_.save_store_row(MY_SPEC.sort_columns_, brs_, eval_ctx_))) { LOG_WARN("failed to deep copy limit last rows", K(ret)); } } } else if (OB_UNLIKELY( limit_ != -1 /*limit=-1 means no limit in oracle mode*/ && output_cnt_ > limit_)) { // Notice: here is the error hanlding branch // Child branch should NOT return rows more than batch_cnt. // If it return more row, something out of expect take place. // In order to keep limit logic NOT broken, update the batch size // within limit operator to get correct result. LOG_TRACE("child operator return more rows than expected", K(output_cnt_), K(limit_), K(brs_), K(batch_cnt), K(child_->get_spec().get_type())); // recaculate output_cnt_ and update output brs_.size_ output_cnt_ -= (brs_.size_ - brs_.skip_->accumulate_bit_cnt(brs_.size_)); for (auto i = 0; i < child_brs->size_; i++) { if (brs_.skip_->at(i)) { continue; } else { ++output_cnt_; if (output_cnt_ >= limit_) { brs_.size_ = i + 1; break; } } } } } skip_fetch_rows = false; } else if (limit_ > 0 && output_cnt_ >= limit_ && MY_SPEC.is_fetch_with_ties_) { // keep fetching until a different value is found batch_cnt = min(max_row_cnt, MY_SPEC.max_batch_size_); bool keep_iterating = false; uint32_t matched_row_count = 0; if (OB_FAIL(child_->get_next_batch(batch_cnt, child_brs))) { LOG_WARN("child_op failed to get next row", K(ret), K(limit_), K(batch_cnt), K(child_brs->size_)); } else if (OB_FAIL(compare_value_in_batch(keep_iterating, child_brs, matched_row_count))) { LOG_WARN("failed to is row order by item value equal", K(ret)); } brs_.copy(child_brs); if (!keep_iterating) { brs_.end_ = true; brs_.size_ = matched_row_count; } output_cnt_ += brs_.size_; } else { brs_.end_ = true; if (MY_SPEC.calc_found_rows_) { batch_cnt = min(max_row_cnt, MY_SPEC.max_batch_size_); while (OB_SUCC(child_->get_next_batch(batch_cnt, child_brs))) { left_count += (child_brs->size_ - child_brs->skip_->accumulate_bit_cnt(child_brs->size_)); if (child_brs->end_) { break; } } if (OB_SUCCESS != ret) { LOG_WARN("fail to get next row from child", K(ret)); } } } } if (brs_.end_) { if (MY_SPEC.is_top_limit_) { total_cnt_ = left_count + output_cnt_ + input_cnt_; ObPhysicalPlanCtx *plan_ctx = NULL; if (OB_ISNULL(plan_ctx = ctx_.get_physical_plan_ctx())) { ret = OB_ERR_NULL_VALUE; LOG_WARN("get physical plan context failed"); } else { NG_TRACE_EXT(found_rows, OB_ID(total_count), total_cnt_, OB_ID(input_count), input_cnt_); plan_ctx->set_found_rows(total_cnt_); } } } LOG_DEBUG("limit_vec_op get_next_batch finished", K(batch_cnt), K(output_cnt_), K(brs_), K(limit_), K(input_cnt_)); return ret; } // batch version for is_row_order_by_item_value_equal int ObLimitVecOp::compare_value_in_batch(bool &keep_iterating, const ObBatchRows * brs, uint32_t &row_count_matched) { int ret = OB_SUCCESS; keep_iterating = true; if (MY_SPEC.sort_columns_.empty()) { // %sort_columns_ is empty if order by const value, set is_equal to true directly. // pre_sort_columns_.compact_row_ is NULL here. } else { CK(compare_vectors_.empty()); const int64_t size = MY_SPEC.sort_columns_.count(); for (int64_t i = 0; OB_SUCC(ret) && i < MY_SPEC.sort_columns_.count(); ++i) { ObExpr *expr = MY_SPEC.sort_columns_.at(i); if (OB_FAIL(expr->eval_vector(eval_ctx_, *brs))) { LOG_WARN("expression evaluate failed", K(ret)); } else if (OB_FAIL(compare_vectors_.push_back(expr->get_vector(eval_ctx_)))) { LOG_WARN("compare_vectors_ push back failed", K(ret)); } } ObCompactRow* current_waiting_compare_row = pre_sort_columns_.compact_row_; for (uint32_t row_idx = 0; OB_SUCC(ret) && keep_iterating && row_idx < brs->size_; row_idx++) { if (brs->skip_->at(row_idx)) { continue; } for (int64_t col_idx = 0; OB_SUCC(ret) && keep_iterating && col_idx < compare_vectors_.count(); ++col_idx) { ObExpr *expr = MY_SPEC.sort_columns_.at(col_idx); int cmp_ret = 0; if (OB_FAIL(compare_vectors_[col_idx]->null_first_cmp(*expr, row_idx, current_waiting_compare_row->is_null(col_idx), current_waiting_compare_row->get_cell_payload(pre_sort_columns_.row_meta_, col_idx), current_waiting_compare_row->get_length(pre_sort_columns_.row_meta_, col_idx), cmp_ret))) { LOG_WARN("compare failed", K(ret)); } else { keep_iterating = (0 == cmp_ret); } } if (keep_iterating) { row_count_matched++; } } compare_vectors_.reuse(); } return ret; } //For *percent*, you need to convert it to the corresponding *limit* count based on the total number of rows int ObLimitVecOp::convert_limit_percent() { int ret = OB_SUCCESS; double percent = 0.0; if (OB_FAIL(ObLimitVecOp::get_double_val(MY_SPEC.percent_expr_, eval_ctx_, percent))) { LOG_WARN("failed to get double value", K(ret)); } else if (percent > 0) { int64_t tot_count = 0; if (OB_UNLIKELY(limit_ != -1) || OB_ISNULL(child_) || OB_UNLIKELY(child_->get_spec().get_type() != PHY_VEC_MATERIAL && child_->get_spec().get_type() != PHY_SORT && child_->get_spec().get_type() != PHY_VEC_SORT)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected error", K(ret), K(limit_), K(child_)); } else if (child_->get_spec().get_type() == PHY_MATERIAL && OB_FAIL(static_cast(child_)->get_material_row_count(tot_count))) { LOG_WARN("failed to get op row count", K(ret)); } else if (child_->get_spec().get_type() == PHY_VEC_MATERIAL && OB_FAIL(static_cast(child_)->get_material_row_count(tot_count))) { LOG_WARN("failed to get op row count", K(ret)); //TODO: so far vec_sort havn't implement, so here will cast it to old sort op //It should be replaced as PHY_VEC_SORT after new vectorization sort op is implemented } else if (child_->get_spec().get_type() == PHY_SORT && FALSE_IT(tot_count = static_cast(child_)->get_sort_row_count())) { LOG_WARN("failed to get op row count", K(ret)); } else if (child_->get_spec().get_type() == PHY_VEC_SORT && FALSE_IT(tot_count = static_cast(child_)->get_sort_row_count())) { LOG_WARN("failed to get op row count", K(ret)); } else if (OB_UNLIKELY(tot_count < 0)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get invalid child op row count", K(tot_count), K(ret)); } else if (percent < 100) { //Compatible with oracle, rounded up int64_t percent_int64 = static_cast(percent); int64_t offset = (tot_count * percent / 100 - tot_count * percent_int64 / 100) > 0 ? 1 : 0; limit_ = tot_count * percent_int64 / 100 + offset; is_percent_first_ = false; } else { limit_ = tot_count; is_percent_first_ = false; } } else { limit_ = 0; } return ret; } int ObLimitVecOp::get_double_val(ObExpr *expr, ObEvalCtx &eval_ctx, double &val) { int ret = OB_SUCCESS; if (NULL != expr) { OB_ASSERT(ob_is_double_tc(expr->datum_meta_.type_)); int64_t skip_int = 0; ObBitVector *skip = to_bit_vector(&skip_int); if (OB_FAIL(expr->eval_vector(eval_ctx, *skip, 1, true))) { LOG_WARN("expr evaluate failed", K(ret), K(expr)); } else if (expr->get_vector(eval_ctx)->is_null(0)) { val = 0.0; } else { val = expr->get_vector(eval_ctx)->get_double(0); } } return ret; } int ObLimitVecOp::get_int_val(ObExpr *expr, ObEvalCtx &eval_ctx, int64_t &val, bool &is_null_value) { int ret = OB_SUCCESS; if (NULL != expr) { OB_ASSERT(ob_is_int_tc(expr->datum_meta_.type_)); int64_t skip_int = 0; ObBitVector *skip = to_bit_vector(&skip_int); if (OB_FAIL(expr->eval_vector(eval_ctx, *skip, 1, true))) { LOG_WARN("expr evaluate failed", K(ret), K(expr)); } else if (expr->get_vector(eval_ctx)->is_null(0)) { is_null_value = true; val = 0.0; } else { val = expr->get_vector(eval_ctx)->get_int(0); } } return ret; } } // end namespace sql } // end namespace oceanbase