/** * Copyright (c) 2021 OceanBase * OceanBase CE is licensed under Mulan PubL v2. * You can use this software according to the terms and conditions of the Mulan PubL v2. * You may obtain a copy of Mulan PubL v2 at: * http://license.coscl.org.cn/MulanPubL-2.0 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. * See the Mulan PubL v2 for more details. */ #define USING_LOG_PREFIX SQL_ENG #include "sql/engine/join/ob_nested_loop_join_op.h" #include "sql/engine/table/ob_table_scan_op.h" #include "sql/engine/ob_exec_context.h" namespace oceanbase { using namespace common; namespace sql { OB_SERIALIZE_MEMBER((ObNestedLoopJoinSpec, ObBasicNestedLoopJoinSpec), group_rescan_, group_size_, left_expr_ids_in_other_cond_, left_rescan_params_, right_rescan_params_); ObNestedLoopJoinOp::ObNestedLoopJoinOp(ObExecContext &exec_ctx, const ObOpSpec &spec, ObOpInput *input) : ObBasicNestedLoopJoinOp(exec_ctx, spec, input), state_(JS_READ_LEFT), mem_context_(nullptr), left_store_("NljLStore"), is_left_end_(false), last_store_row_(), save_last_row_(false), defered_right_rescan_(false), batch_rescan_ctl_(), batch_state_(JS_FILL_LEFT), save_last_batch_(false), batch_mem_ctx_(NULL), stored_rows_(NULL), right_store_("NljRStore"), left_brs_(NULL), left_matched_(NULL), need_switch_iter_(false), iter_end_(false), op_max_batch_size_(0), max_group_size_(OB_MAX_BULK_JOIN_ROWS), group_join_buffer_(), match_left_batch_end_(false), match_right_batch_end_(false), l_idx_(0), no_match_row_found_(true), need_output_row_(false), left_expr_extend_size_(0) { state_operation_func_[JS_JOIN_END] = &ObNestedLoopJoinOp::join_end_operate; state_function_func_[JS_JOIN_END][FT_ITER_GOING] = NULL; state_function_func_[JS_JOIN_END][FT_ITER_END] = &ObNestedLoopJoinOp::join_end_func_end; state_operation_func_[JS_READ_LEFT] = &ObNestedLoopJoinOp::read_left_operate; state_function_func_[JS_READ_LEFT][FT_ITER_GOING] = &ObNestedLoopJoinOp::read_left_func_going; state_function_func_[JS_READ_LEFT][FT_ITER_END] = &ObNestedLoopJoinOp::read_left_func_end; state_operation_func_[JS_READ_RIGHT] = &ObNestedLoopJoinOp::read_right_operate; state_function_func_[JS_READ_RIGHT][FT_ITER_GOING] = &ObNestedLoopJoinOp::read_right_func_going; state_function_func_[JS_READ_RIGHT][FT_ITER_END] = &ObNestedLoopJoinOp::read_right_func_end; } int ObNestedLoopJoinOp::inner_open() { int ret = OB_SUCCESS; if (OB_ISNULL(left_) || OB_ISNULL(right_)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("nlp_op child is null", KP(left_), KP(right_), K(ret)); } else if (OB_FAIL(ObBasicNestedLoopJoinOp::inner_open())) { LOG_WARN("failed to open in base class", K(ret)); } if (OB_SUCC(ret) && is_vectorized()) { if (MY_SPEC.group_rescan_) { max_group_size_ = OB_MAX_BULK_JOIN_ROWS + MY_SPEC.plan_->get_batch_size(); } if (OB_ISNULL(batch_mem_ctx_)) { ObSQLSessionInfo *session = ctx_.get_my_session(); uint64_t tenant_id =session->get_effective_tenant_id(); lib::ContextParam param; const int64_t mem_limit = 8 * 1024 * 1024; //8M; param.set_mem_attr(tenant_id, ObModIds::OB_SQL_NLJ_CACHE, ObCtxIds::WORK_AREA) .set_properties(lib::USE_TL_PAGE_OPTIONAL); if (OB_FAIL(CURRENT_CONTEXT->CREATE_CONTEXT(batch_mem_ctx_, param))) { LOG_WARN("create entity failed", K(ret)); } else if (OB_ISNULL(batch_mem_ctx_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("null memory entity returned", K(ret)); } } if (OB_SUCC(ret)) { char *buf = (char *)batch_mem_ctx_->get_arena_allocator() .alloc(ObBitVector::memory_size(MY_SPEC.max_batch_size_)); if (OB_ISNULL(buf)) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to alloc", K(ret)); } else { MEMSET(buf, 0, ObBitVector::memory_size(MY_SPEC.max_batch_size_)); left_matched_ = to_bit_vector(buf); } } if (OB_SUCC(ret)) { if (OB_FAIL(left_batch_.init(&(left_->get_spec().output_), &(batch_mem_ctx_->get_arena_allocator()), MY_SPEC.max_batch_size_))) { LOG_WARN("fail to init batch", K(ret)); } else if (MY_SPEC.enable_px_batch_rescan_) { if (OB_FAIL(last_save_batch_.init(&left_->get_spec().output_, &batch_mem_ctx_->get_arena_allocator(), MY_SPEC.max_batch_size_))) { LOG_WARN("fail to init batch", K(ret)); } } } } if (OB_SUCC(ret) && MY_SPEC.group_rescan_) { if (OB_FAIL(group_join_buffer_.init(this, max_group_size_, MY_SPEC.group_size_, &MY_SPEC.rescan_params_, &MY_SPEC.left_rescan_params_, &MY_SPEC.right_rescan_params_))) { LOG_WARN("init batch info failed", KR(ret)); } } return ret; } //NLJ has its own switch_iterator int ObNestedLoopJoinOp::switch_iterator() { int ret = OB_SUCCESS; if (OB_FAIL(ObOperator::inner_switch_iterator())) { LOG_WARN("failed to inner switch iterator", K(ret)); } else if (OB_FAIL(left_->switch_iterator())) { if (OB_ITER_END != ret) { LOG_WARN("switch left child iterator failed", K(ret)); } } else { reset_buf_state(); } #ifndef NDEBUG OX(OB_ASSERT(false == brs_.end_)); #endif return ret; } int ObNestedLoopJoinOp::rescan() { int ret = OB_SUCCESS; //NLJ's rescan should only drive left child's rescan, //the right child's rescan is defer to rescan_right_operator() driven by get_next_row(); defered_right_rescan_ = true; if (!MY_SPEC.group_rescan_) { if (OB_FAIL(left_->rescan())) { LOG_WARN("rescan left child operator failed", KR(ret), "child op_type", left_->op_name()); } else if (OB_FAIL(inner_rescan())) { LOG_WARN("failed to inner rescan", KR(ret)); } } else { if (OB_FAIL(group_join_buffer_.init_above_group_params())) { LOG_WARN("init above bnlj params failed", KR(ret)); } else if (OB_FAIL(group_join_buffer_.rescan_left())) { LOG_WARN("rescan left failed", KR(ret)); } else if (OB_FAIL(inner_rescan())) { LOG_WARN("inner rescan failed", KR(ret)); } } #ifndef NDEBUG OX(OB_ASSERT(false == brs_.end_)); #endif return ret; } int ObNestedLoopJoinOp::inner_rescan() { int ret = OB_SUCCESS; reset_buf_state(); set_param_null(); if (OB_FAIL(ObBasicNestedLoopJoinOp::inner_rescan())) { LOG_WARN("failed to rescan", K(ret)); } return ret; } int ObNestedLoopJoinOp::inner_get_next_row() { int ret = OB_SUCCESS; if (OB_UNLIKELY(LEFT_SEMI_JOIN == MY_SPEC.join_type_ || LEFT_ANTI_JOIN == MY_SPEC.join_type_)) { if (OB_FAIL(join_row_with_semi_join())) { if (ret != OB_ITER_END) { LOG_WARN("failed to join row with semi join", K(ret)); } } } else { state_operation_func_type state_operation = NULL; state_function_func_type state_function = NULL; int func = -1; output_row_produced_ = false; while (OB_SUCC(ret) && !output_row_produced_) { state_operation = this->ObNestedLoopJoinOp::state_operation_func_[state_]; if (OB_ITER_END == (ret = (this->*state_operation)())) { func = FT_ITER_END; ret = OB_SUCCESS; } else if (OB_FAIL(ret)) { LOG_WARN("failed state operation", K(ret), K(state_)); } else { func = FT_ITER_GOING; } if (OB_SUCC(ret)) { state_function = this->ObNestedLoopJoinOp::state_function_func_[state_][func]; if (OB_FAIL((this->*state_function)()) && OB_ITER_END != ret) { LOG_WARN("failed state function", K(ret), K(state_), K(func)); } } } // while end } if (OB_ITER_END == ret) { set_param_null(); } return ret; } void ObNestedLoopJoinOp::reset_buf_state() { state_ = JS_READ_LEFT; left_store_iter_.reset(); left_store_.reset(); is_left_end_ = false; last_store_row_.reset(); save_last_row_ = false; batch_rescan_ctl_.reset(); batch_state_ = JS_FILL_LEFT; save_last_batch_ = false; need_switch_iter_ = false; iter_end_ = false; left_batch_.clear_saved_size(); last_save_batch_.clear_saved_size(); match_left_batch_end_ = false; match_right_batch_end_ = false; l_idx_ = 0; no_match_row_found_ = true; need_output_row_ = false; left_expr_extend_size_ = 0; } int ObNestedLoopJoinOp::fill_cur_row_rescan_param() { int ret = OB_SUCCESS; ObPhysicalPlanCtx *plan_ctx = ctx_.get_physical_plan_ctx(); if (OB_ISNULL(plan_ctx)) { ret = OB_BAD_NULL_ERROR; LOG_WARN("plan ctx or left row is null", K(ret)); } else if (batch_rescan_ctl_.cur_idx_ >= batch_rescan_ctl_.params_.get_count()) { ret = OB_ERR_UNEXPECTED; LOG_WARN("row idx is unexpected", K(ret), K(batch_rescan_ctl_.cur_idx_), K(batch_rescan_ctl_.params_.get_count())); } else { common::ObIArray& params = batch_rescan_ctl_.params_.get_one_batch_params(batch_rescan_ctl_.cur_idx_); int64_t param_cnt = params.count(); int64_t idx = OB_INVALID_ID; for (int64_t i = 0; OB_SUCC(ret) && i < param_cnt; ++i) { idx = batch_rescan_ctl_.params_.get_param_idx(i); plan_ctx->get_param_store_for_update().at(idx) = params.at(i); const ObDynamicParamSetter &rescan_param = get_spec().rescan_params_.at(i); if (OB_FAIL(rescan_param.set_dynamic_param(eval_ctx_))) { LOG_WARN("fail to set dynamic param", K(ret)); } } } return ret; } int ObNestedLoopJoinOp::join_row_with_semi_join() { int ret = OB_SUCCESS; const bool is_anti = (LEFT_ANTI_JOIN == MY_SPEC.join_type_); while (OB_SUCC(ret) && OB_SUCC(get_next_left_row())) { clear_evaluated_flag(); if (OB_FAIL(try_check_status())) { LOG_WARN("check status failed", K(ret)); } else if (OB_FAIL(prepare_rescan_params())) { LOG_WARN("prepare right child rescan param failed", K(ret)); } else if (OB_FAIL(rescan_right_operator())) { LOG_WARN("rescan right child failed", K(ret)); } if (OB_SUCC(ret)) { // 迭代右表 bool is_matched = false; while (OB_SUCC(ret) && !is_matched && OB_SUCC(right_->get_next_row())) { clear_evaluated_flag(); if (OB_FAIL(try_check_status())) { LOG_WARN("check status failed", K(ret)); } else if (OB_FAIL(calc_other_conds(is_matched))) { LOG_WARN("calc other conditions failed", K(ret)); } } if (OB_ITER_END == ret) { ret = OB_SUCCESS; // 右表不存在和左表匹配的行,所以迭代左表下一行 } if (OB_SUCC(ret) && is_anti != is_matched) { // 1. the exit conditions of semi and anti semi are different, // 2. they share the same outer while loop, // 3. is_matched must init to false for inner while loop, // so we need explicitly break. break; } } } if (OB_FAIL(ret)) { if (OB_ITER_END != ret) { LOG_WARN("get next row failed", K(ret)); } } return ret; } int ObNestedLoopJoinOp::join_end_operate() { return OB_ITER_END; } int ObNestedLoopJoinOp::join_end_func_end() { return OB_ITER_END; } int ObNestedLoopJoinOp::read_left_operate() { int ret = OB_SUCCESS; if (MY_SPEC.group_rescan_ || MY_SPEC.enable_px_batch_rescan_) { if (OB_FAIL(group_read_left_operate()) && OB_ITER_END != ret) { LOG_WARN("failed to read left group", K(ret)); } } else if (FALSE_IT(set_param_null())) { } else if (OB_FAIL(get_next_left_row()) && OB_ITER_END != ret) { LOG_WARN("failed to get next left row", K(ret)); } return ret; } int ObNestedLoopJoinOp::rescan_params_batch_one(int64_t batch_idx) { int ret = OB_SUCCESS; left_batch_.to_exprs(eval_ctx_, batch_idx, batch_idx); if (OB_FAIL(prepare_rescan_params())) { LOG_WARN("failed to prepare rescan params", K(ret)); } else if (OB_FAIL(rescan_right_operator())) { LOG_WARN("failed to rescan right op", K(ret)); } return ret; } int ObNestedLoopJoinOp::rescan_right_operator() { int ret = OB_SUCCESS; bool do_rescan = false; if (defered_right_rescan_) { do_rescan = true; defered_right_rescan_ = false; } else { // FIXME bin.lb: handle monitor dump + material ? if (PHY_MATERIAL == right_->get_spec().type_) { if (OB_FAIL(static_cast(right_)->rewind())) { if (OB_ITER_END != ret) { LOG_WARN("rewind failed", K(ret)); } } } else { do_rescan = true; } } if (OB_SUCC(ret) && do_rescan) { if (OB_FAIL(right_->rescan())) { if (OB_ITER_END != ret) { LOG_WARN("rescan right failed", K(ret)); } } else { /*do nothing*/ } } return ret; } int ObNestedLoopJoinOp::group_read_left_operate() { int ret = OB_SUCCESS; if (MY_SPEC.enable_px_batch_rescan_) { if (left_store_iter_.is_valid() && left_store_iter_.has_next()) { // 重新设置右表 table scan result, result 为下一个 cache if (MY_SPEC.enable_px_batch_rescan_) { batch_rescan_ctl_.cur_idx_++; } } else { if (is_left_end_) { ret = OB_ITER_END; } else { if (OB_ISNULL(mem_context_)) { ObSQLSessionInfo *session = ctx_.get_my_session(); uint64_t tenant_id =session->get_effective_tenant_id(); lib::ContextParam param; param.set_mem_attr(tenant_id, ObModIds::OB_SQL_NLJ_CACHE, ObCtxIds::WORK_AREA) .set_properties(lib::USE_TL_PAGE_OPTIONAL); if (OB_FAIL(CURRENT_CONTEXT->CREATE_CONTEXT(mem_context_, param))) { LOG_WARN("create entity failed", K(ret)); } else if (OB_ISNULL(mem_context_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("null memory entity returned", K(ret)); } else if (OB_FAIL(left_store_.init(UINT64_MAX, tenant_id, ObCtxIds::WORK_AREA))) { LOG_WARN("init row store failed", K(ret)); } else { left_store_.set_allocator(mem_context_->get_malloc_allocator()); } } bool ignore_end = false; if (OB_SUCC(ret)) { // 没有下一个了, 尝试填充 cache. batch_rescan_ctl_.reuse(); left_store_iter_.reset(); left_store_.reset(); mem_context_->get_arena_allocator().reset(); if (OB_ISNULL(last_store_row_.get_store_row())) { if (save_last_row_) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected status: store row is null", K(ret)); } else if (OB_FAIL(last_store_row_.init( mem_context_->get_malloc_allocator(), left_->get_spec().output_.count()))) { LOG_WARN("failed to init right last row", K(ret)); } } else if (save_last_row_) { if (OB_FAIL(last_store_row_.restore(left_->get_spec().output_, eval_ctx_))) { LOG_WARN("failed to restore left row", K(ret)); } } save_last_row_ = false; set_param_null(); while (OB_SUCC(ret) && !is_full()) { // need clear evaluated flag, since prepare_rescan_params() will evaluate expression. clear_evaluated_flag(); if (OB_FAIL(get_next_left_row())) { if (OB_ITER_END != ret) { LOG_WARN("failed to get next left row", K(ret)); } else { is_left_end_ = true; } } else if (OB_FAIL(left_store_.add_row(left_->get_spec().output_, &eval_ctx_))) { LOG_WARN("failed to store left row", K(ret)); // do nothing } else if (OB_FAIL(prepare_rescan_params(true/*is_group*/))) { LOG_WARN("failed to prepare rescan params", K(ret)); // 下压参数数据是由被换的原始表达式计算生成, 比如c1 = c2 + 1--> c1 = ?; // 下压参数?的值, 由c2+1计算而来, c2+1的内存是复用的, 如果此时不深拷贝 // 计算query range的下压param, 则可能导致后面query range的结果和 // 前面query range的obobj对应的ptr(string/number类型在obj中ptr)使用相同指针; } else { ignore_end = true; } } if (OB_SUCC(ret)) { // here need to set param null, because dynamic datum ptr // which from last batch row may invalid set_param_null(); if (OB_FAIL(last_store_row_.shadow_copy(left_->get_spec().output_, eval_ctx_))) { LOG_WARN("failed to shadow copy last left row", K(ret)); } else { save_last_row_ = true; } } } if (OB_SUCC(ret) || (ignore_end && OB_ITER_END == ret)) { ret = OB_SUCCESS; ObPhysicalPlanCtx *plan_ctx = GET_PHY_PLAN_CTX(ctx_); if (OB_FAIL(left_store_.finish_add_row(false))) { LOG_WARN("failed to finish add row to row store", K(ret)); } else if (OB_FAIL(left_store_.begin(left_store_iter_))) { LOG_WARN("failed to begin iterator for chunk row store", K(ret)); } } } } if (OB_SUCC(ret)) { // 拿到下一行 ret = OB_SUCCESS; clear_evaluated_flag(); if (OB_FAIL(left_store_iter_.get_next_row(left_->get_spec().output_, eval_ctx_))) { LOG_WARN("Failed to get next row", K(ret)); } else if (MY_SPEC.enable_px_batch_rescan_ && OB_FAIL(fill_cur_row_rescan_param())) { LOG_WARN("fail to fill cur row rescan param", K(ret)); } else if (MY_SPEC.enable_px_batch_rescan_) { OZ(right_->rescan()); } if (OB_SUCC(ret)) { left_row_joined_ = false; } } } else { // das group rescan bool has_next = false; if (OB_FAIL(group_join_buffer_.fill_group_buffer())) { LOG_WARN("fill group buffer failed", KR(ret)); } else if (OB_FAIL(group_join_buffer_.has_next_left_row(has_next))) { LOG_WARN("check has next failed", KR(ret)); } else if (has_next) { clear_evaluated_flag(); if (OB_FAIL(group_join_buffer_.rescan_right())) { if (OB_ITER_END == ret) { ret = OB_ERR_UNEXPECTED; } LOG_WARN("rescan right failed", KR(ret)); } else if (OB_FAIL(group_join_buffer_.fill_cur_row_group_param())) { LOG_WARN("fill group param failed", KR(ret)); } } else { ret = OB_ITER_END; } if (OB_SUCC(ret)) { clear_evaluated_flag(); if (OB_FAIL(group_join_buffer_.get_next_row_from_store())) { if (OB_ITER_END != ret) { LOG_WARN("get next row failed", KR(ret)); } } else { left_row_joined_ = false; } } } return ret; } int ObNestedLoopJoinOp::read_left_func_going() { int ret = OB_SUCCESS; if (MY_SPEC.group_rescan_ || MY_SPEC.enable_px_batch_rescan_) { // do nothing // group nested loop join 已经做过 rescan 了 } else if (OB_FAIL(prepare_rescan_params())) { LOG_WARN("failed to prepare rescan params", K(ret)); } else if (OB_FAIL(rescan_right_operator())) { LOG_WARN("rescan right operator failed", K(ret)); } state_ = JS_READ_RIGHT; return ret; } int ObNestedLoopJoinOp::read_left_func_end() { state_ = JS_JOIN_END; return OB_ITER_END; } int ObNestedLoopJoinOp::read_right_operate() { int ret = OB_SUCCESS; if (OB_FAIL(get_next_right_row()) && OB_ITER_END != ret) { LOG_WARN("failed to get next right row", K(ret)); } else { clear_evaluated_flag(); } return ret; } int ObNestedLoopJoinOp::read_right_func_going() { int ret = OB_SUCCESS; bool is_match = false; if (OB_SUCC(ret)) { if (OB_FAIL(calc_other_conds(is_match))) { LOG_WARN("failed to compare left and right row on other join conds", K(ret)); } else if (is_match) { output_row_produced_ = true; left_row_joined_ = true; // left row joined sign. } else {} } return ret; } int ObNestedLoopJoinOp::read_right_func_end() { int ret = OB_SUCCESS; if (need_left_join() && !left_row_joined_) { output_row_produced_ = true; if (OB_FAIL(blank_row(right_->get_spec().output_))) { LOG_WARN("failed to blank right row", K(ret)); } } state_ = JS_READ_LEFT; return ret; } bool ObNestedLoopJoinOp::is_full() const { return left_store_.get_row_cnt() >= MY_SPEC.group_size_; } int ObNestedLoopJoinOp::get_left_batch() { int ret = OB_SUCCESS; if (MY_SPEC.group_rescan_ || MY_SPEC.enable_px_batch_rescan_) { if (OB_FAIL(group_get_left_batch(left_brs_)) && OB_ITER_END != ret) { LOG_WARN("fail to get left batch", K(ret)); } } else { // Reset exec param before get left row, because the exec param still reference // to the previous row, when get next left row, it may become wild pointer. // The exec parameter may be accessed by the under PX execution by serialization, which // serialize whole parameters store. set_param_null(); if (is_left_end_) { // do nothing ret = OB_ITER_END; } else if (!IS_LEFT_SEMI_ANTI_JOIN(MY_SPEC.join_type_) // semi/anti has to_exprs in out process && FALSE_IT(left_batch_.to_exprs(eval_ctx_))) { } else if (OB_FAIL(left_->get_next_batch(op_max_batch_size_, left_brs_))) { LOG_WARN("fail to get next batch", K(ret)); } else if (left_brs_->end_) { is_left_end_ = true; } } if (OB_FAIL(ret)) { // do nothing } else if (left_brs_->end_ && left_brs_->size_ == 0) { ret = OB_ITER_END; } else { left_batch_.from_exprs(eval_ctx_, left_brs_->skip_, left_brs_->size_); } return ret; } int ObNestedLoopJoinOp::group_get_left_batch(const ObBatchRows *&left_brs) { int ret = OB_SUCCESS; if (MY_SPEC.enable_px_batch_rescan_) { left_brs = &left_->get_brs(); if (left_store_iter_.is_valid() && left_store_iter_.has_next()) { // do nothing } else { if (is_left_end_) { ret = OB_ITER_END; // do nothing } else { if (OB_ISNULL(mem_context_)) { ObSQLSessionInfo *session = ctx_.get_my_session(); uint64_t tenant_id =session->get_effective_tenant_id(); lib::ContextParam param; param.set_mem_attr(tenant_id, ObModIds::OB_SQL_NLJ_CACHE, ObCtxIds::WORK_AREA) .set_properties(lib::USE_TL_PAGE_OPTIONAL); if (OB_FAIL(CURRENT_CONTEXT->CREATE_CONTEXT(mem_context_, param))) { LOG_WARN("create entity failed", K(ret)); } else if (OB_ISNULL(mem_context_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("null memory entity returned", K(ret)); } else if (OB_FAIL(left_store_.init(UINT64_MAX, tenant_id, ObCtxIds::WORK_AREA))) { LOG_WARN("init row store failed", K(ret)); } else { left_store_.set_allocator(mem_context_->get_malloc_allocator()); } } if (OB_SUCC(ret)) { // 没有下一个了, 尝试填充 cache. batch_rescan_ctl_.reuse(); left_store_iter_.reset(); left_store_.reset(); mem_context_->get_arena_allocator().reset(); save_last_row_ = false; ObEvalCtx::BatchInfoScopeGuard batch_info_guard(eval_ctx_); while (OB_SUCC(ret) && continue_fetching()) { // need clear evaluated flag, since prepare_rescan_params() will evaluate expression. clear_evaluated_flag(); if (save_last_batch_) { last_save_batch_.to_exprs(eval_ctx_); save_last_batch_ = false; } set_param_null(); if (OB_FAIL(left_->get_next_batch(op_max_batch_size_, left_brs_))) { LOG_WARN("failed to get next left row", K(ret)); } else if (left_brs_->end_) { is_left_end_ = true; } for (int64_t l_idx = 0; OB_SUCC(ret) && l_idx < left_brs_->size_; l_idx++) { if (left_brs_->skip_->exist(l_idx)) { continue; } batch_info_guard.set_batch_idx(l_idx); batch_info_guard.set_batch_size(left_brs_->size_); if (OB_FAIL(left_store_.add_row(left_->get_spec().output_, &eval_ctx_))) { LOG_WARN("failed to store left row", K(ret)); // do nothing } else if (OB_FAIL(prepare_rescan_params(true))) { LOG_WARN("failed to prepare rescan params", K(ret)); // 下压参数数据是由被换的原始表达式计算生成, 比如c1 = c2 + 1--> c1 = ?; // 下压参数?的值, 由c2+1计算而来, c2+1的内存是复用的, 如果此时不深拷贝 // 计算query range的下压param, 则可能导致后面query range的结果和 // 前面query range的obobj对应的ptr(string/number类型在obj中ptr)使用相同指针; } } // for end } if (OB_SUCC(ret)) { set_param_null(); if (left_brs_->size_ == 0 && left_brs_->end_) { // do nothing } else { last_save_batch_.from_exprs(eval_ctx_, left_brs_->skip_, left_brs_->size_); save_last_batch_ = true; } } clear_evaluated_flag(); } if (OB_SUCC(ret) ) { if (left_store_.get_row_cnt() <= 0) { ret = OB_ITER_END; } else if (OB_FAIL(left_store_.finish_add_row(false))) { LOG_WARN("failed to finish add row to row store", K(ret)); } else if (OB_FAIL(left_store_.begin(left_store_iter_))) { LOG_WARN("failed to begin iterator for chunk row store", K(ret)); } else { need_switch_iter_ = false; } } } } if (OB_SUCC(ret)) { int64_t read_size = 0; int64_t max_size = MY_SPEC.max_batch_size_; last_save_batch_.extend_save(eval_ctx_, max_size); if (OB_FAIL(left_store_iter_.get_next_batch(left_->get_spec().output_, eval_ctx_, max_size, read_size))) { if (OB_ITER_END == ret) { // do nothing } else { LOG_WARN("Failed to get next row", K(ret)); } } if (OB_SUCC(ret)) { const_cast(left_brs)->skip_->reset(read_size); const_cast(left_brs)->size_ = read_size; const_cast(left_brs)->end_ = false; left_row_joined_ = false; } } } else { // das group rescan bool has_next = false; if (OB_FAIL(group_join_buffer_.batch_fill_group_buffer(op_max_batch_size_, left_brs_))) { if (OB_ITER_END != ret) { LOG_WARN("batch fill group buffer failed", KR(ret)); } } else if (OB_FAIL(group_join_buffer_.has_next_left_row(has_next))) { LOG_WARN("check has next failed", KR(ret)); } else if (!has_next) { ret = OB_ITER_END; } if (OB_SUCC(ret)) { int64_t read_size = 0; int64_t max_size = op_max_batch_size_; if (OB_FAIL(group_join_buffer_.get_next_batch_from_store(max_size, read_size))) { if (OB_ITER_END != ret) { LOG_WARN("get next batch from store failed", KR(ret)); } } else { const_cast(left_brs)->skip_->reset(read_size); const_cast(left_brs)->size_ = read_size; const_cast(left_brs)->end_ = false; left_row_joined_ = false; } } } return ret; } int ObNestedLoopJoinOp::rescan_right_op() { int ret = OB_SUCCESS; ObEvalCtx::BatchInfoScopeGuard batch_info_guard(eval_ctx_); // Note: // Overwrite batch_size in the beginning of the loop as eval_ctx_.batch_size // would be modified when processing right child. // Adding seperated guards for left/right children can also solve the problem, // we don't choose that way due to performance reason. batch_info_guard.set_batch_size(left_brs_->size_); if (!MY_SPEC.group_rescan_ && !MY_SPEC.enable_px_batch_rescan_) { batch_info_guard.set_batch_idx(l_idx_); if (OB_FAIL(rescan_params_batch_one(l_idx_))) { LOG_WARN("fail to rescan params", K(ret)); } } else if (MY_SPEC.group_rescan_ && !MY_SPEC.enable_px_batch_rescan_) { if (OB_FAIL(group_join_buffer_.rescan_right())) { if (OB_ITER_END == ret) { ret = OB_ERR_UNEXPECTED; } LOG_WARN("rescan right failed", KR(ret)); } else if (OB_FAIL(group_join_buffer_.fill_cur_row_group_param())) { LOG_WARN("fill group param failed", KR(ret)); } } else if (MY_SPEC.enable_px_batch_rescan_) { // NOTE: left batch is ALWAYS continous, NO need to check skip for // left_brs under px batch rescan batch_info_guard.set_batch_idx(l_idx_); left_batch_.to_exprs(eval_ctx_, l_idx_, l_idx_); if (OB_FAIL(fill_cur_row_rescan_param())) { LOG_WARN("fail to fill cur row rescan param", K(ret)); } else if (OB_FAIL(right_->rescan())) { LOG_WARN("failed to rescan right", K(ret)); } else { // do nothing } } return ret; } int ObNestedLoopJoinOp::process_right_batch() { int ret = OB_SUCCESS; ObEvalCtx::BatchInfoScopeGuard batch_info_guard(eval_ctx_); batch_info_guard.set_batch_size(left_brs_->size_); reset_batchrows(); const ObBatchRows *right_brs = &right_->get_brs(); const ObIArray &conds = get_spec().other_join_conds_; clear_evaluated_flag(); if (OB_FAIL(right_->get_next_batch(op_max_batch_size_, right_brs))) { LOG_WARN("fail to get next right batch", K(ret), K(MY_SPEC)); } else if (0 == right_brs->size_ && right_brs->end_) { match_right_batch_end_ = true; } else { if (MY_SPEC.enable_px_batch_rescan_) { last_save_batch_.extend_save(eval_ctx_, right_brs->size_); } else if (MY_SPEC.group_rescan_) { group_join_buffer_.get_last_batch().extend_save(eval_ctx_, right_brs->size_); } else { left_batch_.extend_save(eval_ctx_, right_brs->size_); } left_expr_extend(right_brs->size_); if (0 == conds.count()) { brs_.skip_->deep_copy(*right_brs->skip_, right_brs->size_); } else { batch_info_guard.set_batch_size(right_brs->size_); bool is_match = false; for (int64_t r_idx = 0; OB_SUCC(ret) && r_idx < right_brs->size_; r_idx++) { batch_info_guard.set_batch_idx(r_idx); if (right_brs->skip_->exist(r_idx)) { brs_.skip_->set(r_idx); } else if (OB_FAIL(calc_other_conds(is_match))) { LOG_WARN("calc_other_conds failed", K(ret), K(r_idx), K(right_brs->size_)); } else if (!is_match) { brs_.skip_->set(r_idx); } else { /*do nothing*/ } LOG_DEBUG("cal_other_conds finished ", K(is_match), K(l_idx_), K(r_idx)); } // for conds end } if (OB_SUCC(ret)) { brs_.size_ = right_brs->size_; int64_t skip_cnt = brs_.skip_->accumulate_bit_cnt(right_brs->size_); if (IS_LEFT_SEMI_ANTI_JOIN(MY_SPEC.join_type_)) { if (right_brs->size_ - skip_cnt > 0) { left_matched_->set(l_idx_); match_right_batch_end_ = true; } } else { if (right_brs->size_ - skip_cnt > 0) { need_output_row_ = true; no_match_row_found_ = false; } } match_right_batch_end_ = match_right_batch_end_ || right_brs->end_; } } // outer join if (OB_SUCC(ret)) { if (match_right_batch_end_ && no_match_row_found_ && need_left_join()) { need_output_row_ = true; } } return ret; } // Expand left row full column int ObNestedLoopJoinOp::left_expr_extend(int32_t size) { int ret = OB_SUCCESS; for (int32_t r_idx = left_expr_extend_size_; OB_SUCC(ret) && r_idx < size; r_idx++) { left_batch_.to_exprs(eval_ctx_, l_idx_, r_idx); } if (left_expr_extend_size_ < size) { left_expr_extend_size_ = size; } return ret; } int ObNestedLoopJoinOp::output() { int ret = OB_SUCCESS; if (IS_LEFT_SEMI_ANTI_JOIN(MY_SPEC.join_type_)) { reset_batchrows(); if (LEFT_SEMI_JOIN == MY_SPEC.join_type_) { brs_.skip_->bit_calculate(*left_batch_.get_skip(), *left_matched_, left_batch_.get_size(), [](const uint64_t l, const uint64_t r) { return (l | (~r)); }); } else if (LEFT_ANTI_JOIN == MY_SPEC.join_type_) { brs_.skip_->bit_calculate(*left_batch_.get_skip(), *left_matched_, left_batch_.get_size(), [](const uint64_t l, const uint64_t r) { return (l | r); }); } if (MY_SPEC.enable_px_batch_rescan_) { last_save_batch_.extend_save(eval_ctx_, left_batch_.get_size()); } left_batch_.to_exprs(eval_ctx_); brs_.size_ = left_batch_.get_size(); left_matched_->reset(left_batch_.get_size()); } else { // do nothing. } // outer join: generate a blank row for LEFT OUTER JOIN // Note: optimizer guarantee there is NO RIGHT/FULL OUTER JOIN for NLJ if (OB_SUCC(ret) && match_right_batch_end_ && no_match_row_found_ && need_left_join()) { reset_batchrows(); brs_.size_ = 1; ObEvalCtx::BatchInfoScopeGuard guard(eval_ctx_); guard.set_batch_idx(0); blank_row_batch_one(right_->get_spec().output_); if (MY_SPEC.enable_px_batch_rescan_) { last_save_batch_.extend_save(eval_ctx_, 1); } else if (!MY_SPEC.group_rescan_) { left_batch_.extend_save(eval_ctx_, 1); } left_batch_.to_exprs(eval_ctx_, l_idx_, 0); } return ret; } void ObNestedLoopJoinOp::reset_left_batch_state() { match_left_batch_end_ = false; l_idx_ = 0; } void ObNestedLoopJoinOp::reset_right_batch_state() { match_right_batch_end_ = false; l_idx_++; no_match_row_found_ = true; left_expr_extend_size_ = 0; if (MY_SPEC.enable_px_batch_rescan_) { batch_rescan_ctl_.cur_idx_++; } } void ObNestedLoopJoinOp::skip_l_idx() { if (!MY_SPEC.group_rescan_ && !MY_SPEC.enable_px_batch_rescan_) { while (l_idx_ >= 0 && l_idx_ < left_brs_->size_) { if (left_brs_->skip_->exist(l_idx_)) { l_idx_++; } else { break; } } } } int ObNestedLoopJoinOp::inner_get_next_batch(const int64_t max_row_cnt) { int ret = OB_SUCCESS; if (iter_end_) { brs_.size_ = 0; brs_.end_ = true; } op_max_batch_size_ = min(max_row_cnt, MY_SPEC.max_batch_size_); while (!iter_end_ && OB_SUCC(ret)) { clear_evaluated_flag(); if (JS_FILL_LEFT == batch_state_) { if (OB_FAIL(get_left_batch())) { if (OB_ITER_END == ret) { ret = OB_SUCCESS; brs_.size_ = 0; brs_.end_ = true; iter_end_ = true; } else { LOG_WARN("fail to get left batch", K(ret)); } } else { batch_state_ = JS_RESCAN_RIGHT_OP; } } if (OB_SUCC(ret) && JS_RESCAN_RIGHT_OP == batch_state_) { skip_l_idx(); if (l_idx_ >= left_brs_->size_) { match_left_batch_end_ = true; } if (!match_left_batch_end_ && OB_FAIL(rescan_right_op())) { LOG_WARN("fail to rescan right op", K(ret)); } else { if (match_left_batch_end_ && IS_LEFT_SEMI_ANTI_JOIN(MY_SPEC.join_type_)) { batch_state_ = JS_OUTPUT; reset_left_batch_state(); } else if (match_left_batch_end_) { batch_state_ = JS_FILL_LEFT; reset_left_batch_state(); } else { batch_state_ = JS_PROCESS_RIGHT_BATCH; } } } if (OB_SUCC(ret) && JS_PROCESS_RIGHT_BATCH == batch_state_) { if (OB_FAIL(process_right_batch())) { LOG_WARN("fail to process right batch", K(ret)); } else { if (IS_LEFT_SEMI_ANTI_JOIN(MY_SPEC.join_type_)) { if (match_right_batch_end_) { batch_state_ = JS_RESCAN_RIGHT_OP; reset_right_batch_state(); } else { batch_state_ = JS_PROCESS_RIGHT_BATCH; } } else { if (need_output_row_) { batch_state_ = JS_OUTPUT; need_output_row_ = false; } else { if (match_right_batch_end_) { batch_state_ = JS_RESCAN_RIGHT_OP; reset_right_batch_state(); } else { batch_state_ = JS_PROCESS_RIGHT_BATCH; } } } } } if (OB_SUCC(ret) && JS_OUTPUT == batch_state_) { if (OB_FAIL(output())) { LOG_WARN("fail to output", K(ret)); } else { if (IS_LEFT_SEMI_ANTI_JOIN(MY_SPEC.join_type_)) { batch_state_ = JS_FILL_LEFT; } else if (match_right_batch_end_) { batch_state_ = JS_RESCAN_RIGHT_OP; reset_right_batch_state(); } else { batch_state_ = JS_PROCESS_RIGHT_BATCH; } break; } } } if (OB_SUCC(ret) && iter_end_) { set_param_null(); } return ret; } //calc other conditions int ObNestedLoopJoinOp::calc_other_conds(bool &is_match) { int ret = OB_SUCCESS; is_match = true; const ObIArray &conds = MY_SPEC.other_join_conds_; ObDatum *cmp_res = NULL; ARRAY_FOREACH(conds, i) { auto cond = conds.at(i); if (OB_FAIL(conds.at(i)->eval(eval_ctx_, cmp_res))) { LOG_WARN("fail to calc other join condition", K(ret), K(*conds.at(i))); } else if (cmp_res->is_null() || 0 == cmp_res->get_int()) { is_match = false; break; } } return ret; } } // end namespace sql } // end namespace oceanbase