[FEAT MERGE] merge NLJ/SPF group rescan

This commit is contained in:
obdev
2023-01-12 15:17:04 +00:00
committed by OB-robot
parent 4799992a7f
commit 263a44af96
36 changed files with 3945 additions and 859 deletions

File diff suppressed because it is too large

View File

@ -0,0 +1,169 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#ifndef OCEANBASE_BASIC_OB_GROUP_JOIN_BUFFER_H_
#define OCEANBASE_BASIC_OB_GROUP_JOIN_BUFFER_H_
#include "sql/engine/basic/ob_chunk_datum_store.h"
#include "sql/engine/ob_operator.h"
namespace oceanbase
{
namespace sql
{
struct ObBatchRowDatums
{
ObBatchRowDatums() { reset(); }
int init(const ObExprPtrIArray *exprs, common::ObIAllocator *alloc, int32_t batch_size);
void reset()
{
alloc_ = NULL;
exprs_ = NULL;
batch_size_ = 0;
datums_ = NULL;
skip_ = NULL;
size_ = 0;
saved_size_ = 0;
is_inited_ = false;
}
void from_exprs(ObEvalCtx &ctx, ObBitVector *skip, int64_t size);
void extend_save(ObEvalCtx &ctx, int64_t size);
void to_exprs(ObEvalCtx &ctx);
void to_exprs(ObEvalCtx &ctx, int64_t from_idx, int64_t to_idx);
ObDatum &get_datum(int64_t row_id, int64_t col_id)
{
return datums_[col_id * batch_size_ + row_id];
}
bool is_inited() const { return is_inited_; }
public:
common::ObIAllocator *alloc_;
const ObExprPtrIArray *exprs_;
int32_t batch_size_;
ObDatum *datums_;
ObBitVector *skip_;
int32_t size_;
int32_t saved_size_; // records the saved size, including the extended saved size
private:
bool is_inited_;
};
class ObGroupJoinBufffer
{
public:
ObGroupJoinBufffer();
~ObGroupJoinBufffer() {} // does not free memory
int init(ObOperator *op,
const int64_t max_group_size,
const int64_t group_scan_size,
const common::ObIArray<ObDynamicParamSetter> *rescan_params,
const common::ObIArray<ObDynamicParamSetter> *left_rescan_params,
const common::ObIArray<ObDynamicParamSetter> *right_rescan_params);
bool is_inited() const { return is_inited_; }
bool is_full() const { return left_store_.get_row_cnt() >= group_scan_size_; }
bool need_fill_group_buffer() { return !(left_store_iter_.is_valid() && left_store_iter_.has_next()); }
bool is_multi_level() const { return is_multi_level_; }
int has_next_left_row(bool &has_next);
int init_above_group_params();
int fill_cur_row_group_param();
int rescan_left();
int rescan_right();
int fill_group_buffer();
int batch_fill_group_buffer(const int64_t max_row_cnt, const ObBatchRows *&batch_rows);
int get_next_row_from_store();
int get_next_batch_from_store(int64_t max_rows, int64_t &read_rows);
void destroy();
private:
int init_group_params();
int deep_copy_dynamic_obj();
int bind_group_params_to_store();
int prepare_rescan_params();
int get_next_left_row();
int get_next_left_batch(const int64_t max_row_cnt, const ObBatchRows *&batch_rows);
int add_row_to_store();
int build_above_group_params(const common::ObIArray<ObDynamicParamSetter> &above_rescan_params,
common::ObIArray<ObSqlArrayObj *> &above_group_params,
int64_t &group_size);
int set_above_group_size();
void reset_buffer_state();
int backup_above_params(common::ObIArray<ObObjParam> &left_params_backup,
common::ObIArray<ObObjParam> &right_params_backup);
int restore_above_params(common::ObIArray<ObObjParam> &left_params_backup,
common::ObIArray<ObObjParam> &right_params_backup);
private:
ObOperator *op_;
const ObOpSpec *spec_;
ObExecContext *ctx_;
ObEvalCtx *eval_ctx_;
ObOperator *left_;
ObOperator * right_;
const common::ObIArray<ObDynamicParamSetter> *rescan_params_;
const common::ObIArray<ObDynamicParamSetter> *left_rescan_params_;
const common::ObIArray<ObDynamicParamSetter> *right_rescan_params_;
lib::MemoryContext mem_context_;
// buffer for rows read from left child
ObChunkDatumStore left_store_;
ObChunkDatumStore::Iterator left_store_iter_;
// for multi level batch rescan
// NLJ 1
// / \
// TSC 1 NLJ 2
// / \
// TSC 2 TSC 3
// During NLJ 2's rescan, NLJ 1 may have supplied a batch of params to TSC 2 and TSC 3.
// Thus, NLJ 2 needs to keep track of NLJ 1's batch rescan pace and make sure it only outputs
// rows corresponding to NLJ 2's current rescan.
//
// store batch index corresponding to NLJ above this op
common::ObSEArray<int64_t, 2> left_store_group_idx_;
// for NLJ 2, above_left_group_params_ stores batch rescan params used by TSC 2 and supplied by NLJ 1
common::ObSEArray<ObSqlArrayObj *, 2> above_left_group_params_;
// for NLJ 2, above_right_group_params_ stores batch rescan params used by TSC 3 and supplied by NLJ 1
common::ObSEArray<ObSqlArrayObj *, 2> above_right_group_params_;
// batch rescan params supplied by this op
common::ObArrayWrap<ObSqlArrayObj> group_params_;
// for NLJ 2, we need to rewrite params in above_right_group_params_ and align them with our rescan pace;
// above_group_params_ stores batch params supplied by NLJ 1 and overwritten by NLJ 2
common::ObArrayWrap<ObSqlArrayObj> above_group_params_;
ObChunkDatumStore::ShadowStoredRow last_row_;
ObBatchRowDatums last_batch_;
int64_t right_cnt_;
int64_t cur_group_idx_;
// rows read from left_store_iter_
int64_t left_store_read_;
// index used when filling group buffer,
// see fill_group_buffer() and batch_fill_group_buffer()
int64_t above_group_idx_for_expand_;
// index used when reading from right child,
// see get_next_row_from_store() and get_next_batch_from_store()
int64_t above_group_idx_for_read_;
int64_t above_group_size_;
int64_t max_group_size_;
int64_t group_scan_size_;
union {
uint64_t flags_;
struct {
uint64_t is_inited_ : 1;
uint64_t need_check_above_ : 1;
uint64_t is_multi_level_ : 1;
uint64_t is_left_end_ : 1;
uint64_t save_last_row_ : 1;
uint64_t save_last_batch_ : 1;
uint64_t skip_rescan_right_ : 1;
uint64_t reserved_ : 57;
};
};
};
} // end namespace sql
} // end namespace oceanbase
#endif // OCEANBASE_BASIC_OB_GROUP_JOIN_BUFFER_H_
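
The buffer above is driven entirely by its owning join operator. As a reading aid only, here is a minimal sketch (not part of this commit) of the row-at-a-time driver loop, mirroring the call sequence that ObNestedLoopJoinOp::group_read_left_operate() uses below; it assumes the usual OceanBase ret/OB_FAIL/OB_SUCC macros, and drive_group_rescan is a hypothetical helper name.

// Hypothetical sketch: how an operator drives ObGroupJoinBufffer for one left row.
int drive_group_rescan(ObGroupJoinBufffer &buf)
{
  int ret = OB_SUCCESS;
  bool has_next = false;
  if (OB_FAIL(buf.fill_group_buffer())) {
    // OB_ITER_END simply means the left child is exhausted
  } else if (OB_FAIL(buf.has_next_left_row(has_next))) {
    // the real code logs and bails out here
  } else if (has_next) {
    if (OB_FAIL(buf.rescan_right())) {
      // OB_ITER_END from rescan_right() is treated as unexpected
    } else if (OB_FAIL(buf.fill_cur_row_group_param())) {
      // binds the buffered row's rescan params into the param store
    }
  } else {
    ret = OB_ITER_END;
  }
  if (OB_SUCC(ret)) {
    // replay the buffered left row into the left child's output exprs
    ret = buf.get_next_row_from_store();
  }
  return ret;
}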

View File

@ -23,7 +23,10 @@ namespace sql
{
OB_SERIALIZE_MEMBER((ObNestedLoopJoinSpec, ObBasicNestedLoopJoinSpec),
use_group_, left_group_size_, left_expr_ids_in_other_cond_);
group_rescan_, group_size_,
left_expr_ids_in_other_cond_,
left_rescan_params_,
right_rescan_params_);
ObNestedLoopJoinOp::ObNestedLoopJoinOp(ObExecContext &exec_ctx,
const ObOpSpec &spec,
@ -35,8 +38,8 @@ ObNestedLoopJoinOp::ObNestedLoopJoinOp(ObExecContext &exec_ctx,
batch_state_(JS_FILL_LEFT), save_last_batch_(false),
batch_mem_ctx_(NULL), stored_rows_(NULL), left_brs_(NULL), left_matched_(NULL),
need_switch_iter_(false), iter_end_(false), op_max_batch_size_(0),
max_group_size_(BNLJ_DEFAULT_GROUP_SIZE),
bnlj_cur_idx_(0)
max_group_size_(OB_MAX_BULK_JOIN_ROWS),
group_join_buffer_()
{
state_operation_func_[JS_JOIN_END] = &ObNestedLoopJoinOp::join_end_operate;
state_function_func_[JS_JOIN_END][FT_ITER_GOING] = NULL;
@ -61,8 +64,8 @@ int ObNestedLoopJoinOp::inner_open()
LOG_WARN("failed to open in base class", K(ret));
}
if (OB_SUCC(ret) && is_vectorized()) {
if (MY_SPEC.use_group_) {
max_group_size_ = BNLJ_DEFAULT_GROUP_SIZE + MY_SPEC.plan_->get_batch_size();
if (MY_SPEC.group_rescan_) {
max_group_size_ = OB_MAX_BULK_JOIN_ROWS + MY_SPEC.plan_->get_batch_size();
}
if (OB_ISNULL(batch_mem_ctx_)) {
ObSQLSessionInfo *session = ctx_.get_my_session();
@ -118,15 +121,25 @@ int ObNestedLoopJoinOp::inner_open()
&(batch_mem_ctx_->get_arena_allocator()),
MY_SPEC.max_batch_size_))) {
LOG_WARN("fail to init batch", K(ret));
} else if (MY_SPEC.use_group_ || MY_SPEC.enable_px_batch_rescan_) {
} else if (MY_SPEC.enable_px_batch_rescan_) {
if (OB_FAIL(last_save_batch_.init(&left_->get_spec().output_,
&batch_mem_ctx_->get_arena_allocator(),
MY_SPEC.max_batch_size_))) {
&batch_mem_ctx_->get_arena_allocator(),
MY_SPEC.max_batch_size_))) {
LOG_WARN("fail to init batch", K(ret));
}
}
}
}
if (OB_SUCC(ret) && MY_SPEC.group_rescan_) {
if (OB_FAIL(group_join_buffer_.init(this,
max_group_size_,
MY_SPEC.group_size_,
&MY_SPEC.rescan_params_,
&MY_SPEC.left_rescan_params_,
&MY_SPEC.right_rescan_params_))) {
LOG_WARN("init batch info failed", KR(ret));
}
}
return ret;
}
//NLJ has its own switch_iterator
@ -156,12 +169,19 @@ int ObNestedLoopJoinOp::rescan()
//NLJ's rescan should only drive left child's rescan,
//the right child's rescan is defer to rescan_right_operator() driven by get_next_row();
defered_right_rescan_ = true;
if (OB_FAIL(left_->rescan())) {
LOG_WARN("rescan left child operator failed", K(ret), "child op_type", left_->op_name());
}
if (OB_SUCC(ret)) {
if (OB_FAIL(inner_rescan())) {
LOG_WARN("failed to inner rescan", K(ret));
if (!MY_SPEC.group_rescan_) {
if (OB_FAIL(left_->rescan())) {
LOG_WARN("rescan left child operator failed", KR(ret), "child op_type", left_->op_name());
} else if (OB_FAIL(inner_rescan())) {
LOG_WARN("failed to inner rescan", KR(ret));
}
} else {
if (OB_FAIL(group_join_buffer_.init_above_group_params())) {
LOG_WARN("init above bnlj params failed", KR(ret));
} else if (OB_FAIL(group_join_buffer_.rescan_left())) {
LOG_WARN("rescan left failed", KR(ret));
} else if (OB_FAIL(inner_rescan())) {
LOG_WARN("inner rescan failed", KR(ret));
}
}
@ -268,33 +288,6 @@ int ObNestedLoopJoinOp::fill_cur_row_rescan_param()
return ret;
}
int ObNestedLoopJoinOp::fill_cur_row_bnlj_param()
{
int ret = OB_SUCCESS;
ObPhysicalPlanCtx *plan_ctx = ctx_.get_physical_plan_ctx();
if (bnlj_params_.empty() || bnlj_cur_idx_ >= bnlj_params_.at(0).count_) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("row idx is unexpected", K(ret),
K(bnlj_cur_idx_), K(bnlj_params_.at(0).count_));
} else {
int64_t param_cnt = bnlj_params_.count();
for (int64_t i = 0; OB_SUCC(ret) && i < param_cnt; ++i) {
const ObDynamicParamSetter &rescan_param = get_spec().rescan_params_.at(i);
int64_t param_idx = rescan_param.param_idx_;
ObExpr *dst = rescan_param.dst_;
ObDatum &param_datum = dst->locate_datum_for_write(eval_ctx_);
ObSqlArrayObj &arr = bnlj_params_.at(i);
if (OB_FAIL(param_datum.from_obj(arr.data_[bnlj_cur_idx_], dst->obj_datum_map_))) {
LOG_WARN("fail to cast datum", K(ret));
} else {
plan_ctx->get_param_store_for_update().at(param_idx) = arr.data_[bnlj_cur_idx_];
dst->set_evaluated_projected(eval_ctx_);
}
}
}
return ret;
}
int ObNestedLoopJoinOp::join_row_with_semi_join()
{
int ret = OB_SUCCESS;
@ -356,7 +349,7 @@ int ObNestedLoopJoinOp::join_end_func_end()
int ObNestedLoopJoinOp::read_left_operate()
{
int ret = OB_SUCCESS;
if (MY_SPEC.use_group_ || MY_SPEC.enable_px_batch_rescan_) {
if (MY_SPEC.group_rescan_ || MY_SPEC.enable_px_batch_rescan_) {
if (OB_FAIL(group_read_left_operate()) && OB_ITER_END != ret) {
LOG_WARN("failed to read left group", K(ret));
}
@ -408,230 +401,165 @@ int ObNestedLoopJoinOp::rescan_right_operator()
}
} else {
brs_holder_.reset();
if (MY_SPEC.use_group_ && !MY_SPEC.enable_px_batch_rescan_) {
if (OB_FAIL(fill_cur_row_bnlj_param())) {
LOG_WARN("fill bnlj param failed", K(ret));
} else {
bnlj_cur_idx_++;
}
}
}
}
return ret;
}
int ObNestedLoopJoinOp::init_bnlj_params()
{
int ret = OB_SUCCESS;
if (!bnlj_params_.empty()) {
//to reuse bnlj param buffer
for (int64_t i = 0; i < bnlj_params_.count(); ++i) {
bnlj_params_.at(i).count_ = 0;
}
} else if (OB_FAIL(bnlj_params_.allocate_array(ctx_.get_allocator(),
get_spec().rescan_params_.count()))) {
LOG_WARN("allocate bnlj params failed", K(ret), K(get_spec().rescan_params_.count()));
} else {
int64_t obj_buf_size = sizeof(ObObjParam) * max_group_size_;
for (int64_t i = 0; OB_SUCC(ret) && i < bnlj_params_.count(); ++i) {
ObExpr *dst_expr = get_spec().rescan_params_.at(i).dst_;
void *buf = ctx_.get_allocator().alloc(obj_buf_size);
if (OB_ISNULL(buf)) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("fail to alloc memory", K(ret), K(obj_buf_size));
} else {
bnlj_params_.at(i).data_ = reinterpret_cast<ObObjParam*>(buf);
bnlj_params_.at(i).count_ = 0;
bnlj_params_.at(i).element_.set_meta_type(dst_expr->obj_meta_);
}
}
}
return ret;
}
int ObNestedLoopJoinOp::bind_bnlj_param_to_store()
{
int ret = OB_SUCCESS;
int64_t param_cnt = get_spec().rescan_params_.count();
ObPhysicalPlanCtx *plan_ctx = GET_PHY_PLAN_CTX(ctx_);
ParamStore &param_store = plan_ctx->get_param_store_for_update();
if (OB_UNLIKELY(param_cnt != bnlj_params_.count())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("bnlj param count is invalid", K(ret), K(param_cnt), K(bnlj_params_.count()));
}
for (int64_t i = 0; OB_SUCC(ret) && i < param_cnt; ++i) {
const ObDynamicParamSetter &rescan_param = get_spec().rescan_params_.at(i);
int64_t param_idx = rescan_param.param_idx_;
int64_t array_obj_addr = reinterpret_cast<int64_t>(&bnlj_params_.at(i));
param_store.at(param_idx).set_extend(array_obj_addr, T_EXT_SQL_ARRAY);
}
return ret;
}
int ObNestedLoopJoinOp::group_read_left_operate()
{
int ret = OB_SUCCESS;
if (left_store_iter_.is_valid() && left_store_iter_.has_next()) {
// reset the right table's scan result; the result points to the next cache
if (MY_SPEC.enable_px_batch_rescan_) {
batch_rescan_ctl_.cur_idx_++;
} else if (OB_FAIL(rescan_right_operator())) {
// OB_ITER_END is not expected here
if (OB_ITER_END == ret) {
ret = OB_ERR_UNEXPECTED;
if (MY_SPEC.enable_px_batch_rescan_) {
if (left_store_iter_.is_valid() && left_store_iter_.has_next()) {
// reset the right table's scan result; the result points to the next cache
if (MY_SPEC.enable_px_batch_rescan_) {
batch_rescan_ctl_.cur_idx_++;
}
LOG_WARN("failed to get next right row from group", K(ret));
}
} else {
// the cache for the current row is exhausted; fetch a new cache from the left table and set the right table's result_iter
if (!MY_SPEC.enable_px_batch_rescan_ && OB_FAIL(init_bnlj_params())) {
LOG_WARN("Failed to init group rescan", K(ret));
} else if (is_left_end_) {
ret = OB_ITER_END;
} else {
if (OB_ISNULL(mem_context_)) {
ObSQLSessionInfo *session = ctx_.get_my_session();
uint64_t tenant_id =session->get_effective_tenant_id();
lib::ContextParam param;
param.set_mem_attr(tenant_id,
ObModIds::OB_SQL_NLJ_CACHE,
ObCtxIds::WORK_AREA)
.set_properties(lib::USE_TL_PAGE_OPTIONAL);
if (OB_FAIL(CURRENT_CONTEXT->CREATE_CONTEXT(mem_context_, param))) {
LOG_WARN("create entity failed", K(ret));
} else if (OB_ISNULL(mem_context_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("null memory entity returned", K(ret));
} else if (OB_FAIL(left_store_.init(UINT64_MAX, tenant_id, ObCtxIds::WORK_AREA))) {
LOG_WARN("init row store failed", K(ret));
} else {
left_store_.set_allocator(mem_context_->get_malloc_allocator());
}
}
bool ignore_end = false;
if (OB_SUCC(ret)) {
// no next row; try to fill the cache.
batch_rescan_ctl_.reuse();
bnlj_cur_idx_ = 0;
left_store_iter_.reset();
left_store_.reset();
mem_context_->get_arena_allocator().reset();
if (OB_ISNULL(last_store_row_.get_store_row())) {
if (save_last_row_) {
if (is_left_end_) {
ret = OB_ITER_END;
} else {
if (OB_ISNULL(mem_context_)) {
ObSQLSessionInfo *session = ctx_.get_my_session();
uint64_t tenant_id =session->get_effective_tenant_id();
lib::ContextParam param;
param.set_mem_attr(tenant_id,
ObModIds::OB_SQL_NLJ_CACHE,
ObCtxIds::WORK_AREA)
.set_properties(lib::USE_TL_PAGE_OPTIONAL);
if (OB_FAIL(CURRENT_CONTEXT->CREATE_CONTEXT(mem_context_, param))) {
LOG_WARN("create entity failed", K(ret));
} else if (OB_ISNULL(mem_context_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected status: store row is null", K(ret));
} else if (OB_FAIL(last_store_row_.init(
mem_context_->get_malloc_allocator(), left_->get_spec().output_.count()))) {
LOG_WARN("failed to init right last row", K(ret));
}
} else if (save_last_row_) {
if (OB_FAIL(last_store_row_.restore(left_->get_spec().output_, eval_ctx_))) {
LOG_WARN("failed to restore left row", K(ret));
}
}
save_last_row_ = false;
set_param_null();
while (OB_SUCC(ret) && !is_full()) {
// need to clear the evaluated flag, since prepare_rescan_params() will evaluate expressions.
clear_evaluated_flag();
if (OB_FAIL(get_next_left_row())) {
if (OB_ITER_END != ret) {
LOG_WARN("failed to get next left row", K(ret));
} else {
is_left_end_ = true;
}
} else if (OB_FAIL(left_store_.add_row(left_->get_spec().output_, &eval_ctx_))) {
LOG_WARN("failed to store left row", K(ret));
// do nothing
} else if (OB_FAIL(prepare_rescan_params(true/*is_group*/))) {
LOG_WARN("failed to prepare rescan params", K(ret));
// The pushed-down param value is produced by the replaced original expression, e.g. c1 = c2 + 1 --> c1 = ?;
// the value of the pushed-down param ? is computed from c2 + 1, and the memory of c2 + 1 is reused.
// If the pushed-down param used to compute the query range is not deep copied here, a later query range's
// result may share the same pointer (the ptr inside the obj for string/number types) as an earlier query range's obobj;
} else if (!MY_SPEC.enable_px_batch_rescan_ && OB_FAIL(deep_copy_dynamic_obj())) {
LOG_WARN("fail to deep copy dynamic obj", K(ret));
LOG_WARN("null memory entity returned", K(ret));
} else if (OB_FAIL(left_store_.init(UINT64_MAX, tenant_id, ObCtxIds::WORK_AREA))) {
LOG_WARN("init row store failed", K(ret));
} else {
ignore_end = true;
left_store_.set_allocator(mem_context_->get_malloc_allocator());
}
}
bool ignore_end = false;
if (OB_SUCC(ret)) {
// here we need to set the params to null, because the dynamic datum ptrs
// from the last batch row may be invalid
// no next row; try to fill the cache.
batch_rescan_ctl_.reuse();
left_store_iter_.reset();
left_store_.reset();
mem_context_->get_arena_allocator().reset();
if (OB_ISNULL(last_store_row_.get_store_row())) {
if (save_last_row_) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected status: store row is null", K(ret));
} else if (OB_FAIL(last_store_row_.init(
mem_context_->get_malloc_allocator(), left_->get_spec().output_.count()))) {
LOG_WARN("failed to init right last row", K(ret));
}
} else if (save_last_row_) {
if (OB_FAIL(last_store_row_.restore(left_->get_spec().output_, eval_ctx_))) {
LOG_WARN("failed to restore left row", K(ret));
}
}
save_last_row_ = false;
set_param_null();
if (OB_FAIL(last_store_row_.shadow_copy(left_->get_spec().output_, eval_ctx_))) {
LOG_WARN("failed to shadow copy last left row", K(ret));
} else {
save_last_row_ = true;
while (OB_SUCC(ret) && !is_full()) {
// need to clear the evaluated flag, since prepare_rescan_params() will evaluate expressions.
clear_evaluated_flag();
if (OB_FAIL(get_next_left_row())) {
if (OB_ITER_END != ret) {
LOG_WARN("failed to get next left row", K(ret));
} else {
is_left_end_ = true;
}
} else if (OB_FAIL(left_store_.add_row(left_->get_spec().output_, &eval_ctx_))) {
LOG_WARN("failed to store left row", K(ret));
// do nothing
} else if (OB_FAIL(prepare_rescan_params(true/*is_group*/))) {
LOG_WARN("failed to prepare rescan params", K(ret));
// The pushed-down param value is produced by the replaced original expression, e.g. c1 = c2 + 1 --> c1 = ?;
// the value of the pushed-down param ? is computed from c2 + 1, and the memory of c2 + 1 is reused.
// If the pushed-down param used to compute the query range is not deep copied here, a later query range's
// result may share the same pointer (the ptr inside the obj for string/number types) as an earlier query range's obobj;
} else {
ignore_end = true;
}
}
if (OB_SUCC(ret)) {
// here we need to set the params to null, because the dynamic datum ptrs
// from the last batch row may be invalid
set_param_null();
if (OB_FAIL(last_store_row_.shadow_copy(left_->get_spec().output_, eval_ctx_))) {
LOG_WARN("failed to shadow copy last left row", K(ret));
} else {
save_last_row_ = true;
}
}
}
if (OB_SUCC(ret) || (ignore_end && OB_ITER_END == ret)) {
ret = OB_SUCCESS;
ObPhysicalPlanCtx *plan_ctx = GET_PHY_PLAN_CTX(ctx_);
if (OB_FAIL(left_store_.finish_add_row(false))) {
LOG_WARN("failed to finish add row to row store", K(ret));
} else if (OB_FAIL(left_store_.begin(left_store_iter_))) {
LOG_WARN("failed to begin iterator for chunk row store", K(ret));
}
}
}
if (OB_SUCC(ret) || (ignore_end && OB_ITER_END == ret)) {
ret = OB_SUCCESS;
ObPhysicalPlanCtx *plan_ctx = GET_PHY_PLAN_CTX(ctx_);
if (OB_FAIL(left_store_.finish_add_row(false))) {
LOG_WARN("failed to finish add row to row store", K(ret));
} else if (OB_FAIL(left_store_.begin(left_store_iter_))) {
LOG_WARN("failed to begin iterator for chunk row store", K(ret));
} else if (!MY_SPEC.enable_px_batch_rescan_ && OB_FAIL(bind_bnlj_param_to_store())) {
LOG_WARN("bind bnlj param to store failed", K(ret));
} else if (!MY_SPEC.enable_px_batch_rescan_ && OB_FAIL(rescan_right_operator())) {
LOG_WARN("failed to rescan right op", K(ret));
}
}
}
}
if (OB_SUCC(ret)) {
// got the next row, ret = OB_SUCCESS;
clear_evaluated_flag();
if (OB_FAIL(left_store_iter_.get_next_row(left_->get_spec().output_,
eval_ctx_))) {
LOG_WARN("Failed to get next row", K(ret));
} else if (MY_SPEC.enable_px_batch_rescan_ && OB_FAIL(fill_cur_row_rescan_param())) {
LOG_WARN("fail to fill cur row rescan param", K(ret));
} else if (MY_SPEC.enable_px_batch_rescan_) {
OZ(right_->rescan());
OX(brs_holder_.reset());
}
if (OB_SUCC(ret)) {
left_row_joined_ = false;
// got the next row, ret = OB_SUCCESS;
clear_evaluated_flag();
if (OB_FAIL(left_store_iter_.get_next_row(left_->get_spec().output_,
eval_ctx_))) {
LOG_WARN("Failed to get next row", K(ret));
} else if (MY_SPEC.enable_px_batch_rescan_ && OB_FAIL(fill_cur_row_rescan_param())) {
LOG_WARN("fail to fill cur row rescan param", K(ret));
} else if (MY_SPEC.enable_px_batch_rescan_) {
OZ(right_->rescan());
OX(brs_holder_.reset());
}
if (OB_SUCC(ret)) {
left_row_joined_ = false;
}
}
}
return ret;
}
int ObNestedLoopJoinOp::deep_copy_dynamic_obj()
{
int ret = OB_SUCCESS;
int64_t param_cnt = get_spec().rescan_params_.count();
ObPhysicalPlanCtx *plan_ctx = GET_PHY_PLAN_CTX(ctx_);
ParamStore &param_store = plan_ctx->get_param_store_for_update();
if (OB_ISNULL(mem_context_)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("mem entity not init", K(ret));
}
for (int64_t i = 0; OB_SUCC(ret) && i < param_cnt; ++i) {
const ObDynamicParamSetter &rescan_param = get_spec().rescan_params_.at(i);
int64_t param_idx = rescan_param.param_idx_;
if (OB_FAIL(ob_write_obj(mem_context_->get_arena_allocator(),
param_store.at(param_idx),
bnlj_params_.at(i).data_[bnlj_params_.at(i).count_]))) {
LOG_WARN("deep copy dynamic param", K(ret));
} else {
// das group rescan
bool has_next = false;
if (OB_FAIL(group_join_buffer_.fill_group_buffer())) {
if (OB_ITER_END != ret) {
LOG_WARN("fill group buffer failed", KR(ret));
}
} else if (OB_FAIL(group_join_buffer_.has_next_left_row(has_next))) {
LOG_WARN("check has next failed", KR(ret));
} else if (has_next) {
clear_evaluated_flag();
if (OB_FAIL(group_join_buffer_.rescan_right())) {
if (OB_ITER_END == ret) {
ret = OB_ERR_UNEXPECTED;
}
LOG_WARN("rescan right failed", KR(ret));
} else if (OB_FAIL(group_join_buffer_.fill_cur_row_group_param())) {
LOG_WARN("fill group param failed", KR(ret));
}
} else {
++bnlj_params_.at(i).count_;
ret = OB_ITER_END;
}
if (OB_SUCC(ret)) {
clear_evaluated_flag();
if (OB_FAIL(group_join_buffer_.get_next_row_from_store())) {
if (OB_ITER_END != ret) {
LOG_WARN("get next row failed", KR(ret));
}
} else {
left_row_joined_ = false;
}
}
}
return ret;
}
int ObNestedLoopJoinOp::read_left_func_going()
{
int ret = OB_SUCCESS;
if (MY_SPEC.use_group_ || MY_SPEC.enable_px_batch_rescan_) {
if (MY_SPEC.group_rescan_ || MY_SPEC.enable_px_batch_rescan_) {
// do nothing
// group nested loop join has already done the rescan
} else if (OB_FAIL(prepare_rescan_params())) {
@ -694,13 +622,13 @@ int ObNestedLoopJoinOp::read_right_func_end()
bool ObNestedLoopJoinOp::is_full() const
{
return left_store_.get_row_cnt() >= MY_SPEC.left_group_size_;
return left_store_.get_row_cnt() >= MY_SPEC.group_size_;
}
int ObNestedLoopJoinOp::get_left_batch()
{
int ret = OB_SUCCESS;
if (MY_SPEC.use_group_ || MY_SPEC.enable_px_batch_rescan_) {
if (MY_SPEC.group_rescan_ || MY_SPEC.enable_px_batch_rescan_) {
if (OB_FAIL(group_get_left_batch(left_brs_)) && OB_ITER_END != ret) {
LOG_WARN("fail to get left batch", K(ret));
}
@ -735,127 +663,145 @@ int ObNestedLoopJoinOp::get_left_batch()
int ObNestedLoopJoinOp::group_get_left_batch(const ObBatchRows *&left_brs)
{
int ret = OB_SUCCESS;
left_brs = &left_->get_brs();
if (left_store_iter_.is_valid() && left_store_iter_.has_next()) {
// do nothing
} else {
if (!MY_SPEC.enable_px_batch_rescan_ && OB_FAIL(init_bnlj_params())) {
LOG_WARN("Failed to init bnlj params", K(ret));
} else if (is_left_end_) {
ret = OB_ITER_END;
if (MY_SPEC.enable_px_batch_rescan_) {
left_brs = &left_->get_brs();
if (left_store_iter_.is_valid() && left_store_iter_.has_next()) {
// do nothing
} else {
if (OB_ISNULL(mem_context_)) {
ObSQLSessionInfo *session = ctx_.get_my_session();
uint64_t tenant_id =session->get_effective_tenant_id();
lib::ContextParam param;
param.set_mem_attr(tenant_id,
ObModIds::OB_SQL_NLJ_CACHE,
ObCtxIds::WORK_AREA)
.set_properties(lib::USE_TL_PAGE_OPTIONAL);
if (OB_FAIL(CURRENT_CONTEXT->CREATE_CONTEXT(mem_context_, param))) {
LOG_WARN("create entity failed", K(ret));
} else if (OB_ISNULL(mem_context_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("null memory entity returned", K(ret));
} else if (OB_FAIL(left_store_.init(UINT64_MAX, tenant_id, ObCtxIds::WORK_AREA))) {
LOG_WARN("init row store failed", K(ret));
} else {
left_store_.set_allocator(mem_context_->get_malloc_allocator());
}
}
if (OB_SUCC(ret)) {
// no next row; try to fill the cache.
batch_rescan_ctl_.reuse();
bnlj_cur_idx_ = 0;
left_store_iter_.reset();
left_store_.reset();
mem_context_->get_arena_allocator().reset();
save_last_row_ = false;
ObEvalCtx::BatchInfoScopeGuard batch_info_guard(eval_ctx_);
while (OB_SUCC(ret) && continue_fetching()) {
// need to clear the evaluated flag, since prepare_rescan_params() will evaluate expressions.
clear_evaluated_flag();
if (save_last_batch_) {
last_save_batch_.to_exprs(eval_ctx_);
save_last_batch_ = false;
}
set_param_null();
if (OB_FAIL(left_->get_next_batch(op_max_batch_size_, left_brs_))) {
LOG_WARN("failed to get next left row", K(ret));
} else if (left_brs_->end_) {
is_left_end_ = true;
}
for (int64_t l_idx = 0; OB_SUCC(ret) && l_idx < left_brs_->size_; l_idx++) {
if (left_brs_->skip_->exist(l_idx)) { continue; }
batch_info_guard.set_batch_idx(l_idx);
batch_info_guard.set_batch_size(left_brs_->size_);
if (OB_FAIL(left_store_.add_row(left_->get_spec().output_, &eval_ctx_))) {
LOG_WARN("failed to store left row", K(ret));
// do nothing
} else if (OB_FAIL(prepare_rescan_params(true))) {
LOG_WARN("failed to prepare rescan params", K(ret));
// The pushed-down param value is produced by the replaced original expression, e.g. c1 = c2 + 1 --> c1 = ?;
// the value of the pushed-down param ? is computed from c2 + 1, and the memory of c2 + 1 is reused.
// If the pushed-down param used to compute the query range is not deep copied here, a later query range's
// result may share the same pointer (the ptr inside the obj for string/number types) as an earlier query range's obobj;
} else if (!MY_SPEC.enable_px_batch_rescan_ && OB_FAIL(deep_copy_dynamic_obj())) {
LOG_WARN("fail to deep copy dynamic obj", K(ret));
}
} // for end
}
if (OB_SUCC(ret)) {
set_param_null();
if (left_brs_->size_ == 0 && left_brs_->end_) {
// do nothing
} else {
last_save_batch_.from_exprs(eval_ctx_, left_brs_->skip_, left_brs_->size_);
save_last_batch_ = true;
}
}
clear_evaluated_flag();
}
if (OB_SUCC(ret) ) {
if (left_store_.get_row_cnt() <= 0) {
ret = OB_ITER_END;
} else if (OB_FAIL(left_store_.finish_add_row(false))) {
LOG_WARN("failed to finish add row to row store", K(ret));
} else if (OB_FAIL(left_store_.begin(left_store_iter_))) {
LOG_WARN("failed to begin iterator for chunk row store", K(ret));
} else if (!MY_SPEC.enable_px_batch_rescan_ && OB_FAIL(bind_bnlj_param_to_store())) {
LOG_WARN("bind bnlj param to store failed", K(ret));
} else if (!MY_SPEC.enable_px_batch_rescan_ && OB_FAIL(rescan_right_operator())) {
LOG_WARN("failed to rescan right op", K(ret));
} else {
need_switch_iter_ = false;
}
}
}
}
if (OB_SUCC(ret)) {
int64_t read_size = 0;
int64_t max_size = MY_SPEC.max_batch_size_;
last_save_batch_.extend_save(eval_ctx_, max_size);
if (OB_FAIL(left_store_iter_.get_next_batch(left_->get_spec().output_,
eval_ctx_,
max_size,
read_size))) {
if (OB_ITER_END == ret) {
if (is_left_end_) {
ret = OB_ITER_END;
// do nothing
} else {
LOG_WARN("Failed to get next row", K(ret));
if (OB_ISNULL(mem_context_)) {
ObSQLSessionInfo *session = ctx_.get_my_session();
uint64_t tenant_id =session->get_effective_tenant_id();
lib::ContextParam param;
param.set_mem_attr(tenant_id,
ObModIds::OB_SQL_NLJ_CACHE,
ObCtxIds::WORK_AREA)
.set_properties(lib::USE_TL_PAGE_OPTIONAL);
if (OB_FAIL(CURRENT_CONTEXT->CREATE_CONTEXT(mem_context_, param))) {
LOG_WARN("create entity failed", K(ret));
} else if (OB_ISNULL(mem_context_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("null memory entity returned", K(ret));
} else if (OB_FAIL(left_store_.init(UINT64_MAX, tenant_id, ObCtxIds::WORK_AREA))) {
LOG_WARN("init row store failed", K(ret));
} else {
left_store_.set_allocator(mem_context_->get_malloc_allocator());
}
}
if (OB_SUCC(ret)) {
// no next row; try to fill the cache.
batch_rescan_ctl_.reuse();
left_store_iter_.reset();
left_store_.reset();
mem_context_->get_arena_allocator().reset();
save_last_row_ = false;
ObEvalCtx::BatchInfoScopeGuard batch_info_guard(eval_ctx_);
while (OB_SUCC(ret) && continue_fetching()) {
// need to clear the evaluated flag, since prepare_rescan_params() will evaluate expressions.
clear_evaluated_flag();
if (save_last_batch_) {
last_save_batch_.to_exprs(eval_ctx_);
save_last_batch_ = false;
}
set_param_null();
if (OB_FAIL(left_->get_next_batch(op_max_batch_size_, left_brs_))) {
LOG_WARN("failed to get next left row", K(ret));
} else if (left_brs_->end_) {
is_left_end_ = true;
}
for (int64_t l_idx = 0; OB_SUCC(ret) && l_idx < left_brs_->size_; l_idx++) {
if (left_brs_->skip_->exist(l_idx)) { continue; }
batch_info_guard.set_batch_idx(l_idx);
batch_info_guard.set_batch_size(left_brs_->size_);
if (OB_FAIL(left_store_.add_row(left_->get_spec().output_, &eval_ctx_))) {
LOG_WARN("failed to store left row", K(ret));
// do nothing
} else if (OB_FAIL(prepare_rescan_params(true))) {
LOG_WARN("failed to prepare rescan params", K(ret));
// The pushed-down param value is produced by the replaced original expression, e.g. c1 = c2 + 1 --> c1 = ?;
// the value of the pushed-down param ? is computed from c2 + 1, and the memory of c2 + 1 is reused.
// If the pushed-down param used to compute the query range is not deep copied here, a later query range's
// result may share the same pointer (the ptr inside the obj for string/number types) as an earlier query range's obobj;
}
} // for end
}
if (OB_SUCC(ret)) {
set_param_null();
if (left_brs_->size_ == 0 && left_brs_->end_) {
// do nothing
} else {
last_save_batch_.from_exprs(eval_ctx_, left_brs_->skip_, left_brs_->size_);
save_last_batch_ = true;
}
}
clear_evaluated_flag();
}
if (OB_SUCC(ret) ) {
if (left_store_.get_row_cnt() <= 0) {
ret = OB_ITER_END;
} else if (OB_FAIL(left_store_.finish_add_row(false))) {
LOG_WARN("failed to finish add row to row store", K(ret));
} else if (OB_FAIL(left_store_.begin(left_store_iter_))) {
LOG_WARN("failed to begin iterator for chunk row store", K(ret));
} else {
need_switch_iter_ = false;
}
}
}
}
if (OB_SUCC(ret)) {
const_cast<ObBatchRows *>(left_brs)->skip_->reset(read_size);
const_cast<ObBatchRows *>(left_brs)->size_ = read_size;
const_cast<ObBatchRows *>(left_brs)->end_ = false;
left_row_joined_ = false;
int64_t read_size = 0;
int64_t max_size = MY_SPEC.max_batch_size_;
last_save_batch_.extend_save(eval_ctx_, max_size);
if (OB_FAIL(left_store_iter_.get_next_batch(left_->get_spec().output_,
eval_ctx_,
max_size,
read_size))) {
if (OB_ITER_END == ret) {
// do nothing
} else {
LOG_WARN("Failed to get next row", K(ret));
}
}
if (OB_SUCC(ret)) {
const_cast<ObBatchRows *>(left_brs)->skip_->reset(read_size);
const_cast<ObBatchRows *>(left_brs)->size_ = read_size;
const_cast<ObBatchRows *>(left_brs)->end_ = false;
left_row_joined_ = false;
}
}
} else {
// das group rescan
bool has_next = false;
if (OB_FAIL(group_join_buffer_.batch_fill_group_buffer(op_max_batch_size_, left_brs_))) {
if (OB_ITER_END != ret) {
LOG_WARN("batch fill group buffer failed", KR(ret));
}
} else if (OB_FAIL(group_join_buffer_.has_next_left_row(has_next))) {
LOG_WARN("check has next failed", KR(ret));
} else if (!has_next) {
ret = OB_ITER_END;
}
if (OB_SUCC(ret)) {
int64_t read_size = 0;
int64_t max_size = min(MY_SPEC.max_batch_size_, left_->get_spec().max_batch_size_);
if (OB_FAIL(group_join_buffer_.get_next_batch_from_store(max_size, read_size))) {
if (OB_ITER_END != ret) {
LOG_WARN("get next batch from store failed", KR(ret));
}
} else {
const_cast<ObBatchRows *>(left_brs)->skip_->reset(read_size);
const_cast<ObBatchRows *>(left_brs)->size_ = read_size;
const_cast<ObBatchRows *>(left_brs)->end_ = false;
left_row_joined_ = false;
}
}
}
return ret;
}
@ -870,21 +816,20 @@ int ObNestedLoopJoinOp::process_left_batch()
// Adding separate guards for the left/right children can also solve the problem;
// we don't choose that way for performance reasons.
batch_info_guard.set_batch_size(left_brs_->size_);
if (!MY_SPEC.use_group_ && !MY_SPEC.enable_px_batch_rescan_) {
if (!MY_SPEC.group_rescan_ && !MY_SPEC.enable_px_batch_rescan_) {
batch_info_guard.set_batch_idx(l_idx);
if (left_brs_->skip_->exist(l_idx)) { continue; }
if (OB_FAIL(rescan_params_batch_one(l_idx))) {
LOG_WARN("fail to rescan params", K(ret));
}
} else if (MY_SPEC.use_group_ && !MY_SPEC.enable_px_batch_rescan_) {
// after group rescan, first left row not need switch iter
if (!need_switch_iter_) {
need_switch_iter_ = true;
} else if (OB_FAIL(rescan_right_operator())) {
} else if (MY_SPEC.group_rescan_ && !MY_SPEC.enable_px_batch_rescan_) {
if (OB_FAIL(group_join_buffer_.rescan_right())) {
if (OB_ITER_END == ret) {
ret = OB_ERR_UNEXPECTED;
}
LOG_WARN("fail to switch iterator", K(ret));
LOG_WARN("rescan right failed", KR(ret));
} else if (OB_FAIL(group_join_buffer_.fill_cur_row_group_param())) {
LOG_WARN("fill group param failed", KR(ret));
}
} else if (MY_SPEC.enable_px_batch_rescan_) {
// NOTE: left batch is ALWAYS continuous, NO need to check skip for
@ -979,9 +924,9 @@ int ObNestedLoopJoinOp::calc_right_batch_matched_result(
if (0 == conds.count()) {
brs_.skip_->deep_copy(*right_brs->skip_, right_brs->size_);
} else {
if (MY_SPEC.use_group_ || MY_SPEC.enable_px_batch_rescan_) {
if (MY_SPEC.enable_px_batch_rescan_) {
last_save_batch_.extend_save(eval_ctx_, right_brs->size_);
} else {
} else if (!MY_SPEC.group_rescan_) {
left_batch_.extend_save(eval_ctx_, right_brs->size_);
}
batch_info_guard.set_batch_size(right_brs->size_);
@ -1029,7 +974,7 @@ int ObNestedLoopJoinOp::output()
brs_.skip_->bit_calculate(*left_batch_.skip_, *left_matched_, left_batch_.size_,
[](const uint64_t l, const uint64_t r) { return (l | r); });
}
if (MY_SPEC.use_group_ || MY_SPEC.enable_px_batch_rescan_) {
if (MY_SPEC.enable_px_batch_rescan_) {
last_save_batch_.extend_save(eval_ctx_, left_batch_.size_);
}
left_batch_.to_exprs(eval_ctx_);
@ -1049,9 +994,9 @@ int ObNestedLoopJoinOp::output()
LOG_WARN("fail to get next batch", K(ret));
}
} else {
if (MY_SPEC.use_group_ || MY_SPEC.enable_px_batch_rescan_) {
if (MY_SPEC.enable_px_batch_rescan_) {
last_save_batch_.extend_save(eval_ctx_, read_rows);
} else {
} else if (!MY_SPEC.group_rescan_) {
left_batch_.extend_save(eval_ctx_, read_rows);
}
for (int64_t i = 0; OB_SUCC(ret) && i < read_rows; i++) {
@ -1172,90 +1117,5 @@ int ObNestedLoopJoinOp::calc_other_conds_with_update_left_expr(bool &is_match,
return ret;
}
int ObBatchRowDatums::init(const ObExprPtrIArray *exprs, ObIAllocator *alloc, int32_t batch_size)
{
int ret = OB_SUCCESS;
if (OB_ISNULL(alloc) || OB_ISNULL(exprs)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", K(ret), KP(alloc), KP(exprs));
} else {
char *buf= (char *)alloc->alloc(ObBitVector::memory_size(batch_size)
+ sizeof(ObDatum) * batch_size * exprs->count());
if (NULL == buf) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("fail to alloc memory", K(ret));
} else {
MEMSET(buf, 0, ObBitVector::memory_size(batch_size));
skip_ = to_bit_vector(buf);
alloc_ = alloc;
exprs_ = exprs;
datums_ = reinterpret_cast<ObDatum *>(buf + ObBitVector::memory_size(batch_size));
batch_size_ = batch_size;
size_ = 0;
saved_size_ = 0;
}
}
return ret;
}
void ObBatchRowDatums::from_exprs(ObEvalCtx &ctx, ObBitVector *skip, int64_t size)
{
OB_ASSERT(size <= batch_size_);
OB_ASSERT(OB_NOT_NULL(skip) && OB_NOT_NULL(exprs_));
for (int64_t i = 0; i < exprs_->count(); i++) {
ObExpr *expr = exprs_->at(i);
ObDatum *datums = expr->locate_batch_datums(ctx);
int64_t copy_size = (expr->is_batch_result() ? size: 1) * sizeof(ObDatum);
MEMCPY(datums_ + i * batch_size_, datums, copy_size);
}
size_ = size;
saved_size_ = size;
skip_->deep_copy(*skip, size);
}
void ObBatchRowDatums::extend_save(ObEvalCtx &ctx, int64_t size)
{
if (size > saved_size_) {
for (int64_t i = 0; i < exprs_->count(); i++) {
ObExpr *expr = exprs_->at(i);
if (expr->is_batch_result()) {
ObDatum *datums = expr->locate_batch_datums(ctx);
int64_t copy_size = (size - saved_size_) * sizeof(ObDatum);
MEMCPY(datums_ + i * batch_size_ + saved_size_, datums + saved_size_, copy_size);
}
}
saved_size_ = size;
}
}
void ObBatchRowDatums::to_exprs(ObEvalCtx &ctx)
{
if (saved_size_ > 0) {
for (int64_t i = 0; i < exprs_->count(); i++) {
ObExpr *expr = exprs_->at(i);
ObDatum *datums = expr->locate_batch_datums(ctx);
int64_t copy_size = (expr->is_batch_result() ? saved_size_: 1) * sizeof(ObDatum);
MEMCPY(datums, datums_ + i * batch_size_, copy_size);
}
}
}
void ObBatchRowDatums::to_exprs(ObEvalCtx &ctx, int64_t from_idx, int64_t to_idx)
{
OB_ASSERT(from_idx <= size_ && to_idx <= batch_size_);
OB_ASSERT(!skip_->exist(from_idx));
for (int64_t i = 0; i < exprs_->count(); i++) {
ObExpr *expr = exprs_->at(i);
ObDatum *datums = expr->locate_batch_datums(ctx);
if (!expr->is_batch_result()) {
*datums = *(datums_ + i * batch_size_);
} else {
*(datums + to_idx) = *(datums_ + i * batch_size_ + from_idx) ;
}
}
}
} // end namespace sql
} // end namespace oceanbase
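
ObBatchRowDatums (implemented above) keeps a column-major snapshot of the expressions' batch datums — get_datum(row, col) reads datums_[col * batch_size_ + row] — so a saved left batch can be put back after the right child overwrites the expression frames. Below is a minimal usage sketch, illustrative only; it assumes the batch was already init()-ed against the left child's output exprs and is not part of this commit.

// Hypothetical sketch of the save/extend/restore cycle used by the NLJ code above.
void snapshot_left_batch(ObBatchRowDatums &saved, ObEvalCtx &eval_ctx,
                         const ObBatchRows &left_brs, int64_t later_batch_size)
{
  // snapshot the current batch: copies each expr's datums plus the skip bitmap
  saved.from_exprs(eval_ctx, left_brs.skip_, left_brs.size_);
  // ... evaluating the right child may overwrite the expression frames here ...
  // if a later step uses more rows than were saved, copy the currently-in-frame
  // datums for the extra rows into the snapshot so the restore stays consistent
  saved.extend_save(eval_ctx, later_batch_size);
  // copy the whole snapshot back into the expression frames
  saved.to_exprs(eval_ctx);
  // a single saved row can also be restored into a specific batch slot:
  // saved.to_exprs(eval_ctx, /*from_idx=*/0, /*to_idx=*/0);
}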

View File

@ -15,6 +15,7 @@
#include "sql/engine/join/ob_basic_nested_loop_join_op.h"
#include "sql/engine/basic/ob_chunk_datum_store.h"
#include "sql/engine/basic/ob_group_join_buffer.h"
#include "sql/engine/basic/ob_material_op.h"
namespace oceanbase
@ -27,48 +28,35 @@ class ObNestedLoopJoinSpec : public ObBasicNestedLoopJoinSpec
public:
ObNestedLoopJoinSpec(common::ObIAllocator &alloc, const ObPhyOperatorType type)
: ObBasicNestedLoopJoinSpec(alloc, type),
use_group_(false),
left_group_size_(BNLJ_DEFAULT_GROUP_SIZE),
left_expr_ids_in_other_cond_(alloc)
group_rescan_(false),
group_size_(OB_MAX_BULK_JOIN_ROWS),
left_expr_ids_in_other_cond_(alloc),
left_rescan_params_(alloc),
right_rescan_params_(alloc)
{}
public:
// for group nested loop join.
bool use_group_;
int64_t left_group_size_;
// for group join buffer
bool group_rescan_;
int64_t group_size_;
ObFixedArray<ObFixedArray<int, common::ObIAllocator>, common::ObIAllocator>
left_expr_ids_in_other_cond_;
// for multi level batch rescan
// NLJ 1
// / \
// TSC 1 NLJ 2
// / \
// TSC 2 TSC 3
// As shown above, for NLJ 2, its left_rescan_params_ stores params used by TSC 2 and
// set by NLJ 1.
// Similarly, for NLJ 2, its right_rescan_params_ stores params used by TSC 3 and set
// by NLJ 1.
common::ObFixedArray<ObDynamicParamSetter, common::ObIAllocator> left_rescan_params_;
common::ObFixedArray<ObDynamicParamSetter, common::ObIAllocator> right_rescan_params_;
private:
DISALLOW_COPY_AND_ASSIGN(ObNestedLoopJoinSpec);
};
struct ObBatchRowDatums
{
ObBatchRowDatums()
: alloc_(NULL), exprs_(NULL), batch_size_(0), datums_(NULL),
skip_(NULL), size_(0), saved_size_(0)
{}
int init(const ObExprPtrIArray *exprs, common::ObIAllocator *alloc, int32_t batch_size);
void from_exprs(ObEvalCtx &ctx, ObBitVector *skip, int64_t size);
void extend_save(ObEvalCtx &ctx, int64_t size);
void to_exprs(ObEvalCtx &ctx);
void to_exprs(ObEvalCtx &ctx, int64_t from_idx, int64_t to_idx);
ObDatum &get_datum(int64_t row_id, int64_t col_id)
{
return datums_[col_id * batch_size_ + row_id];
}
public:
common::ObIAllocator *alloc_;
const ObExprPtrIArray *exprs_;
int32_t batch_size_;
ObDatum *datums_;
ObBitVector *skip_;
int32_t size_;
int32_t saved_size_; // record the saved size, include extend saved size
};
// Nested loop join has no expression result overwrite problem:
//
// LEFT:
@ -124,11 +112,13 @@ public:
batch_mem_ctx_ = nullptr;
}
}
if (MY_SPEC.group_rescan_) {
group_join_buffer_.destroy();
}
ObBasicNestedLoopJoinOp::destroy();
}
ObBatchRescanCtl &get_batch_rescan_ctl() { return batch_rescan_ctl_; }
int fill_cur_row_rescan_param();
int fill_cur_row_bnlj_param();
// Skip restore if child is material operator to save duplicate work
// Note:
// this is a sister function of backup_right_child_exprs(), call it after
@ -174,7 +164,6 @@ private:
int read_left_operate_batch();
int read_left_operate_group_batch();
int group_read_left_operate();
int deep_copy_dynamic_obj();
int read_left_func_going();
int read_left_func_end();
// JS_READ_RIGHT state operation and transfer functions.
@ -199,8 +188,6 @@ private:
ObEvalCtx::BatchInfoScopeGuard &batch_info_guard);
int output();
int inner_get_next_batch(const int64_t max_row_cnt);
int init_bnlj_params();
int bind_bnlj_param_to_store();
// for vectorized end
bool continue_fetching() { return !(left_brs_->end_ || is_full());}
@ -228,13 +215,12 @@ public:
ObChunkDatumStore::Iterator right_store_iter_;
const ObBatchRows *left_brs_;
ObBitVector *left_matched_;
common::ObArrayWrap<ObSqlArrayObj> bnlj_params_;
bool need_switch_iter_;
bool iter_end_;
ObBatchResultHolder brs_holder_;
int64_t op_max_batch_size_;
int64_t max_group_size_;
int64_t bnlj_cur_idx_;
ObGroupJoinBufffer group_join_buffer_;
// for vectorized end
private:
DISALLOW_COPY_AND_ASSIGN(ObNestedLoopJoinOp);

View File

@ -53,7 +53,10 @@ ObSubQueryIterator::ObSubQueryIterator(ObOperator &op)
iter_brs_(NULL),
batch_size_(0),
batch_row_pos_(0),
iter_end_(false)
iter_end_(false),
is_new_batch_(false),
current_group_(0),
das_batch_params_recovery_()
{
}
@ -82,16 +85,79 @@ int ObSubQueryIterator::rewind(const bool reset_onetime_plan /* = false */)
LOG_WARN("failed to rewind iterator", K(ret));
}
} else {
if (OB_FAIL(op_.rescan())) {
LOG_WARN("failed to do rescan", K(ret));
if (parent_->enable_left_das_batch()) {
if (OB_FAIL(alloc_das_batch_store())) {
LOG_WARN("Alloc DAS batch parameter store fail.", K(ret));
} else {
//We use GroupParamBackupGuard to save and restore data in the param store.
//1. The SPF operator has multiple right children, and every rescan we need to
//switch the params in the param store. All of the param store indexes live in
//the same array, so we switch all of them and restore them after the rescan.
//2. The SPF operator needs to support jump reads. Different children's params
//may belong to different group ids, so the current child's rescan should not
//affect another child's param store state.
//3. When an SPF is nested inside another SPF, the parent and child SPF may be
//at different stages of rescan or get_next_row and exprs may be reused, so a
//child SPF rescan must not change the param store state.
//So we implement GroupParamBackupGuard to make the param store behave like a
//stack and guarantee that every change to the param store is restored.
bool new_group = is_new_batch_;
if (OB_SUCC(ret) && is_new_batch_) {
GroupParamBackupGuard guard(eval_ctx_,
das_batch_params_recovery_,
parent_->get_spec().rescan_params_,
parent_->get_spec().rescan_params_.count());
ret = parent_->bind_das_batch_params_to_store();
if (OB_SUCC(ret) && OB_FAIL(op_.rescan())) {
if(OB_ITER_END == ret) {
ret = OB_ERR_UNEXPECTED;
}
LOG_WARN("failed to do rescan", K(ret));
}
current_group_ = 0;
is_new_batch_ = 0;
}
uint64_t parent_spf_group = 0;
if(OB_SUCC(ret)) {
parent_->get_current_group(parent_spf_group);
}
if (OB_SUCC(ret) && current_group_ < parent_spf_group) {
if (new_group) {
//If we are in a lookup op, we need to call get_next_row to init all of the iters.
op_.get_next_row();
}
int64_t old_jump_read_group_id;
old_jump_read_group_id = op_.get_exec_ctx().get_das_ctx().jump_read_group_id_;
op_.get_exec_ctx().get_das_ctx().jump_read_group_id_ = parent_spf_group;
if (OB_FAIL(op_.rescan())) {
if(OB_ITER_END == ret) {
ret = OB_ERR_UNEXPECTED;
}
LOG_WARN("Das jump read rescan fail.", K(ret));
}
op_.get_exec_ctx().get_das_ctx().jump_read_group_id_ = old_jump_read_group_id;
current_group_ = parent_spf_group;
}
}
} else {
//No batch branch
if (OB_SUCC(ret) && OB_FAIL(op_.rescan())) {
LOG_WARN("failed to do rescan", K(ret));
}
}
}
iter_end_ = false;
//for vectorize mode, SPF iter may have a stored batch to process
//should reset them in rewind()
iter_brs_ = NULL;
batch_size_ = 0;
batch_row_pos_ = 0;
if (OB_SUCC(ret)) {
iter_end_ = false;
//for vectorize mode, SPF iter may have a stored batch to process
//should reset them in rewind()
iter_brs_ = NULL;
batch_size_ = 0;
batch_row_pos_ = 0;
}
return ret;
}
@ -104,6 +170,9 @@ void ObSubQueryIterator::reuse()
batch_size_ = 0;
batch_row_pos_ = 0;
iter_end_ = false;
is_new_batch_ = false;
current_group_ = 0;
das_batch_params_recovery_.reset();
}
//TODO move this to the corresponding expr and set a flag to ensure it is computed only once
@ -248,6 +317,55 @@ int ObSubQueryIterator::reset_hash_map()
return ret;
}
int ObSubQueryIterator::alloc_das_batch_store()
{
int ret = OB_SUCCESS;
int64_t params_count = 0;
params_count = parent_->get_spec().rescan_params_.count();
if (!das_batch_params_recovery_.empty()) {
//Do nothing
OB_ASSERT(params_count == das_batch_params_recovery_.count());
} else {
ObIAllocator& alloc = op_.get_exec_ctx().get_allocator();
if (OB_FAIL(das_batch_params_recovery_.allocate_array(alloc, params_count))) {
LOG_WARN("Alloc das batch params fail." , K(ret));
}
}
return ret;
}
void GroupParamBackupGuard::save_das_batch_store()
{
//int64_t params_count = 0;
//params_count = parent_->get_spec().rescan_params_.count();
OB_ASSERT(!das_batch_params_recovery_.empty());
OB_ASSERT(das_batch_params_recovery_.count() == params_count_);
ObPhysicalPlanCtx *phy_ctx = eval_ctx_.exec_ctx_.get_physical_plan_ctx();
ParamStore &param_store = phy_ctx->get_param_store_for_update();
for (int64_t i = 0; i < params_count_; ++i) {
const ObDynamicParamSetter &rescan_param = rescan_params_.at(i);
//Always shallow copy for state save.
das_batch_params_recovery_.at(i) = param_store.at(rescan_param.param_idx_);
}
}
void GroupParamBackupGuard::resume_das_batch_store()
{
OB_ASSERT(!das_batch_params_recovery_.empty());
OB_ASSERT(das_batch_params_recovery_.count() == params_count_);
ObPhysicalPlanCtx *phy_ctx = eval_ctx_.exec_ctx_.get_physical_plan_ctx();
ParamStore &param_store = phy_ctx->get_param_store_for_update();
for (int64_t i = 0; i < params_count_; ++i) {
const ObDynamicParamSetter &rescan_param = rescan_params_.at(i);
//Always shallow copy for state resume.
param_store.at(rescan_param.param_idx_) = das_batch_params_recovery_.at(i);
ObExpr *dst = rescan_param.dst_;
ObDatum &param_datum = dst->locate_datum_for_write(eval_ctx_);
param_datum.from_obj(das_batch_params_recovery_.at(i), dst->obj_datum_map_);
}
}
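
GroupParamBackupGuard backs up the dynamic rescan params on entry and writes both the param store entries and the corresponding datums back on exit, which is what makes nested rescans treat the param store like a stack. For illustration only, the caller-side pattern below mirrors ObSubQueryIterator::rewind() above; it assumes the guard invokes save_das_batch_store() in its constructor and resume_das_batch_store() in its destructor, which this diff does not show.

// Hypothetical sketch of the scoped backup/restore pattern.
{
  GroupParamBackupGuard guard(eval_ctx_,
                              das_batch_params_recovery_,
                              parent_->get_spec().rescan_params_,
                              parent_->get_spec().rescan_params_.count());
  // any param-store change made in this scope is undone when the guard
  // goes out of scope, even on the error path
  ret = parent_->bind_das_batch_params_to_store();
  if (OB_SUCC(ret) && OB_FAIL(op_.rescan())) {
    LOG_WARN("failed to do rescan", K(ret));
  }
}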
ObSubPlanFilterSpec::ObSubPlanFilterSpec(ObIAllocator &alloc, const ObPhyOperatorType type)
: ObOpSpec(alloc, type),
rescan_params_(alloc),
@ -260,7 +378,9 @@ ObSubPlanFilterSpec::ObSubPlanFilterSpec(ObIAllocator &alloc, const ObPhyOperato
enable_px_batch_rescans_(alloc),
enable_das_batch_rescans_(false),
filter_exprs_(alloc),
output_exprs_(alloc)
output_exprs_(alloc),
left_rescan_params_(alloc),
right_rescan_params_(alloc)
{
}
@ -275,7 +395,9 @@ OB_SERIALIZE_MEMBER((ObSubPlanFilterSpec, ObOpSpec),
enable_px_batch_rescans_,
enable_das_batch_rescans_,
filter_exprs_,
output_exprs_);
output_exprs_,
left_rescan_params_,
right_rescan_params_);
DEF_TO_STRING(ObSubPlanFilterSpec)
{
@ -301,12 +423,15 @@ ObSubPlanFilterOp::ObSubPlanFilterOp(
update_set_mem_(NULL),
iter_end_(false),
enable_left_px_batch_(false),
enable_left_das_batch_(false),
max_group_size_(0),
current_group_(0),
das_batch_params_(),
left_rows_(),
left_rows_iter_(),
last_store_row_(),
save_last_row_(false),
is_left_end_(false),
left_row_idx_(0),
batch_rescan_ctl_(),
cur_params_(),
cur_param_idxs_(),
@ -366,23 +491,35 @@ int ObSubPlanFilterOp::rescan()
LOG_WARN("failed to inner rescan", K(ret));
}
if (OB_SUCC(ret) && enable_left_px_batch_) {
if (OB_SUCC(ret) &&
(MY_SPEC.enable_das_batch_rescans_ || enable_left_px_batch_)) {
left_rows_.reset();
left_rows_iter_.reset();
batch_rescan_ctl_.reuse();
is_left_end_ = false;
save_last_row_ = false;
last_store_row_.reset();
}
if (OB_SUCC(ret) && MY_SPEC.enable_das_batch_rescans_) {
//We do not need to allocate memory again in rescan.
//das_batch_params_.reset();
current_group_ = 0;
}
if (OB_SUCC(ret) && enable_left_px_batch_) {
batch_rescan_ctl_.reuse();
cur_params_.reset();
cur_param_idxs_.reset();
cur_param_expr_idxs_.reset();
save_last_row_ = false;
last_store_row_.reset();
brs_holder_.reset();
}
for (int32_t i = 1; OB_SUCC(ret) && i < child_cnt_; ++i) {
if (OB_FAIL(children_[i]->rescan())) {
LOG_WARN("rescan child operator failed", K(ret),
"op", op_name(), "child", children_[i]->op_name());
if (!MY_SPEC.enable_das_batch_rescans_) {
for (int32_t i = 1; OB_SUCC(ret) && i < child_cnt_; ++i) {
if (OB_FAIL(children_[i]->rescan())) {
LOG_WARN("rescan child operator failed", K(ret),
"op", op_name(), "child", children_[i]->op_name());
}
}
}
for (int32_t i = 1; OB_SUCC(ret) && i < child_cnt_; ++i) {
@ -495,7 +632,6 @@ int ObSubPlanFilterOp::inner_open()
MY_SPEC.enable_px_batch_rescans_.at(i)) {
enable_left_px_batch_ = true;
}
enable_left_das_batch_ = MY_SPEC.enable_das_batch_rescans_;
if (!MY_SPEC.exec_param_idxs_inited_) {
//unittest or old version, do not init hashmap
} else if (OB_FAIL(iter->init_mem_entity())) {
@ -514,7 +650,19 @@ int ObSubPlanFilterOp::inner_open()
}
}
}
if (enable_left_px_batch_ && OB_ISNULL(last_store_row_mem_)) {
//BATCH SUBPLAN FILTER {
if (OB_SUCC(ret) && MY_SPEC.enable_das_batch_rescans_) {
max_group_size_ = OB_MAX_BULK_JOIN_ROWS;
if(OB_FAIL(alloc_das_batch_params(max_group_size_+MY_SPEC.max_batch_size_))) {
LOG_WARN("Fail to alloc das batch params.", K(ret));
}
}
//} BATCH SUBPLAN FILTER END
//left_rows_ is used by both px batch and das batch rescan.
if (OB_SUCC(ret) &&
(enable_left_px_batch_ || MY_SPEC.enable_das_batch_rescans_) &&
OB_ISNULL(last_store_row_mem_)) {
ObSQLSessionInfo *session = ctx_.get_my_session();
uint64_t tenant_id =session->get_effective_tenant_id();
lib::ContextParam param;
@ -545,6 +693,9 @@ int ObSubPlanFilterOp::inner_close()
{
destroy_subplan_iters();
destroy_update_set_mem();
if (MY_SPEC.enable_das_batch_rescans_) {
das_batch_params_.reset();
}
return OB_SUCCESS;
}
@ -575,15 +726,36 @@ int ObSubPlanFilterOp::handle_next_row()
OZ(prepare_onetime_exprs());
}
if (OB_FAIL(ret)) {
} else if (enable_left_px_batch_) {
} else if (enable_left_px_batch_ || MY_SPEC.enable_das_batch_rescans_) {
//DAS batch SPF conflicts with PX batch SPF
OB_ASSERT(!(enable_left_px_batch_ && MY_SPEC.enable_das_batch_rescans_));
bool has_row = false;
int batch_count = PX_RESCAN_BATCH_ROW_COUNT;
int batch_count = 0;
batch_count = MY_SPEC.enable_das_batch_rescans_ ? max_group_size_ : PX_RESCAN_BATCH_ROW_COUNT;
if (left_rows_iter_.is_valid() && left_rows_iter_.has_next()) {
batch_rescan_ctl_.cur_idx_++;
if(MY_SPEC.enable_das_batch_rescans_) {
//das batch branch
//Consume the remaining batch data in left store.
current_group_++;
} else {
OB_ASSERT(enable_left_px_batch_);
//px batch branch
batch_rescan_ctl_.cur_idx_++;
}
} else if (is_left_end_) {
ret = OB_ITER_END;
} else {
batch_rescan_ctl_.reuse();
//Accumulate a new batch into left store.
if(enable_left_px_batch_) {
batch_rescan_ctl_.reuse();
}
if (MY_SPEC.enable_das_batch_rescans_) {
current_group_ = 0;
//Always OB_SUCCESS in the current implementation.
if(OB_FAIL(init_das_batch_params())) {
LOG_WARN("Failed to init das batch params", K(ret));
}
}
left_rows_iter_.reset();
left_rows_.reset();
last_store_row_mem_->get_arena_allocator().reset();
@ -614,8 +786,10 @@ int ObSubPlanFilterOp::handle_next_row()
}
} else if (OB_FAIL(left_rows_.add_row(child_->get_spec().output_, &eval_ctx_))) {
LOG_WARN("fail to add row", K(ret));
} else if (OB_FAIL(prepare_rescan_params(true))) {
} else if (enable_left_px_batch_ && OB_FAIL(prepare_rescan_params(true))) {
LOG_WARN("fail to prepare rescan params", K(ret));
} else if (MY_SPEC.enable_das_batch_rescans_ && OB_FAIL(deep_copy_dynamic_obj())) {
LOG_WARN("fail to deep copy dynamic obj", K(ret));
} else {
has_row = true;
}
@ -633,15 +807,31 @@ int ObSubPlanFilterOp::handle_next_row()
ret = OB_SUCCESS;
OZ(left_rows_.finish_add_row(false));
OZ(left_rows_.begin(left_rows_iter_));
if (MY_SPEC.enable_das_batch_rescans_) {
//Lazily batch-rescan the right iterators:
//just set the flag and do the rescan when iter->rewind() is called.
for(int32_t i = 1; OB_SUCC(ret) && i < child_cnt_; ++i) {
Iterator *&iter = subplan_iters_.at(i - 1);
iter->set_new_batch(true);
}
}
}
}
//After accumulating a new batch, or when the previous batch still has remaining rows.
if (OB_SUCC(ret)) {
clear_evaluated_flag();
// fetch datum from left_row_iter_ instead of child operator
if (OB_FAIL(left_rows_iter_.get_next_row(child_->get_spec().output_, eval_ctx_))) {
LOG_WARN("Failed to get next row", K(ret));
} else {
} else if (enable_left_px_batch_) {
//px batch spf branch
OZ(fill_cur_row_rescan_param());
} else {
//das batch spf branch
OB_ASSERT(MY_SPEC.enable_das_batch_rescans_);
if (OB_FAIL(fill_cur_row_das_batch_param(eval_ctx_, current_group_))) {
LOG_WARN("Filed to prepare das batch rescan params", K(ret));
}
}
}
} else if (FALSE_IT(clear_evaluated_flag())) {
@ -790,6 +980,144 @@ int ObSubPlanFilterOp::handle_next_batch_with_px_rescan(const int64_t op_max_bat
return ret;
}
int ObSubPlanFilterOp::handle_next_batch_with_group_rescan(const int64_t op_max_batch_size)
{
int ret = OB_SUCCESS;
const ObBatchRows *child_brs = NULL;
bool stop_fetch = false;
ObEvalCtx::BatchInfoScopeGuard guard(eval_ctx_);
uint64_t left_rows_total_cnt = 0;
if (left_rows_iter_.is_valid() && left_rows_iter_.has_next()) {
// fetch data from left store
} else {
// 1. materialize data from child into left_rows_
// 2. prepare batch rescan params
left_rows_.reset();
left_rows_iter_.reset();
(void) brs_holder_.restore();
current_group_ = 0;
if(OB_FAIL(init_das_batch_params())) {
LOG_WARN("Failed to init das batch params", K(ret));
}
while (OB_SUCC(ret) && continue_fetching(left_rows_total_cnt, stop_fetch, true)) {
set_param_null();
clear_evaluated_flag();
int64_t store_row_cnt = -1;
if (OB_FAIL(child_->get_next_batch(op_max_batch_size, child_brs))) {
LOG_WARN("fail to get next batch", K(ret));
} else if (OB_FAIL(left_rows_.add_batch(child_->get_spec().output_, eval_ctx_,
*child_brs->skip_, child_brs->size_, store_row_cnt))) {
LOG_WARN("fail to add expr datums to left_rows_", K(ret));
} else {
stop_fetch = child_brs->end_;
left_rows_total_cnt += store_row_cnt;
guard.set_batch_size(child_brs->size_);
clear_evaluated_flag();
// prepare group batch rescan parameter
for (int64_t l_idx = 0; OB_SUCC(ret) && l_idx < child_brs->size_; l_idx++) {
if (child_brs->skip_->exist(l_idx)) { continue; }
guard.set_batch_idx(l_idx);
if (OB_FAIL(deep_copy_dynamic_obj())) {
LOG_WARN("deep_copy_dynamic_obj", K(ret));
}
}
}
}
if (OB_SUCC(ret)) {
if (!child_brs->end_) {
// backup child datums into brs_holder_
OZ(brs_holder_.save(MY_SPEC.max_batch_size_));
}
if (OB_SUCC(ret)) {
if (OB_FAIL(left_rows_.finish_add_row(false))) {
LOG_WARN("prepare rescan params failed", K(ret));
} else if (OB_FAIL(left_rows_.begin(left_rows_iter_))) {
LOG_WARN("prepare rescan params failed", K(ret));
} else if (left_rows_total_cnt != left_rows_.get_row_cnt()) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("left_rows row cnt is unexpectd", K(ret));
}
}
if (OB_SUCC(ret)) {
//Lazily batch-rescan the right iterators:
//just set the flag and do the rescan when iter->rewind() is called.
for(int32_t i = 1; OB_SUCC(ret) && i < child_cnt_; ++i) {
Iterator *&iter = subplan_iters_.at(i - 1);
iter->set_new_batch(true);
}
}
}
}
// fetch data from the materialized left_rows_ (ChunkDatumStore) and do filtering
if (OB_SUCC(ret)) {
int64_t rows_fetched = 0;
clear_evaluated_flag();
if (OB_FAIL(left_rows_iter_.get_next_batch(child_->get_spec().output_,
eval_ctx_, op_max_batch_size, rows_fetched))) {
if (OB_ITER_END == ret) {
brs_.size_ = rows_fetched;
brs_.end_ = true;
iter_end_ = true;
OB_ASSERT(0 == brs_.size_);
OB_ASSERT(0 == left_rows_total_cnt);
ret = OB_SUCCESS;
} else {
LOG_WARN("left_rows_iter_.get_next_batch failed", K(ret));
}
} else {
// Note: rows are fetched from left_rows(ChunkDatumStore), so there is no
// skip row.
// Do not change brs_.skip_
brs_.size_ = rows_fetched;
left_rows_total_cnt -= rows_fetched; // debug only
guard.set_batch_size(brs_.size_);
for (int64_t l_idx = 0; OB_SUCC(ret) && l_idx < brs_.size_; l_idx++) {
guard.set_batch_idx(l_idx);
if (OB_FAIL(fill_cur_row_das_batch_param(eval_ctx_, current_group_))) {
LOG_WARN("fill_cur_row_das_batch_param failed", K(ret));
} else {
if (need_init_before_get_row_) {
for (int32_t i = 1; OB_SUCC(ret) && i < child_cnt_; ++i) {
Iterator *&iter = subplan_iters_.at(i - 1);
if (MY_SPEC.init_plan_idxs_.has_member(i)) {
OZ(iter->prepare_init_plan());
}
}
need_init_before_get_row_ = false;
}
}
if (OB_SUCC(ret)) {
bool filtered = false;
if (OB_FAIL(filter_row(eval_ctx_, MY_SPEC.filter_exprs_, filtered))) {
LOG_WARN("fail to filter row", K(ret));
} else if (filtered) {
brs_.skip_->set(l_idx);
} else {
ObDatum *datum = NULL;
FOREACH_CNT_X(e, spec_.output_, OB_SUCC(ret)) {
if (OB_FAIL((*e)->eval(eval_ctx_, datum))) {
LOG_WARN("expr evaluate failed", K(ret), K(*e));
}
}
}
}
current_group_++;
} // for end
LOG_DEBUG("show batch_rescan_ctl_ info ", K(batch_rescan_ctl_),
K(rows_fetched), K(left_rows_total_cnt));
}
}
FOREACH_CNT_X(e, spec_.output_, OB_SUCC(ret)) {
(*e)->get_eval_info(eval_ctx_).projected_ = true;
}
return ret;
}
int ObSubPlanFilterOp::inner_get_next_batch(const int64_t max_row_cnt)
{
int ret = OB_SUCCESS;
@ -799,7 +1127,13 @@ int ObSubPlanFilterOp::inner_get_next_batch(const int64_t max_row_cnt)
}
  // fetch a row from the main table
clear_evaluated_flag();
if (enable_left_px_batch_) {
  if (OB_FAIL(ret)) {
    LOG_WARN("prepare_onetime_exprs failed", K(ret));
} else if (MY_SPEC.enable_das_batch_rescans_) {
if (OB_FAIL(handle_next_batch_with_group_rescan(op_max_batch_size))) {
LOG_WARN("handle_next_batch_with_group_rescan failed", K(ret));
}
} else if (enable_left_px_batch_) {
if (OB_FAIL(handle_next_batch_with_px_rescan(op_max_batch_size))) {
LOG_WARN("handle_next_batch_with_px_rescan failed", K(ret));
}
@ -1031,5 +1365,127 @@ int ObSubPlanFilterOp::handle_update_set()
return ret;
}
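// Allocate the DAS batch rescan param arrays: one ObSqlArrayObj per rescan param,
// each backed by group_size ObObjParam slots from the exec context allocator, with
// the element meta type taken from the corresponding dst expr.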
int ObSubPlanFilterOp::alloc_das_batch_params(uint64_t group_size)
{
int ret = OB_SUCCESS;
if (das_batch_params_.empty()) {
ret = das_batch_params_.allocate_array(ctx_.get_allocator(),
MY_SPEC.rescan_params_.count());
if(OB_SUCC(ret)) {
uint64_t obj_buf_size = sizeof(ObObjParam) * group_size;
for (int64_t i = 0; OB_SUCC(ret) && i < das_batch_params_.count(); ++i) {
void *buf = ctx_.get_allocator().alloc(obj_buf_size);
if (NULL == buf) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("allocate das params array buf failed", K(ret), K(i), K(obj_buf_size));
} else {
das_batch_params_.at(i).data_ = reinterpret_cast<ObObjParam*>(buf);
das_batch_params_.at(i).count_ = 0;
ObExpr *dst_expr = MY_SPEC.rescan_params_.at(i).dst_;
das_batch_params_.at(i).element_.set_meta_type(dst_expr->obj_meta_);
}
}
} else {
LOG_WARN("allocate das params failed", KR(ret), K(MY_SPEC.rescan_params_.count()));
}
}
return ret;
}
int ObSubPlanFilterOp::init_das_batch_params()
{
OB_ASSERT(!das_batch_params_.empty());
int ret = OB_SUCCESS;
for (int64_t i = 0; OB_SUCC(ret) && i < das_batch_params_.count(); ++i) {
das_batch_params_.at(i).count_ = 0;
}
return ret;
}
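// Evaluate the dynamic rescan params for the current left row and deep copy the
// resulting values into das_batch_params_, appending one ObObjParam per param array.
// The copies are allocated from last_store_row_mem_'s arena allocator.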
int ObSubPlanFilterOp::deep_copy_dynamic_obj()
{
int ret = OB_SUCCESS;
ObObjParam *param = NULL;
int64_t param_cnt = MY_SPEC.rescan_params_.count();
if (OB_ISNULL(last_store_row_mem_)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("mem entity not init", K(ret));
}
for (int64_t i = 0; OB_SUCC(ret) && i < param_cnt; ++i) {
const ObDynamicParamSetter &rescan_param = MY_SPEC.rescan_params_.at(i);
if (OB_FAIL(rescan_param.set_dynamic_param(eval_ctx_, param))) {
LOG_WARN("fail to set dynamic param", K(ret));
} else if (OB_ISNULL(param)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("param is null", K(ret));
} else if (OB_FAIL(ob_write_obj(last_store_row_mem_->get_arena_allocator(),
*param,
das_batch_params_.at(i).data_[das_batch_params_.at(i).count_]))) {
LOG_WARN("deep copy dynamic param", KR(ret));
} else {
++das_batch_params_.at(i).count_;
}
}
return ret;
}
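// Restore the params saved for the given group back into the dynamic param exprs
// and the plan ctx param store, so the right-side subplan is evaluated against the
// values captured for that left row.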
int ObSubPlanFilterOp::fill_cur_row_das_batch_param(ObEvalCtx& eval_ctx, uint64_t current_group) const
{
int ret = OB_SUCCESS;
ObPhysicalPlanCtx *plan_ctx = ctx_.get_physical_plan_ctx();
  if (das_batch_params_.empty() || current_group >= das_batch_params_.at(0).count_) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("group idx is unexpected", K(ret),
             K(current_group), K(das_batch_params_.count()));
} else {
int64_t param_cnt = das_batch_params_.count();
if (unlikely(MY_SPEC.rescan_params_.count() != param_cnt)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("das params count is invalid", KR(ret), K(param_cnt), K(das_batch_params_.count()));
}
for (int64_t i = 0; OB_SUCC(ret) && i < param_cnt; ++i) {
const ObDynamicParamSetter &rescan_param = MY_SPEC.rescan_params_.at(i);
int64_t param_idx = rescan_param.param_idx_;
ObExpr *dst = rescan_param.dst_;
ObDatum &param_datum = dst->locate_datum_for_write(eval_ctx);
const ObSqlArrayObj &arr = das_batch_params_.at(i);
if (OB_FAIL(param_datum.from_obj(arr.data_[current_group], dst->obj_datum_map_))) {
LOG_WARN("fail to cast datum", K(ret));
} else {
plan_ctx->get_param_store_for_update().at(param_idx) = arr.data_[current_group];
dst->set_evaluated_projected(eval_ctx);
}
}
}
return ret;
}
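// Bind each group param array into the param store as a T_EXT_SQL_ARRAY extended
// obj, so DAS rescan can see the whole batch of parameter values at once.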
int ObSubPlanFilterOp::bind_das_batch_params_to_store() const
{
int ret = OB_SUCCESS;
int64_t param_cnt = MY_SPEC.rescan_params_.count();
ObPhysicalPlanCtx *plan_ctx = GET_PHY_PLAN_CTX(ctx_);
ParamStore &param_store = plan_ctx->get_param_store_for_update();
if (OB_UNLIKELY(param_cnt != das_batch_params_.count())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("das params count is invalid", KR(ret), K(param_cnt), K(das_batch_params_.count()));
}
for (int64_t i = 0; OB_SUCC(ret) && i < param_cnt; ++i) {
const ObDynamicParamSetter &rescan_param = MY_SPEC.rescan_params_.at(i);
int64_t param_idx = rescan_param.param_idx_;
int64_t array_obj_addr = reinterpret_cast<int64_t>(&das_batch_params_.at(i));
param_store.at(param_idx).set_extend(array_obj_addr, T_EXT_SQL_ARRAY);
}
if (OB_SUCC(ret)) {
LOG_DEBUG("bind das param to store", K(das_batch_params_), K(plan_ctx->get_param_store()));
}
return ret;
}
void ObSubPlanFilterOp::get_current_group(uint64_t& current_group) const
{
current_group = current_group_;
}
} // end namespace sql
} // end namespace oceanbase

View File

@ -93,6 +93,8 @@ public:
int get_next_batch(const int64_t max_row_cnt, const ObBatchRows *&batch_rows);
//for vectorized end
bool is_onetime_plan() const { return onetime_plan_; }
  void set_new_batch(bool new_batch) { is_new_batch_ = new_batch; }
TO_STRING_KV(K(onetime_plan_), K(init_plan_), K(inited_));
//a row cache for hash optimizer to use
@ -102,6 +104,11 @@ public:
private:
// for das batch spf
int alloc_das_batch_store();
int save_das_batch_store();
int resume_das_batch_store();
// for das batch spf end
ObOperator &op_;
bool onetime_plan_;
bool init_plan_;
@ -124,6 +131,12 @@ private:
int64_t batch_row_pos_;
bool iter_end_;
// for vectorized end
// for das batch spf
bool is_new_batch_;
uint64_t current_group_;
common::ObArrayWrap<ObObjParam> das_batch_params_recovery_;
// for das batch spf end
};
class ObSubPlanFilterSpec : public ObOpSpec
@ -154,6 +167,8 @@ public:
bool enable_das_batch_rescans_;
ExprFixedArray filter_exprs_;
ExprFixedArray output_exprs_;
common::ObFixedArray<ObDynamicParamSetter, common::ObIAllocator> left_rescan_params_;
common::ObFixedArray<ObDynamicParamSetter, common::ObIAllocator> right_rescan_params_;
};
class ObSubPlanFilterOp : public ObOperator
@ -181,17 +196,28 @@ public:
}
int handle_next_row();
bool enable_px_batch_rescan() { return enable_left_px_batch_; }
bool enable_das_batch_rescan() { return enable_left_das_batch_; }
//for vectorized
int inner_get_next_batch(const int64_t max_row_cnt);
// for vectorized end
int init_left_cur_row(const int64_t column_cnt, ObExecContext &ctx);
int fill_cur_row_rescan_param();
//for DAS batch SPF
int fill_cur_row_das_batch_param(ObEvalCtx& eval_ctx, uint64_t current_group) const;
int bind_das_batch_params_to_store() const;
void get_current_group(uint64_t& current_group) const;
bool enable_left_das_batch() const {return MY_SPEC.enable_das_batch_rescans_;}
//for DAS batch SPF end
const ObSubPlanFilterSpec &get_spec() const
{ return static_cast<const ObSubPlanFilterSpec &>(spec_); }
public:
ObBatchRescanCtl &get_batch_rescan_ctl() { return batch_rescan_ctl_; }
static const int64_t PX_RESCAN_BATCH_ROW_COUNT = 8192;
int handle_next_batch_with_px_rescan(const int64_t op_max_batch_size);
int handle_next_batch_with_group_rescan(const int64_t op_max_batch_size);
private:
void set_param_null() { set_pushdown_param_null(MY_SPEC.rescan_params_); };
void destroy_subplan_iters();
@ -208,19 +234,31 @@ private:
int prepare_onetime_exprs();
int prepare_onetime_exprs_inner();
int handle_update_set();
bool continue_fetching(uint64_t left_rows_total_cnt, bool stop)
bool continue_fetching(uint64_t left_rows_total_cnt, bool stop, bool use_group = false)
{
return (!stop && (left_rows_total_cnt < PX_RESCAN_BATCH_ROW_COUNT));
    return use_group ? (!stop && (left_rows_total_cnt < max_group_size_))
                     : (!stop && (left_rows_total_cnt < PX_RESCAN_BATCH_ROW_COUNT));
}
// for das batch spf
int alloc_das_batch_params(uint64_t group_size);
int init_das_batch_params();
int deep_copy_dynamic_obj();
// for das batch spf end
private:
common::ObSEArray<Iterator *, 16> subplan_iters_;
lib::MemoryContext update_set_mem_;
bool iter_end_;
// for px batch rescan
bool enable_left_px_batch_;
// for px batch rescan end
// for das batch rescan
bool enable_left_das_batch_;
  uint64_t max_group_size_;  // DAS batch rescan group size
  uint64_t current_group_;   // group id used by the current right-iterator rescan
common::ObArrayWrap<ObSqlArrayObj> das_batch_params_;
// for das batch rescan end
ObChunkDatumStore left_rows_;
ObChunkDatumStore::Iterator left_rows_iter_;
ObChunkDatumStore::ShadowStoredRow last_store_row_;
@ -237,6 +275,35 @@ private:
ObBatchResultHolder brs_holder_;
};
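// RAII guard for group rescan params: backs up the dynamic rescan params on
// construction and restores them on destruction (via save_das_batch_store /
// resume_das_batch_store).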
class GroupParamBackupGuard
{
public:
GroupParamBackupGuard(ObEvalCtx& eval_ctx,
common::ObArrayWrap<ObObjParam>& das_batch_params_recovery,
const common::ObFixedArray<ObDynamicParamSetter, common::ObIAllocator>& rescan_params,
int64_t params_count)
: eval_ctx_(eval_ctx),
das_batch_params_recovery_(das_batch_params_recovery),
rescan_params_(rescan_params),
params_count_(params_count)
{
save_das_batch_store();
}
~GroupParamBackupGuard()
{
resume_das_batch_store();
}
private:
void save_das_batch_store();
void resume_das_batch_store();
private:
ObEvalCtx& eval_ctx_;
common::ObArrayWrap<ObObjParam>& das_batch_params_recovery_;
const common::ObFixedArray<ObDynamicParamSetter, common::ObIAllocator>& rescan_params_;
int64_t params_count_;
};
} // end namespace sql
} // end namespace oceanbase

View File

@ -678,8 +678,8 @@ int ObTableScanOp::prepare_pushdown_limit_param()
} else if (MY_SPEC.batch_scan_flag_) {
//batch scan can not pushdown limit param to storage
need_final_limit_ = true;
limit_param_.offset_ = 0;
limit_param_.limit_ = -1;
tsc_rtdef_.scan_rtdef_.limit_param_.offset_ = 0;
tsc_rtdef_.scan_rtdef_.limit_param_.limit_ = -1;
} else if (tsc_rtdef_.has_lookup_limit() || das_ref_.get_das_task_cnt() > 1) {
    // for index back (table lookup), the final limit on output rows must be applied in the TableScan operator;
    // see the reason at: https://work.aone.alibaba-inc.com/issue/43232745
@ -1226,8 +1226,8 @@ int ObTableScanOp::inner_open()
}
if (OB_SUCC(ret)) {
      // we need to add the plan batch_size here, because in vectorized execution,
// left batch may greater than BNLJ_DEFAULT_GROUP_SIZE
max_group_size_ = BNLJ_DEFAULT_GROUP_SIZE + MY_SPEC.plan_->get_batch_size();
      // the left batch may be greater than OB_MAX_BULK_JOIN_ROWS
max_group_size_ = OB_MAX_BULK_JOIN_ROWS + MY_SPEC.plan_->get_batch_size();
if (MY_CTDEF.pre_query_range_.get_is_equal_and()) {
int64_t column_count = MY_CTDEF.pre_query_range_.get_column_count();
size_t range_size = sizeof(ObNewRange) + sizeof(ObObj) * column_count * 2;
@ -1398,7 +1398,7 @@ int ObTableScanOp::inner_rescan()
// replaced by NLJ.
LOG_WARN("build batch nlj params failed", KR(ret));
} else if (!need_fetch_batch_result()) {
ret = switch_batch_iter();
ret = set_batch_iter(ctx_.get_das_ctx().jump_read_group_id_);
} else {
if (is_virtual_table(MY_SPEC.ref_table_id_)
|| !das_ref_.is_all_local_task()
@ -1563,6 +1563,37 @@ int ObTableScanOp::switch_batch_iter()
return ret;
}
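// Position every DAS group scan task at the given group id, then prepare the
// result iterator for output; iter_end_ is set when the groups are exhausted or
// there are no tasks.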
int ObTableScanOp::set_batch_iter(int64_t group_id)
{
int ret = OB_SUCCESS;
for (DASTaskIter task_iter = das_ref_.begin_task_iter();
OB_SUCC(ret) && !task_iter.is_end(); ++task_iter) {
ObDASGroupScanOp *group_scan_op = DAS_GROUP_SCAN_OP(*task_iter);
if (OB_FAIL(group_scan_op->set_scan_group(group_id))) {
if (OB_ITER_END != ret) {
LOG_WARN("switch batch iter failed", K(ret));
} else {
iter_end_ = true;
}
}
}
if (OB_SUCC(ret) && !iter_end_) {
if (!das_ref_.has_task()) {
iter_end_ = true;
} else {
//prepare to output row
scan_result_ = das_ref_.begin_result_iter();
if (OB_FAIL(update_output_tablet_id())) {
LOG_WARN("update output row pkey failed", K(ret), KPC(scan_result_.get_tablet_loc()));
}
}
}
return ret;
}
int ObTableScanOp::get_next_row_with_das()
{
int ret = OB_SUCCESS;
@ -1589,12 +1620,31 @@ int ObTableScanOp::get_next_row_with_das()
LOG_WARN("get next row from das result failed", K(ret));
}
} else {
++input_row_cnt_;
      // We need to apply the filter before applying the limit.
      // See issue 47201028.
bool filtered = false;
if (need_final_limit_ && !MY_SPEC.filters_.empty()) {
if (OB_FAIL(filter_row(filtered))) {
LOG_WARN("das get_next_row filter row failed", K(ret));
} else {
if(filtered) {
//Do nothing
} else {
++input_row_cnt_;
}
}
} else {
++input_row_cnt_;
}
if (need_final_limit_ && input_row_cnt_ <= limit_param_.offset_) {
continue;
} else {
++output_row_cnt_;
got_row = true;
if (need_final_limit_ && !MY_SPEC.filters_.empty() && filtered) {
//Do nothing
} else {
++output_row_cnt_;
got_row = true;
}
}
}
}
@ -1635,7 +1685,26 @@ int ObTableScanOp::get_next_batch_with_das(int64_t &count, int64_t capacity)
LOG_WARN("get next batch from das result failed", K(ret));
}
} else {
input_row_cnt_ += count;
        // We need to apply the filter before applying the limit.
        // See issue 47201028.
        if (need_final_limit_ && !MY_SPEC.filters_.empty() && count > 0) {
bool all_filtered = false;
if (OB_FAIL(filter_batch_rows(MY_SPEC.filters_,
*brs_.skip_,
count,
all_filtered))) {
LOG_WARN("filter batch failed in das get_next_batch", K(ret));
} else if (all_filtered) {
//Do nothing.
brs_.skip_->reset(count);
} else {
int64_t skipped_rows_count = brs_.skip_->accumulate_bit_cnt(count);
input_row_cnt_ += count - skipped_rows_count;
brs_.skip_->reset(count);
}
} else {
input_row_cnt_ += count;
}
}
}
if (OB_SUCC(ret) && need_final_limit_) {
@ -1671,9 +1740,29 @@ int ObTableScanOp::get_next_batch_with_das(int64_t &count, int64_t capacity)
LOG_WARN("get next batch from das result failed", K(ret));
}
} else {
got_batch = true;
output_row_cnt_ += count;
input_row_cnt_ += count;
        // We need to apply the filter before applying the limit.
        // See issue 47201028.
if (need_final_limit_ && !MY_SPEC.filters_.empty() && count > 0) {
bool all_filtered = false;
if (OB_FAIL(filter_batch_rows(MY_SPEC.filters_,
*brs_.skip_,
count,
all_filtered))) {
LOG_WARN("filter batch failed in das get_next_batch", K(ret));
} else if (all_filtered) {
//Do nothing.
brs_.skip_->reset(count);
} else {
int64_t skipped_rows_count = brs_.skip_->accumulate_bit_cnt(count);
got_batch = true;
output_row_cnt_ += (count - skipped_rows_count);
input_row_cnt_ += (count - skipped_rows_count);
}
} else {
got_batch = true;
output_row_cnt_ += count;
input_row_cnt_ += count;
}
}
}
return ret;

View File

@ -383,6 +383,7 @@ protected:
int local_iter_reuse();
int switch_batch_iter();
int set_batch_iter(int64_t group_id);
int calc_expr_int_value(const ObExpr &expr, int64_t &retval, bool &is_null_value);
int init_table_scan_rtdef();
int init_das_scan_rtdef(const ObDASScanCtDef &das_ctdef,