[FEAT MERGE] merge NLJ/SPF group rescan

This commit is contained in:
obdev
2023-01-12 15:17:04 +00:00
committed by OB-robot
parent 4799992a7f
commit 263a44af96
36 changed files with 3945 additions and 859 deletions

View File

@ -23,7 +23,10 @@ namespace sql
{
OB_SERIALIZE_MEMBER((ObNestedLoopJoinSpec, ObBasicNestedLoopJoinSpec),
use_group_, left_group_size_, left_expr_ids_in_other_cond_);
group_rescan_, group_size_,
left_expr_ids_in_other_cond_,
left_rescan_params_,
right_rescan_params_);
ObNestedLoopJoinOp::ObNestedLoopJoinOp(ObExecContext &exec_ctx,
const ObOpSpec &spec,
@ -35,8 +38,8 @@ ObNestedLoopJoinOp::ObNestedLoopJoinOp(ObExecContext &exec_ctx,
batch_state_(JS_FILL_LEFT), save_last_batch_(false),
batch_mem_ctx_(NULL), stored_rows_(NULL), left_brs_(NULL), left_matched_(NULL),
need_switch_iter_(false), iter_end_(false), op_max_batch_size_(0),
max_group_size_(BNLJ_DEFAULT_GROUP_SIZE),
bnlj_cur_idx_(0)
max_group_size_(OB_MAX_BULK_JOIN_ROWS),
group_join_buffer_()
{
state_operation_func_[JS_JOIN_END] = &ObNestedLoopJoinOp::join_end_operate;
state_function_func_[JS_JOIN_END][FT_ITER_GOING] = NULL;
@ -61,8 +64,8 @@ int ObNestedLoopJoinOp::inner_open()
LOG_WARN("failed to open in base class", K(ret));
}
if (OB_SUCC(ret) && is_vectorized()) {
if (MY_SPEC.use_group_) {
max_group_size_ = BNLJ_DEFAULT_GROUP_SIZE + MY_SPEC.plan_->get_batch_size();
if (MY_SPEC.group_rescan_) {
max_group_size_ = OB_MAX_BULK_JOIN_ROWS + MY_SPEC.plan_->get_batch_size();
}
if (OB_ISNULL(batch_mem_ctx_)) {
ObSQLSessionInfo *session = ctx_.get_my_session();
@ -118,15 +121,25 @@ int ObNestedLoopJoinOp::inner_open()
&(batch_mem_ctx_->get_arena_allocator()),
MY_SPEC.max_batch_size_))) {
LOG_WARN("fail to init batch", K(ret));
} else if (MY_SPEC.use_group_ || MY_SPEC.enable_px_batch_rescan_) {
} else if (MY_SPEC.enable_px_batch_rescan_) {
if (OB_FAIL(last_save_batch_.init(&left_->get_spec().output_,
&batch_mem_ctx_->get_arena_allocator(),
MY_SPEC.max_batch_size_))) {
&batch_mem_ctx_->get_arena_allocator(),
MY_SPEC.max_batch_size_))) {
LOG_WARN("fail to init batch", K(ret));
}
}
}
}
if (OB_SUCC(ret) && MY_SPEC.group_rescan_) {
if (OB_FAIL(group_join_buffer_.init(this,
max_group_size_,
MY_SPEC.group_size_,
&MY_SPEC.rescan_params_,
&MY_SPEC.left_rescan_params_,
&MY_SPEC.right_rescan_params_))) {
LOG_WARN("init batch info failed", KR(ret));
}
}
return ret;
}
//NLJ has its own switch_iterator
@ -156,12 +169,19 @@ int ObNestedLoopJoinOp::rescan()
//NLJ's rescan should only drive left child's rescan,
//the right child's rescan is defer to rescan_right_operator() driven by get_next_row();
defered_right_rescan_ = true;
if (OB_FAIL(left_->rescan())) {
LOG_WARN("rescan left child operator failed", K(ret), "child op_type", left_->op_name());
}
if (OB_SUCC(ret)) {
if (OB_FAIL(inner_rescan())) {
LOG_WARN("failed to inner rescan", K(ret));
if (!MY_SPEC.group_rescan_) {
if (OB_FAIL(left_->rescan())) {
LOG_WARN("rescan left child operator failed", KR(ret), "child op_type", left_->op_name());
} else if (OB_FAIL(inner_rescan())) {
LOG_WARN("failed to inner rescan", KR(ret));
}
} else {
if (OB_FAIL(group_join_buffer_.init_above_group_params())) {
LOG_WARN("init above bnlj params failed", KR(ret));
} else if (OB_FAIL(group_join_buffer_.rescan_left())) {
LOG_WARN("rescan left failed", KR(ret));
} else if (OB_FAIL(inner_rescan())) {
LOG_WARN("inner rescan failed", KR(ret));
}
}
@ -268,33 +288,6 @@ int ObNestedLoopJoinOp::fill_cur_row_rescan_param()
return ret;
}
int ObNestedLoopJoinOp::fill_cur_row_bnlj_param()
{
int ret = OB_SUCCESS;
ObPhysicalPlanCtx *plan_ctx = ctx_.get_physical_plan_ctx();
if (bnlj_params_.empty() || bnlj_cur_idx_ >= bnlj_params_.at(0).count_) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("row idx is unexpected", K(ret),
K(bnlj_cur_idx_), K(bnlj_params_.at(0).count_));
} else {
int64_t param_cnt = bnlj_params_.count();
for (int64_t i = 0; OB_SUCC(ret) && i < param_cnt; ++i) {
const ObDynamicParamSetter &rescan_param = get_spec().rescan_params_.at(i);
int64_t param_idx = rescan_param.param_idx_;
ObExpr *dst = rescan_param.dst_;
ObDatum &param_datum = dst->locate_datum_for_write(eval_ctx_);
ObSqlArrayObj &arr = bnlj_params_.at(i);
if (OB_FAIL(param_datum.from_obj(arr.data_[bnlj_cur_idx_], dst->obj_datum_map_))) {
LOG_WARN("fail to cast datum", K(ret));
} else {
plan_ctx->get_param_store_for_update().at(param_idx) = arr.data_[bnlj_cur_idx_];
dst->set_evaluated_projected(eval_ctx_);
}
}
}
return ret;
}
int ObNestedLoopJoinOp::join_row_with_semi_join()
{
int ret = OB_SUCCESS;
@ -356,7 +349,7 @@ int ObNestedLoopJoinOp::join_end_func_end()
int ObNestedLoopJoinOp::read_left_operate()
{
int ret = OB_SUCCESS;
if (MY_SPEC.use_group_ || MY_SPEC.enable_px_batch_rescan_) {
if (MY_SPEC.group_rescan_ || MY_SPEC.enable_px_batch_rescan_) {
if (OB_FAIL(group_read_left_operate()) && OB_ITER_END != ret) {
LOG_WARN("failed to read left group", K(ret));
}
@ -408,230 +401,165 @@ int ObNestedLoopJoinOp::rescan_right_operator()
}
} else {
brs_holder_.reset();
if (MY_SPEC.use_group_ && !MY_SPEC.enable_px_batch_rescan_) {
if (OB_FAIL(fill_cur_row_bnlj_param())) {
LOG_WARN("fill bnlj param failed", K(ret));
} else {
bnlj_cur_idx_++;
}
}
}
}
return ret;
}
int ObNestedLoopJoinOp::init_bnlj_params()
{
int ret = OB_SUCCESS;
if (!bnlj_params_.empty()) {
//to reuse bnlj param buffer
for (int64_t i = 0; i < bnlj_params_.count(); ++i) {
bnlj_params_.at(i).count_ = 0;
}
} else if (OB_FAIL(bnlj_params_.allocate_array(ctx_.get_allocator(),
get_spec().rescan_params_.count()))) {
LOG_WARN("allocate bnlj params failed", K(ret), K(get_spec().rescan_params_.count()));
} else {
int64_t obj_buf_size = sizeof(ObObjParam) * max_group_size_;
for (int64_t i = 0; OB_SUCC(ret) && i < bnlj_params_.count(); ++i) {
ObExpr *dst_expr = get_spec().rescan_params_.at(i).dst_;
void *buf = ctx_.get_allocator().alloc(obj_buf_size);
if (OB_ISNULL(buf)) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("fail to alloc memory", K(ret), K(obj_buf_size));
} else {
bnlj_params_.at(i).data_ = reinterpret_cast<ObObjParam*>(buf);
bnlj_params_.at(i).count_ = 0;
bnlj_params_.at(i).element_.set_meta_type(dst_expr->obj_meta_);
}
}
}
return ret;
}
int ObNestedLoopJoinOp::bind_bnlj_param_to_store()
{
int ret = OB_SUCCESS;
int64_t param_cnt = get_spec().rescan_params_.count();
ObPhysicalPlanCtx *plan_ctx = GET_PHY_PLAN_CTX(ctx_);
ParamStore &param_store = plan_ctx->get_param_store_for_update();
if (OB_UNLIKELY(param_cnt != bnlj_params_.count())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("bnlj param count is invalid", K(ret), K(param_cnt), K(bnlj_params_.count()));
}
for (int64_t i = 0; OB_SUCC(ret) && i < param_cnt; ++i) {
const ObDynamicParamSetter &rescan_param = get_spec().rescan_params_.at(i);
int64_t param_idx = rescan_param.param_idx_;
int64_t array_obj_addr = reinterpret_cast<int64_t>(&bnlj_params_.at(i));
param_store.at(param_idx).set_extend(array_obj_addr, T_EXT_SQL_ARRAY);
}
return ret;
}
int ObNestedLoopJoinOp::group_read_left_operate()
{
int ret = OB_SUCCESS;
if (left_store_iter_.is_valid() && left_store_iter_.has_next()) {
// 重新设置右表 table scan result, result 为下一个 cache
if (MY_SPEC.enable_px_batch_rescan_) {
batch_rescan_ctl_.cur_idx_++;
} else if (OB_FAIL(rescan_right_operator())) {
// 这里是不期望有 OB_ITER_END
if (OB_ITER_END == ret) {
ret = OB_ERR_UNEXPECTED;
if (MY_SPEC.enable_px_batch_rescan_) {
if (left_store_iter_.is_valid() && left_store_iter_.has_next()) {
// 重新设置右表 table scan result, result 为下一个 cache
if (MY_SPEC.enable_px_batch_rescan_) {
batch_rescan_ctl_.cur_idx_++;
}
LOG_WARN("failed to get next right row from group", K(ret));
}
} else {
// 当前 row 对应的 cache 读完了, 左表拿新 cache, 设置右表 result_iter
if (!MY_SPEC.enable_px_batch_rescan_ && OB_FAIL(init_bnlj_params())) {
LOG_WARN("Failed to init group rescan", K(ret));
} else if (is_left_end_) {
ret = OB_ITER_END;
} else {
if (OB_ISNULL(mem_context_)) {
ObSQLSessionInfo *session = ctx_.get_my_session();
uint64_t tenant_id =session->get_effective_tenant_id();
lib::ContextParam param;
param.set_mem_attr(tenant_id,
ObModIds::OB_SQL_NLJ_CACHE,
ObCtxIds::WORK_AREA)
.set_properties(lib::USE_TL_PAGE_OPTIONAL);
if (OB_FAIL(CURRENT_CONTEXT->CREATE_CONTEXT(mem_context_, param))) {
LOG_WARN("create entity failed", K(ret));
} else if (OB_ISNULL(mem_context_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("null memory entity returned", K(ret));
} else if (OB_FAIL(left_store_.init(UINT64_MAX, tenant_id, ObCtxIds::WORK_AREA))) {
LOG_WARN("init row store failed", K(ret));
} else {
left_store_.set_allocator(mem_context_->get_malloc_allocator());
}
}
bool ignore_end = false;
if (OB_SUCC(ret)) {
// 没有下一个了, 尝试填充 cache.
batch_rescan_ctl_.reuse();
bnlj_cur_idx_ = 0;
left_store_iter_.reset();
left_store_.reset();
mem_context_->get_arena_allocator().reset();
if (OB_ISNULL(last_store_row_.get_store_row())) {
if (save_last_row_) {
if (is_left_end_) {
ret = OB_ITER_END;
} else {
if (OB_ISNULL(mem_context_)) {
ObSQLSessionInfo *session = ctx_.get_my_session();
uint64_t tenant_id =session->get_effective_tenant_id();
lib::ContextParam param;
param.set_mem_attr(tenant_id,
ObModIds::OB_SQL_NLJ_CACHE,
ObCtxIds::WORK_AREA)
.set_properties(lib::USE_TL_PAGE_OPTIONAL);
if (OB_FAIL(CURRENT_CONTEXT->CREATE_CONTEXT(mem_context_, param))) {
LOG_WARN("create entity failed", K(ret));
} else if (OB_ISNULL(mem_context_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected status: store row is null", K(ret));
} else if (OB_FAIL(last_store_row_.init(
mem_context_->get_malloc_allocator(), left_->get_spec().output_.count()))) {
LOG_WARN("failed to init right last row", K(ret));
}
} else if (save_last_row_) {
if (OB_FAIL(last_store_row_.restore(left_->get_spec().output_, eval_ctx_))) {
LOG_WARN("failed to restore left row", K(ret));
}
}
save_last_row_ = false;
set_param_null();
while (OB_SUCC(ret) && !is_full()) {
// need clear evaluated flag, since prepare_rescan_params() will evaluate expression.
clear_evaluated_flag();
if (OB_FAIL(get_next_left_row())) {
if (OB_ITER_END != ret) {
LOG_WARN("failed to get next left row", K(ret));
} else {
is_left_end_ = true;
}
} else if (OB_FAIL(left_store_.add_row(left_->get_spec().output_, &eval_ctx_))) {
LOG_WARN("failed to store left row", K(ret));
// do nothing
} else if (OB_FAIL(prepare_rescan_params(true/*is_group*/))) {
LOG_WARN("failed to prepare rescan params", K(ret));
// 下压参数数据是由被换的原始表达式计算生成, 比如c1 = c2 + 1--> c1 = ?;
// 下压参数?的值, 由c2+1计算而来, c2+1的内存是复用的, 如果此时不深拷贝
// 计算query range的下压param, 则可能导致后面query range的结果和
// 前面query range的obobj对应的ptr(string/number类型在obj中ptr)使用相同指针;
} else if (!MY_SPEC.enable_px_batch_rescan_ && OB_FAIL(deep_copy_dynamic_obj())) {
LOG_WARN("fail to deep copy dynamic obj", K(ret));
LOG_WARN("null memory entity returned", K(ret));
} else if (OB_FAIL(left_store_.init(UINT64_MAX, tenant_id, ObCtxIds::WORK_AREA))) {
LOG_WARN("init row store failed", K(ret));
} else {
ignore_end = true;
left_store_.set_allocator(mem_context_->get_malloc_allocator());
}
}
bool ignore_end = false;
if (OB_SUCC(ret)) {
// here need to set param null, because dynamic datum ptr
// which from last batch row may invalid
// 没有下一个了, 尝试填充 cache.
batch_rescan_ctl_.reuse();
left_store_iter_.reset();
left_store_.reset();
mem_context_->get_arena_allocator().reset();
if (OB_ISNULL(last_store_row_.get_store_row())) {
if (save_last_row_) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected status: store row is null", K(ret));
} else if (OB_FAIL(last_store_row_.init(
mem_context_->get_malloc_allocator(), left_->get_spec().output_.count()))) {
LOG_WARN("failed to init right last row", K(ret));
}
} else if (save_last_row_) {
if (OB_FAIL(last_store_row_.restore(left_->get_spec().output_, eval_ctx_))) {
LOG_WARN("failed to restore left row", K(ret));
}
}
save_last_row_ = false;
set_param_null();
if (OB_FAIL(last_store_row_.shadow_copy(left_->get_spec().output_, eval_ctx_))) {
LOG_WARN("failed to shadow copy last left row", K(ret));
} else {
save_last_row_ = true;
while (OB_SUCC(ret) && !is_full()) {
// need clear evaluated flag, since prepare_rescan_params() will evaluate expression.
clear_evaluated_flag();
if (OB_FAIL(get_next_left_row())) {
if (OB_ITER_END != ret) {
LOG_WARN("failed to get next left row", K(ret));
} else {
is_left_end_ = true;
}
} else if (OB_FAIL(left_store_.add_row(left_->get_spec().output_, &eval_ctx_))) {
LOG_WARN("failed to store left row", K(ret));
// do nothing
} else if (OB_FAIL(prepare_rescan_params(true/*is_group*/))) {
LOG_WARN("failed to prepare rescan params", K(ret));
// 下压参数数据是由被换的原始表达式计算生成, 比如c1 = c2 + 1--> c1 = ?;
// 下压参数?的值, 由c2+1计算而来, c2+1的内存是复用的, 如果此时不深拷贝
// 计算query range的下压param, 则可能导致后面query range的结果和
// 前面query range的obobj对应的ptr(string/number类型在obj中ptr)使用相同指针;
} else {
ignore_end = true;
}
}
if (OB_SUCC(ret)) {
// here need to set param null, because dynamic datum ptr
// which from last batch row may invalid
set_param_null();
if (OB_FAIL(last_store_row_.shadow_copy(left_->get_spec().output_, eval_ctx_))) {
LOG_WARN("failed to shadow copy last left row", K(ret));
} else {
save_last_row_ = true;
}
}
}
if (OB_SUCC(ret) || (ignore_end && OB_ITER_END == ret)) {
ret = OB_SUCCESS;
ObPhysicalPlanCtx *plan_ctx = GET_PHY_PLAN_CTX(ctx_);
if (OB_FAIL(left_store_.finish_add_row(false))) {
LOG_WARN("failed to finish add row to row store", K(ret));
} else if (OB_FAIL(left_store_.begin(left_store_iter_))) {
LOG_WARN("failed to begin iterator for chunk row store", K(ret));
}
}
}
if (OB_SUCC(ret) || (ignore_end && OB_ITER_END == ret)) {
ret = OB_SUCCESS;
ObPhysicalPlanCtx *plan_ctx = GET_PHY_PLAN_CTX(ctx_);
if (OB_FAIL(left_store_.finish_add_row(false))) {
LOG_WARN("failed to finish add row to row store", K(ret));
} else if (OB_FAIL(left_store_.begin(left_store_iter_))) {
LOG_WARN("failed to begin iterator for chunk row store", K(ret));
} else if (!MY_SPEC.enable_px_batch_rescan_ && OB_FAIL(bind_bnlj_param_to_store())) {
LOG_WARN("bind bnlj param to store failed", K(ret));
} else if (!MY_SPEC.enable_px_batch_rescan_ && OB_FAIL(rescan_right_operator())) {
LOG_WARN("failed to rescan right op", K(ret));
}
}
}
}
if (OB_SUCC(ret)) {
// 拿到下一行 ret = OB_SUCCESS;
clear_evaluated_flag();
if (OB_FAIL(left_store_iter_.get_next_row(left_->get_spec().output_,
eval_ctx_))) {
LOG_WARN("Failed to get next row", K(ret));
} else if (MY_SPEC.enable_px_batch_rescan_ && OB_FAIL(fill_cur_row_rescan_param())) {
LOG_WARN("fail to fill cur row rescan param", K(ret));
} else if (MY_SPEC.enable_px_batch_rescan_) {
OZ(right_->rescan());
OX(brs_holder_.reset());
}
if (OB_SUCC(ret)) {
left_row_joined_ = false;
// 拿到下一行 ret = OB_SUCCESS;
clear_evaluated_flag();
if (OB_FAIL(left_store_iter_.get_next_row(left_->get_spec().output_,
eval_ctx_))) {
LOG_WARN("Failed to get next row", K(ret));
} else if (MY_SPEC.enable_px_batch_rescan_ && OB_FAIL(fill_cur_row_rescan_param())) {
LOG_WARN("fail to fill cur row rescan param", K(ret));
} else if (MY_SPEC.enable_px_batch_rescan_) {
OZ(right_->rescan());
OX(brs_holder_.reset());
}
if (OB_SUCC(ret)) {
left_row_joined_ = false;
}
}
}
return ret;
}
int ObNestedLoopJoinOp::deep_copy_dynamic_obj()
{
int ret = OB_SUCCESS;
int64_t param_cnt = get_spec().rescan_params_.count();
ObPhysicalPlanCtx *plan_ctx = GET_PHY_PLAN_CTX(ctx_);
ParamStore &param_store = plan_ctx->get_param_store_for_update();
if (OB_ISNULL(mem_context_)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("mem entity not init", K(ret));
}
for (int64_t i = 0; OB_SUCC(ret) && i < param_cnt; ++i) {
const ObDynamicParamSetter &rescan_param = get_spec().rescan_params_.at(i);
int64_t param_idx = rescan_param.param_idx_;
if (OB_FAIL(ob_write_obj(mem_context_->get_arena_allocator(),
param_store.at(param_idx),
bnlj_params_.at(i).data_[bnlj_params_.at(i).count_]))) {
LOG_WARN("deep copy dynamic param", K(ret));
} else {
// das group rescan
bool has_next = false;
if (OB_FAIL(group_join_buffer_.fill_group_buffer())) {
if (OB_ITER_END != ret) {
LOG_WARN("fill group buffer failed", KR(ret));
}
} else if (OB_FAIL(group_join_buffer_.has_next_left_row(has_next))) {
LOG_WARN("check has next failed", KR(ret));
} else if (has_next) {
clear_evaluated_flag();
if (OB_FAIL(group_join_buffer_.rescan_right())) {
if (OB_ITER_END == ret) {
ret = OB_ERR_UNEXPECTED;
}
LOG_WARN("rescan right failed", KR(ret));
} else if (OB_FAIL(group_join_buffer_.fill_cur_row_group_param())) {
LOG_WARN("fill group param failed", KR(ret));
}
} else {
++bnlj_params_.at(i).count_;
ret = OB_ITER_END;
}
if (OB_SUCC(ret)) {
clear_evaluated_flag();
if (OB_FAIL(group_join_buffer_.get_next_row_from_store())) {
if (OB_ITER_END != ret) {
LOG_WARN("get next row failed", KR(ret));
}
} else {
left_row_joined_ = false;
}
}
}
return ret;
}
int ObNestedLoopJoinOp::read_left_func_going()
{
int ret = OB_SUCCESS;
if (MY_SPEC.use_group_ || MY_SPEC.enable_px_batch_rescan_) {
if (MY_SPEC.group_rescan_ || MY_SPEC.enable_px_batch_rescan_) {
// do nothing
// group nested loop join 已经做过 rescan 了
} else if (OB_FAIL(prepare_rescan_params())) {
@ -694,13 +622,13 @@ int ObNestedLoopJoinOp::read_right_func_end()
bool ObNestedLoopJoinOp::is_full() const
{
return left_store_.get_row_cnt() >= MY_SPEC.left_group_size_;
return left_store_.get_row_cnt() >= MY_SPEC.group_size_;
}
int ObNestedLoopJoinOp::get_left_batch()
{
int ret = OB_SUCCESS;
if (MY_SPEC.use_group_ || MY_SPEC.enable_px_batch_rescan_) {
if (MY_SPEC.group_rescan_ || MY_SPEC.enable_px_batch_rescan_) {
if (OB_FAIL(group_get_left_batch(left_brs_)) && OB_ITER_END != ret) {
LOG_WARN("fail to get left batch", K(ret));
}
@ -735,127 +663,145 @@ int ObNestedLoopJoinOp::get_left_batch()
int ObNestedLoopJoinOp::group_get_left_batch(const ObBatchRows *&left_brs)
{
int ret = OB_SUCCESS;
left_brs = &left_->get_brs();
if (left_store_iter_.is_valid() && left_store_iter_.has_next()) {
// do nothing
} else {
if (!MY_SPEC.enable_px_batch_rescan_ && OB_FAIL(init_bnlj_params())) {
LOG_WARN("Failed to init bnlj params", K(ret));
} else if (is_left_end_) {
ret = OB_ITER_END;
if (MY_SPEC.enable_px_batch_rescan_) {
left_brs = &left_->get_brs();
if (left_store_iter_.is_valid() && left_store_iter_.has_next()) {
// do nothing
} else {
if (OB_ISNULL(mem_context_)) {
ObSQLSessionInfo *session = ctx_.get_my_session();
uint64_t tenant_id =session->get_effective_tenant_id();
lib::ContextParam param;
param.set_mem_attr(tenant_id,
ObModIds::OB_SQL_NLJ_CACHE,
ObCtxIds::WORK_AREA)
.set_properties(lib::USE_TL_PAGE_OPTIONAL);
if (OB_FAIL(CURRENT_CONTEXT->CREATE_CONTEXT(mem_context_, param))) {
LOG_WARN("create entity failed", K(ret));
} else if (OB_ISNULL(mem_context_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("null memory entity returned", K(ret));
} else if (OB_FAIL(left_store_.init(UINT64_MAX, tenant_id, ObCtxIds::WORK_AREA))) {
LOG_WARN("init row store failed", K(ret));
} else {
left_store_.set_allocator(mem_context_->get_malloc_allocator());
}
}
if (OB_SUCC(ret)) {
// 没有下一个了, 尝试填充 cache.
batch_rescan_ctl_.reuse();
bnlj_cur_idx_ = 0;
left_store_iter_.reset();
left_store_.reset();
mem_context_->get_arena_allocator().reset();
save_last_row_ = false;
ObEvalCtx::BatchInfoScopeGuard batch_info_guard(eval_ctx_);
while (OB_SUCC(ret) && continue_fetching()) {
// need clear evaluated flag, since prepare_rescan_params() will evaluate expression.
clear_evaluated_flag();
if (save_last_batch_) {
last_save_batch_.to_exprs(eval_ctx_);
save_last_batch_ = false;
}
set_param_null();
if (OB_FAIL(left_->get_next_batch(op_max_batch_size_, left_brs_))) {
LOG_WARN("failed to get next left row", K(ret));
} else if (left_brs_->end_) {
is_left_end_ = true;
}
for (int64_t l_idx = 0; OB_SUCC(ret) && l_idx < left_brs_->size_; l_idx++) {
if (left_brs_->skip_->exist(l_idx)) { continue; }
batch_info_guard.set_batch_idx(l_idx);
batch_info_guard.set_batch_size(left_brs_->size_);
if (OB_FAIL(left_store_.add_row(left_->get_spec().output_, &eval_ctx_))) {
LOG_WARN("failed to store left row", K(ret));
// do nothing
} else if (OB_FAIL(prepare_rescan_params(true))) {
LOG_WARN("failed to prepare rescan params", K(ret));
// 下压参数数据是由被换的原始表达式计算生成, 比如c1 = c2 + 1--> c1 = ?;
// 下压参数?的值, 由c2+1计算而来, c2+1的内存是复用的, 如果此时不深拷贝
// 计算query range的下压param, 则可能导致后面query range的结果和
// 前面query range的obobj对应的ptr(string/number类型在obj中ptr)使用相同指针;
} else if (!MY_SPEC.enable_px_batch_rescan_ && OB_FAIL(deep_copy_dynamic_obj())) {
LOG_WARN("fail to deep copy dynamic obj", K(ret));
}
} // for end
}
if (OB_SUCC(ret)) {
set_param_null();
if (left_brs_->size_ == 0 && left_brs_->end_) {
// do nothing
} else {
last_save_batch_.from_exprs(eval_ctx_, left_brs_->skip_, left_brs_->size_);
save_last_batch_ = true;
}
}
clear_evaluated_flag();
}
if (OB_SUCC(ret) ) {
if (left_store_.get_row_cnt() <= 0) {
ret = OB_ITER_END;
} else if (OB_FAIL(left_store_.finish_add_row(false))) {
LOG_WARN("failed to finish add row to row store", K(ret));
} else if (OB_FAIL(left_store_.begin(left_store_iter_))) {
LOG_WARN("failed to begin iterator for chunk row store", K(ret));
} else if (!MY_SPEC.enable_px_batch_rescan_ && OB_FAIL(bind_bnlj_param_to_store())) {
LOG_WARN("bind bnlj param to store failed", K(ret));
} else if (!MY_SPEC.enable_px_batch_rescan_ && OB_FAIL(rescan_right_operator())) {
LOG_WARN("failed to rescan right op", K(ret));
} else {
need_switch_iter_ = false;
}
}
}
}
if (OB_SUCC(ret)) {
int64_t read_size = 0;
int64_t max_size = MY_SPEC.max_batch_size_;
last_save_batch_.extend_save(eval_ctx_, max_size);
if (OB_FAIL(left_store_iter_.get_next_batch(left_->get_spec().output_,
eval_ctx_,
max_size,
read_size))) {
if (OB_ITER_END == ret) {
if (is_left_end_) {
ret = OB_ITER_END;
// do nothing
} else {
LOG_WARN("Failed to get next row", K(ret));
if (OB_ISNULL(mem_context_)) {
ObSQLSessionInfo *session = ctx_.get_my_session();
uint64_t tenant_id =session->get_effective_tenant_id();
lib::ContextParam param;
param.set_mem_attr(tenant_id,
ObModIds::OB_SQL_NLJ_CACHE,
ObCtxIds::WORK_AREA)
.set_properties(lib::USE_TL_PAGE_OPTIONAL);
if (OB_FAIL(CURRENT_CONTEXT->CREATE_CONTEXT(mem_context_, param))) {
LOG_WARN("create entity failed", K(ret));
} else if (OB_ISNULL(mem_context_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("null memory entity returned", K(ret));
} else if (OB_FAIL(left_store_.init(UINT64_MAX, tenant_id, ObCtxIds::WORK_AREA))) {
LOG_WARN("init row store failed", K(ret));
} else {
left_store_.set_allocator(mem_context_->get_malloc_allocator());
}
}
if (OB_SUCC(ret)) {
// 没有下一个了, 尝试填充 cache.
batch_rescan_ctl_.reuse();
left_store_iter_.reset();
left_store_.reset();
mem_context_->get_arena_allocator().reset();
save_last_row_ = false;
ObEvalCtx::BatchInfoScopeGuard batch_info_guard(eval_ctx_);
while (OB_SUCC(ret) && continue_fetching()) {
// need clear evaluated flag, since prepare_rescan_params() will evaluate expression.
clear_evaluated_flag();
if (save_last_batch_) {
last_save_batch_.to_exprs(eval_ctx_);
save_last_batch_ = false;
}
set_param_null();
if (OB_FAIL(left_->get_next_batch(op_max_batch_size_, left_brs_))) {
LOG_WARN("failed to get next left row", K(ret));
} else if (left_brs_->end_) {
is_left_end_ = true;
}
for (int64_t l_idx = 0; OB_SUCC(ret) && l_idx < left_brs_->size_; l_idx++) {
if (left_brs_->skip_->exist(l_idx)) { continue; }
batch_info_guard.set_batch_idx(l_idx);
batch_info_guard.set_batch_size(left_brs_->size_);
if (OB_FAIL(left_store_.add_row(left_->get_spec().output_, &eval_ctx_))) {
LOG_WARN("failed to store left row", K(ret));
// do nothing
} else if (OB_FAIL(prepare_rescan_params(true))) {
LOG_WARN("failed to prepare rescan params", K(ret));
// 下压参数数据是由被换的原始表达式计算生成, 比如c1 = c2 + 1--> c1 = ?;
// 下压参数?的值, 由c2+1计算而来, c2+1的内存是复用的, 如果此时不深拷贝
// 计算query range的下压param, 则可能导致后面query range的结果和
// 前面query range的obobj对应的ptr(string/number类型在obj中ptr)使用相同指针;
}
} // for end
}
if (OB_SUCC(ret)) {
set_param_null();
if (left_brs_->size_ == 0 && left_brs_->end_) {
// do nothing
} else {
last_save_batch_.from_exprs(eval_ctx_, left_brs_->skip_, left_brs_->size_);
save_last_batch_ = true;
}
}
clear_evaluated_flag();
}
if (OB_SUCC(ret) ) {
if (left_store_.get_row_cnt() <= 0) {
ret = OB_ITER_END;
} else if (OB_FAIL(left_store_.finish_add_row(false))) {
LOG_WARN("failed to finish add row to row store", K(ret));
} else if (OB_FAIL(left_store_.begin(left_store_iter_))) {
LOG_WARN("failed to begin iterator for chunk row store", K(ret));
} else {
need_switch_iter_ = false;
}
}
}
}
if (OB_SUCC(ret)) {
const_cast<ObBatchRows *>(left_brs)->skip_->reset(read_size);
const_cast<ObBatchRows *>(left_brs)->size_ = read_size;
const_cast<ObBatchRows *>(left_brs)->end_ = false;
left_row_joined_ = false;
int64_t read_size = 0;
int64_t max_size = MY_SPEC.max_batch_size_;
last_save_batch_.extend_save(eval_ctx_, max_size);
if (OB_FAIL(left_store_iter_.get_next_batch(left_->get_spec().output_,
eval_ctx_,
max_size,
read_size))) {
if (OB_ITER_END == ret) {
// do nothing
} else {
LOG_WARN("Failed to get next row", K(ret));
}
}
if (OB_SUCC(ret)) {
const_cast<ObBatchRows *>(left_brs)->skip_->reset(read_size);
const_cast<ObBatchRows *>(left_brs)->size_ = read_size;
const_cast<ObBatchRows *>(left_brs)->end_ = false;
left_row_joined_ = false;
}
}
} else {
// das group rescan
bool has_next = false;
if (OB_FAIL(group_join_buffer_.batch_fill_group_buffer(op_max_batch_size_, left_brs_))) {
if (OB_ITER_END != ret) {
LOG_WARN("batch fill group buffer failed", KR(ret));
}
} else if (OB_FAIL(group_join_buffer_.has_next_left_row(has_next))) {
LOG_WARN("check has next failed", KR(ret));
} else if (!has_next) {
ret = OB_ITER_END;
}
if (OB_SUCC(ret)) {
int64_t read_size = 0;
int64_t max_size = min(MY_SPEC.max_batch_size_, left_->get_spec().max_batch_size_);
if (OB_FAIL(group_join_buffer_.get_next_batch_from_store(max_size, read_size))) {
if (OB_ITER_END != ret) {
LOG_WARN("get next batch from store failed", KR(ret));
}
} else {
const_cast<ObBatchRows *>(left_brs)->skip_->reset(read_size);
const_cast<ObBatchRows *>(left_brs)->size_ = read_size;
const_cast<ObBatchRows *>(left_brs)->end_ = false;
left_row_joined_ = false;
}
}
}
return ret;
}
@ -870,21 +816,20 @@ int ObNestedLoopJoinOp::process_left_batch()
// Adding seperated guards for left/right children can also solve the problem,
// we don't choose that way due to performance reason.
batch_info_guard.set_batch_size(left_brs_->size_);
if (!MY_SPEC.use_group_ && !MY_SPEC.enable_px_batch_rescan_) {
if (!MY_SPEC.group_rescan_ && !MY_SPEC.enable_px_batch_rescan_) {
batch_info_guard.set_batch_idx(l_idx);
if (left_brs_->skip_->exist(l_idx)) { continue; }
if (OB_FAIL(rescan_params_batch_one(l_idx))) {
LOG_WARN("fail to rescan params", K(ret));
}
} else if (MY_SPEC.use_group_ && !MY_SPEC.enable_px_batch_rescan_) {
// after group rescan, first left row not need switch iter
if (!need_switch_iter_) {
need_switch_iter_ = true;
} else if (OB_FAIL(rescan_right_operator())) {
} else if (MY_SPEC.group_rescan_ && !MY_SPEC.enable_px_batch_rescan_) {
if (OB_FAIL(group_join_buffer_.rescan_right())) {
if (OB_ITER_END == ret) {
ret = OB_ERR_UNEXPECTED;
}
LOG_WARN("fail to switch iterator", K(ret));
LOG_WARN("rescan right failed", KR(ret));
} else if (OB_FAIL(group_join_buffer_.fill_cur_row_group_param())) {
LOG_WARN("fill group param failed", KR(ret));
}
} else if (MY_SPEC.enable_px_batch_rescan_) {
// NOTE: left batch is ALWAYS continous, NO need to check skip for
@ -979,9 +924,9 @@ int ObNestedLoopJoinOp::calc_right_batch_matched_result(
if (0 == conds.count()) {
brs_.skip_->deep_copy(*right_brs->skip_, right_brs->size_);
} else {
if (MY_SPEC.use_group_ || MY_SPEC.enable_px_batch_rescan_) {
if (MY_SPEC.enable_px_batch_rescan_) {
last_save_batch_.extend_save(eval_ctx_, right_brs->size_);
} else {
} else if (!MY_SPEC.group_rescan_) {
left_batch_.extend_save(eval_ctx_, right_brs->size_);
}
batch_info_guard.set_batch_size(right_brs->size_);
@ -1029,7 +974,7 @@ int ObNestedLoopJoinOp::output()
brs_.skip_->bit_calculate(*left_batch_.skip_, *left_matched_, left_batch_.size_,
[](const uint64_t l, const uint64_t r) { return (l | r); });
}
if (MY_SPEC.use_group_ || MY_SPEC.enable_px_batch_rescan_) {
if (MY_SPEC.enable_px_batch_rescan_) {
last_save_batch_.extend_save(eval_ctx_, left_batch_.size_);
}
left_batch_.to_exprs(eval_ctx_);
@ -1049,9 +994,9 @@ int ObNestedLoopJoinOp::output()
LOG_WARN("fail to get next batch", K(ret));
}
} else {
if (MY_SPEC.use_group_ || MY_SPEC.enable_px_batch_rescan_) {
if (MY_SPEC.enable_px_batch_rescan_) {
last_save_batch_.extend_save(eval_ctx_, read_rows);
} else {
} else if (!MY_SPEC.group_rescan_) {
left_batch_.extend_save(eval_ctx_, read_rows);
}
for (int64_t i = 0; OB_SUCC(ret) && i < read_rows; i++) {
@ -1172,90 +1117,5 @@ int ObNestedLoopJoinOp::calc_other_conds_with_update_left_expr(bool &is_match,
return ret;
}
int ObBatchRowDatums::init(const ObExprPtrIArray *exprs, ObIAllocator *alloc, int32_t batch_size)
{
int ret = OB_SUCCESS;
if (OB_ISNULL(alloc) || OB_ISNULL(exprs)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", K(ret), KP(alloc), KP(exprs));
} else {
char *buf= (char *)alloc->alloc(ObBitVector::memory_size(batch_size)
+ sizeof(ObDatum) * batch_size * exprs->count());
if (NULL == buf) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("fail to alloc memory", K(ret));
} else {
MEMSET(buf, 0, ObBitVector::memory_size(batch_size));
skip_ = to_bit_vector(buf);
alloc_ = alloc;
exprs_ = exprs;
datums_ = reinterpret_cast<ObDatum *>(buf + ObBitVector::memory_size(batch_size));
batch_size_ = batch_size;
size_ = 0;
saved_size_ = 0;
}
}
return ret;
}
void ObBatchRowDatums::from_exprs(ObEvalCtx &ctx, ObBitVector *skip, int64_t size)
{
OB_ASSERT(size <= batch_size_);
OB_ASSERT(OB_NOT_NULL(skip) && OB_NOT_NULL(exprs_));
for (int64_t i = 0; i < exprs_->count(); i++) {
ObExpr *expr = exprs_->at(i);
ObDatum *datums = expr->locate_batch_datums(ctx);
int64_t copy_size = (expr->is_batch_result() ? size: 1) * sizeof(ObDatum);
MEMCPY(datums_ + i * batch_size_, datums, copy_size);
}
size_ = size;
saved_size_ = size;
skip_->deep_copy(*skip, size);
}
void ObBatchRowDatums::extend_save(ObEvalCtx &ctx, int64_t size)
{
if (size > saved_size_) {
for (int64_t i = 0; i < exprs_->count(); i++) {
ObExpr *expr = exprs_->at(i);
if (expr->is_batch_result()) {
ObDatum *datums = expr->locate_batch_datums(ctx);
int64_t copy_size = (size - saved_size_) * sizeof(ObDatum);
MEMCPY(datums_ + i * batch_size_ + saved_size_, datums + saved_size_, copy_size);
}
}
saved_size_ = size;
}
}
void ObBatchRowDatums::to_exprs(ObEvalCtx &ctx)
{
if (saved_size_ > 0) {
for (int64_t i = 0; i < exprs_->count(); i++) {
ObExpr *expr = exprs_->at(i);
ObDatum *datums = expr->locate_batch_datums(ctx);
int64_t copy_size = (expr->is_batch_result() ? saved_size_: 1) * sizeof(ObDatum);
MEMCPY(datums, datums_ + i * batch_size_, copy_size);
}
}
}
void ObBatchRowDatums::to_exprs(ObEvalCtx &ctx, int64_t from_idx, int64_t to_idx)
{
OB_ASSERT(from_idx <= size_ && to_idx <= batch_size_);
OB_ASSERT(!skip_->exist(from_idx));
for (int64_t i = 0; i < exprs_->count(); i++) {
ObExpr *expr = exprs_->at(i);
ObDatum *datums = expr->locate_batch_datums(ctx);
if (!expr->is_batch_result()) {
*datums = *(datums_ + i * batch_size_);
} else {
*(datums + to_idx) = *(datums_ + i * batch_size_ + from_idx) ;
}
}
}
} // end namespace sql
} // end namespace oceanbase