Files
oceanbase/src/sql/engine/set/ob_merge_set_op.cpp
wangzelin.wzl 93a1074b0c patch 4.0
2022-10-24 17:57:12 +08:00

436 lines
14 KiB
C++

/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#define USING_LOG_PREFIX SQL_ENG
#include "sql/engine/set/ob_merge_set_op.h"
namespace oceanbase
{
using namespace common;
namespace sql
{
ObMergeSetSpec::ObMergeSetSpec(ObIAllocator &alloc, const ObPhyOperatorType type)
: ObSetSpec(alloc, type)
{
}
OB_SERIALIZE_MEMBER((ObMergeSetSpec, ObSetSpec));
ObMergeSetOp::ObMergeSetOp(ObExecContext &exec_ctx, const ObOpSpec &spec, ObOpInput *input)
: ObOperator(exec_ctx, spec, input),
alloc_(ObModIds::OB_SQL_MERGE_GROUPBY,
OB_MALLOC_NORMAL_BLOCK_SIZE, OB_SERVER_TENANT_ID, ObCtxIds::WORK_AREA),
last_row_(alloc_),
cmp_(),
need_skip_init_row_(false),
last_row_idx_(-1)
{}
int ObMergeSetOp::inner_open()
{
int ret = OB_SUCCESS;
if (OB_ISNULL(left_) || OB_ISNULL(right_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected status: left or right is null", K(ret), K(left_), K(right_));
} else {
const ObMergeSetSpec &spec = static_cast<const ObMergeSetSpec&>(get_spec());
if (OB_FAIL(cmp_.init(&spec.sort_collations_, &spec.sort_cmp_funs_))) {
LOG_WARN("failed to init compare function", K(ret));
}
}
return ret;
}
int ObMergeSetOp::inner_close()
{
return ObOperator::inner_close();
}
int ObMergeSetOp::inner_rescan()
{
int ret = OB_SUCCESS;
last_row_.reset();
alloc_.reset();
need_skip_init_row_ = false;
last_row_idx_ = -1;
if (OB_FAIL(ObOperator::inner_rescan())) {
LOG_WARN("failed to rescan", K(ret));
}
return ret;
}
void ObMergeSetOp::destroy()
{
last_row_.reset();
alloc_.reset();
ObOperator::destroy();
}
/**
* 拿数据时,由child_op来驱动,如果是left,则拿的就是left的数据,则后续就是需要用left来获取output的ObExprs
* 相反,如果是right op,则拿的就是right的数据,则后续需要用right来获取output的ObExprs
**/
int ObMergeSetOp::do_strict_distinct(
ObOperator &child_op,
const ObIArray<ObExpr*> &compare_row,
const ObIArray<ObExpr*> *&output_row,
int &cmp)
{
int ret = OB_SUCCESS;
bool is_break = false;
while (OB_SUCC(ret) && !is_break) {
if (OB_FAIL(child_op.get_next_row())) {
if (OB_ITER_END != ret) {
LOG_WARN("failed to get next row", K(ret));
}
} else if (OB_FAIL(cmp_(
compare_row, child_op.get_spec().output_, eval_ctx_, cmp))) {
LOG_WARN("strict compare with last_row failed", K(ret), K(compare_row));
} else if (0 != cmp) {
is_break = true;
}
}
if (OB_SUCC(ret)) {
output_row = &child_op.get_spec().output_;
}
return ret;
}
int ObMergeSetOp::convert_row(
const ObChunkDatumStore::StoredRow *sr, const common::ObIArray<ObExpr*> &exprs)
{
int ret = OB_SUCCESS;
if (OB_ISNULL(sr) || sr->cnt_ != exprs.count()) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected status: store row is null or column count is not match", K(ret));
} else {
for (uint32_t i = 0; i < sr->cnt_; ++i) {
exprs.at(i)->locate_expr_datum(eval_ctx_) = sr->cells()[i];
exprs.at(i)->set_evaluated_projected(eval_ctx_);
}
LOG_DEBUG("trace convert row", K(ret), K(sr->cnt_), K(ROWEXPR2STR(eval_ctx_, exprs)));
}
return ret;
}
int ObMergeSetOp::convert_row(const common::ObIArray<ObExpr*> &src_exprs,
const common::ObIArray<ObExpr*> &dst_exprs,
const int64_t src_idx,
const int64_t dst_idx)
{
int ret = OB_SUCCESS;
if (dst_exprs.count() != src_exprs.count()) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected status: exprs is not match", K(ret), K(src_exprs.count()),
K(dst_exprs.count()));
} else {
ObDatum *src_datum = nullptr;
ObEvalCtx::BatchInfoScopeGuard batch_info_guard(eval_ctx_);
batch_info_guard.set_batch_idx(src_idx);
for (uint32_t i = 0; i < dst_exprs.count() && OB_SUCC(ret); ++i) {
if (OB_FAIL(src_exprs.at(i)->eval(eval_ctx_, src_datum))) {
LOG_WARN("failed to eval expr", K(ret), K(i));
} else {
dst_exprs.at(i)->locate_expr_datum(eval_ctx_, dst_idx) = *src_datum;
dst_exprs.at(i)->set_evaluated_flag(eval_ctx_);
}
}
if (OB_SUCC(ret)) {
LOG_DEBUG("trace convert row", K(ret), K(ROWEXPR2STR(eval_ctx_, src_exprs)));
}
}
return ret;
}
int ObMergeSetOp::Compare::init(
const common::ObIArray<ObSortFieldCollation> *sort_collations,
const common::ObIArray<common::ObCmpFunc> *cmp_funcs)
{
int ret = OB_SUCCESS;
if (OB_UNLIKELY(nullptr == sort_collations || nullptr == cmp_funcs)
|| sort_collations->count() != cmp_funcs->count()) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("compare info is null", K(ret), K(sort_collations), K(cmp_funcs));
} else {
sort_collations_ = sort_collations;
cmp_funcs_ = cmp_funcs;
}
return ret;
}
int ObMergeSetOp::Compare::operator()(
const ObChunkDatumStore::StoredRow &l,
const common::ObIArray<ObExpr*> &r,
ObEvalCtx &eval_ctx,
int &cmp)
{
int ret = OB_SUCCESS;
cmp = 0;
const ObDatum *lcells = l.cells();
ObDatum *r_datum = nullptr;
for (int64_t i = 0; i < sort_collations_->count() && OB_SUCC(ret); i++) {
int64_t idx = sort_collations_->at(i).field_idx_;
if (OB_FAIL(r.at(idx)->eval(eval_ctx, r_datum))) {
LOG_WARN("failed to get expr value", K(ret), K(i));
} else {
cmp = cmp_funcs_->at(i).cmp_func_(lcells[idx], *r_datum);
if (0 != cmp) {
cmp = sort_collations_->at(i).is_ascending_ ? cmp : -cmp;
break;
}
}
}
return ret;
}
int ObMergeSetOp::Compare::operator()(
const common::ObIArray<ObExpr*> &l,
const common::ObIArray<ObExpr*> &r,
ObEvalCtx &eval_ctx,
int &cmp)
{
int ret = OB_SUCCESS;
cmp = 0;
ObDatum *l_datum = nullptr;
ObDatum *r_datum = nullptr;
for (int64_t i = 0; i < sort_collations_->count() && OB_SUCC(ret); i++) {
int64_t idx = sort_collations_->at(i).field_idx_;
if (OB_FAIL(l.at(idx)->eval(eval_ctx, l_datum))) {
LOG_WARN("failed to get expr value", K(ret), K(i));
} else if (OB_FAIL(r.at(idx)->eval(eval_ctx, r_datum))) {
} else {
cmp = cmp_funcs_->at(i).cmp_func_(*l_datum, *r_datum);
LOG_DEBUG("debug compare merge set op", K(EXPR2STR(eval_ctx, *l.at(idx))),
K(EXPR2STR(eval_ctx, *r.at(idx))), K(cmp));
if (0 != cmp) {
cmp = sort_collations_->at(i).is_ascending_ ? cmp : -cmp;
break;
}
}
}
return ret;
}
int ObMergeSetOp::Compare::operator() (const common::ObIArray<ObExpr*> &l,
const common::ObIArray<ObExpr*> &r,
const int64_t l_idx,
const int64_t r_idx,
ObEvalCtx &eval_ctx,
int &cmp)
{
int ret = OB_SUCCESS;
cmp = 0;
ObDatum *l_datum = nullptr;
ObDatum *r_datum = nullptr;
ObEvalCtx::BatchInfoScopeGuard batch_info_guard(eval_ctx);
for (int64_t i = 0; i < sort_collations_->count() && OB_SUCC(ret); i++) {
int64_t idx = sort_collations_->at(i).field_idx_;
batch_info_guard.set_batch_idx(l_idx);
if (OB_FAIL(l.at(idx)->eval(eval_ctx, l_datum))) {
LOG_WARN("failed to get expr value", K(ret), K(i));
} else if (FALSE_IT(batch_info_guard.set_batch_idx(r_idx))) {
} else if (OB_FAIL(r.at(idx)->eval(eval_ctx, r_datum))) {
LOG_WARN("failed to get expr value", K(ret), K(i));
} else {
cmp = cmp_funcs_->at(i).cmp_func_(*l_datum, *r_datum);
if (0 != cmp) {
cmp = sort_collations_->at(i).is_ascending_ ? cmp : -cmp;
break;
}
}
}
return ret;
}
int ObMergeSetOp::Compare::operator() (const ObChunkDatumStore::StoredRow &l,
const common::ObIArray<ObExpr*> &r,
const int64_t r_idx,
ObEvalCtx &eval_ctx,
int &cmp)
{
int ret = OB_SUCCESS;
cmp = 0;
const ObDatum *l_datum = nullptr;
ObDatum *r_datum = nullptr;
const ObDatum *l_cells = l.cells();
ObEvalCtx::BatchInfoScopeGuard batch_info_guard(eval_ctx);
batch_info_guard.set_batch_idx(r_idx);
for (int64_t i = 0; i < sort_collations_->count() && OB_SUCC(ret); i++) {
int64_t idx = sort_collations_->at(i).field_idx_;
l_datum = &l_cells[idx];
batch_info_guard.set_batch_idx(r_idx);
if (OB_FAIL(r.at(idx)->eval(eval_ctx, r_datum))) {
} else {
cmp = cmp_funcs_->at(i).cmp_func_(*l_datum, *r_datum);
if (0 != cmp) {
cmp = sort_collations_->at(i).is_ascending_ ? cmp : -cmp;
break;
}
}
}
return ret;
}
int ObMergeSetOp::convert_batch(const common::ObIArray<ObExpr*> &src_exprs,
const common::ObIArray<ObExpr*> &dst_exprs,
ObBatchRows &brs,
bool is_union_all)
{
int ret = OB_SUCCESS;
if (0 == brs.size_) {
} else if (dst_exprs.count() != src_exprs.count()) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected status: exprs is not match", K(ret), K(src_exprs.count()),
K(dst_exprs.count()));
} else {
if (!is_union_all) {
// reorg to dense
int64_t size = 0;
for (int64_t j = 0; j < brs.size_; ++j) {
if (brs.skip_->at(j)) {
continue;
}
for (uint32_t i = 0; i < dst_exprs.count(); ++i) {
ObDatum &src = src_exprs.at(i)->locate_expr_datum(eval_ctx_, j);
dst_exprs.at(i)->locate_expr_datum(eval_ctx_, size) = src;
}
++size;
} // end for
for (uint32_t i = 0; i < dst_exprs.count(); ++i) {
dst_exprs.at(i)->set_evaluated_projected(eval_ctx_);
}
brs.skip_->reset(brs.size_);
brs.size_ = size;
} else {
for (uint32_t i = 0; i < dst_exprs.count(); ++i) {
ObDatum *dst = dst_exprs.at(i)->locate_batch_datums(eval_ctx_);
if (!src_exprs.at(i)->is_batch_result()) {
ObDatum &src = src_exprs.at(i)->locate_expr_datum(eval_ctx_, 0);
for (int64_t j = 0; j < brs.size_; ++j) {
if (brs.skip_->at(j)) {
continue;
}
dst_exprs.at(i)->locate_expr_datum(eval_ctx_, j) = src;
}
} else {
ObDatum *src = src_exprs.at(i)->locate_batch_datums(eval_ctx_);
MEMCPY(dst, src, sizeof(ObDatum) * brs.size_);
}
dst_exprs.at(i)->set_evaluated_projected(eval_ctx_);
} // end for
}
}
return ret;
}
int ObMergeSetOp::locate_next_left_inside(ObOperator &child_op,
const int64_t last_idx,
const ObBatchRows &row_brs,
int64_t &curr_idx,
bool &is_first)
{
int ret = OB_SUCCESS;
if (OB_UNLIKELY(is_first)) {
//return 1st valid row idx
is_first = false;
curr_idx = 0;
for (; curr_idx < row_brs.size_; ++curr_idx) {
if (!row_brs.skip_->at(curr_idx)) {
break;
}
}
OB_ASSERT(curr_idx < row_brs.size_);
} else {
bool got_row = false;
int cmp = 0;
while (OB_SUCC(ret) && !got_row) {
//try move to next row in batch, if cant, return iter_end
for (; curr_idx < row_brs.size_; ++curr_idx) {
if (!row_brs.skip_->at(curr_idx)) {
break;
}
}
if (curr_idx == row_brs.size_) {
ret = OB_ITER_END;
} else if (OB_NOT_NULL(last_row_.store_row_)
&& OB_FAIL(cmp_(*last_row_.store_row_, child_op.get_spec().output_,
curr_idx, eval_ctx_, cmp))) {
LOG_WARN("failed to compare row", K(ret));
} else if (nullptr == last_row_.store_row_
&& OB_FAIL(cmp_(child_op.get_spec().output_, child_op.get_spec().output_,
last_idx, curr_idx, eval_ctx_, cmp))) {
LOG_WARN("failed to compare row", K(ret));
} else if (0 != cmp) {
got_row = true;
} else {
++curr_idx;
}
}
}
return ret;
}
int ObMergeSetOp::locate_next_right(ObOperator &child_op,
const int64_t batch_size,
const ObBatchRows *&child_brs,
int64_t &curr_idx)
{
int ret = OB_SUCCESS;
//first batch
if (OB_ISNULL(child_brs) || child_brs->size_ == curr_idx/*last row in batch*/) {
if (OB_FAIL(child_op.get_next_batch(batch_size, child_brs))) {
LOG_WARN("failed to get next batch", K(ret));
} else if (child_brs->end_ && 0 == child_brs->size_) {
ret = OB_ITER_END;
} else {
curr_idx = 0;
//locate 1st valid row and return, we're sure at least 1 row will be got
for (; curr_idx < child_brs->size_; ++curr_idx) {
if (!child_brs->skip_->at(curr_idx)) {
break;
}
}
OB_ASSERT(curr_idx < child_brs->size_);
}
} else {
//try move to next row in batch, if cant, get next batch
for (; curr_idx < child_brs->size_; ++curr_idx) {
if (!child_brs->skip_->at(curr_idx)) {
break;
}
}
if (curr_idx == child_brs->size_) {
if (OB_FAIL(child_op.get_next_batch(batch_size, child_brs))) {
LOG_WARN("failed to get next batch", K(ret));
} else if (child_brs->end_ && 0 == child_brs->size_) {
ret = OB_ITER_END;
} else {
curr_idx = 0;
//locate 1st valid row and return, we're sure at least 1 row will be got
for (; curr_idx < child_brs->size_; ++curr_idx) {
if (!child_brs->skip_->at(curr_idx)) {
break;
}
}
OB_ASSERT(curr_idx < child_brs->size_);
}
}
}
return ret;
}
} // end namespace sql
} // end namespace oceanbase