212 lines
8.1 KiB
C++
212 lines
8.1 KiB
C++
/**
 * Copyright (c) 2021 OceanBase
 * OceanBase CE is licensed under Mulan PubL v2.
 * You can use this software according to the terms and conditions of the Mulan PubL v2.
 * You may obtain a copy of Mulan PubL v2 at:
 *          http://license.coscl.org.cn/MulanPubL-2.0
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PubL v2 for more details.
 */
|
|
|
|
#define USING_LOG_PREFIX SQL_ENG
|
|
|
|
#include "sql/engine/expr/ob_expr_substring_index.h"
|
|
#include "sql/engine/expr/ob_expr_util.h"
|
|
#include "sql/engine/ob_exec_context.h"
|
|
#include "lib/string/ob_string.h"
|
|
#include "sql/session/ob_sql_session_info.h"
|
|
#include "sql/engine/ob_exec_context.h"
|
|
|
|
namespace oceanbase
|
|
{
|
|
using namespace common;
|
|
namespace sql
|
|
{
|
|
|
|
|
|
// Builds the SUBSTRING_INDEX operator on top of ObStringExprOperator, passing
// the operator type, its name, the literal 3 (presumably the parameter count --
// confirm against the base-class constructor) and the generated-column flag.
ObExprSubstringIndex::ObExprSubstringIndex(ObIAllocator &alloc)
    : ObStringExprOperator(alloc,
                           T_FUN_SYS_SUBSTRING_INDEX,
                           N_SUBSTRING_INDEX,
                           3,
                           VALID_FOR_GENERATED_COL)
{
  // This operator clears the need_charset_convert_ member it inherits.
  need_charset_convert_ = false;
}
|
|
|
|
// The destructor body is intentionally empty: nothing is released here.
ObExprSubstringIndex::~ObExprSubstringIndex() {}
|
|
|
|
/**
 * Deduce the result type and the per-argument calculation types of
 * SUBSTRING_INDEX(str, delim, count).
 *
 * @param type      [out] result type: varchar, length taken from %str, charset /
 *                  collation aggregated from %str and %delim.
 * @param str       [in/out] first argument; calc type becomes varchar and its calc
 *                  collation is aligned with the aggregated result collation.
 * @param delim     [in/out] second argument; handled the same way as %str.
 * @param count     [in/out] third argument; calc type becomes ObIntType.
 * @param type_ctx  [in/out] type deduction context; its cast mode additionally gets
 *                  CM_STRING_INTEGER_TRUNC and CM_NO_RANGE_CHECK.
 * @return OB_SUCCESS on success; a non-success code if one of the CK checks or the
 *         charset aggregation fails.
 *
 * NOTE: this definition is deliberately not marked `inline`. An inline function
 * must be defined in every translation unit that odr-uses it, and this
 * out-of-line definition is only visible inside this .cpp file.
 */
int ObExprSubstringIndex::calc_result_type3(ObExprResType &type,
                                            ObExprResType &str,
                                            ObExprResType &delim,
                                            ObExprResType &count,
                                            ObExprTypeCtx &type_ctx) const
{
  int ret = OB_SUCCESS;
  str.set_calc_type(ObVarcharType);
  delim.set_calc_type(ObVarcharType);

  // substring_index is mysql only expr.
  CK(true == lib::is_mysql_mode());
  CK(NULL != type_ctx.get_session());

  if (OB_SUCC(ret)) {
    // %count is evaluated as an int64 value.
    count.set_calc_type(ObIntType);
    // Cast mode used when casting %count:
    //   CM_STRING_INTEGER_TRUNC : truncate string to integer.
    //   CM_NO_RANGE_CHECK       : no range check for uint to int.
    type_ctx.set_cast_mode(type_ctx.get_cast_mode()
                           | CM_STRING_INTEGER_TRUNC
                           | CM_NO_RANGE_CHECK);
  }

  if (OB_SUCC(ret)) {
    type.set_varchar();
    type.set_length(str.get_length());
    // Aggregate the charset/collation of %str and %delim into the result type,
    // working on local copies of the two argument types.
    common::ObArenaAllocator alloc;
    ObExprResType types[2] = {alloc, alloc};
    types[0] = str;
    types[1] = delim;
    if (OB_FAIL(aggregate_charsets_for_string_result_with_comparison(
                type, types, 2, type_ctx.get_coll_type()))) {
      LOG_WARN("aggregate_charsets_for_string_result_with_comparison failed", K(ret));
    } else {
      // Both string arguments are calculated under the aggregated collation.
      str.set_calc_collation_type(type.get_collation_type());
      str.set_calc_collation_level(type.get_collation_level());
      delim.set_calc_collation_type(type.get_collation_type());
      delim.set_calc_collation_level(type.get_collation_level());
    }
  }

  return ret;
}
|
|
|
|
/**
 * Code generation hook: validates the raw expression and the runtime expression
 * and, when everything checks out, installs the row and batch evaluation
 * functions on %rt_expr.
 *
 * @param raw_expr  raw expression; must have exactly 3 non-null parameters.
 * @param rt_expr   runtime expression; must have exactly 3 non-null arguments.
 *                  On success its eval_func_ / eval_batch_func_ are set.
 * @return OB_SUCCESS, OB_INVALID_ARGUMENT when the raw parameter count is not 3,
 *         or OB_ERR_UNEXPECTED for any null pointer / wrong runtime arg count.
 */
int ObExprSubstringIndex::cg_expr(ObExprCGCtx &, const ObRawExpr &raw_expr, ObExpr &rt_expr) const
{
  int ret = OB_SUCCESS;
  const ObRawExpr *text_expr = NULL;
  const ObRawExpr *pattern_expr = NULL;
  const ObRawExpr *nth_appearance_expr = NULL;

  // Step 1: the raw expression must carry three parameters.
  if (OB_UNLIKELY(3 != raw_expr.get_param_count())) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("substring_index op should have 3 arguments", K(raw_expr.get_param_count()));
  }

  // Step 2: none of the raw parameters may be null. The short-circuit `||`
  // leaves later pointers untouched once an earlier one is found to be null.
  if (OB_SUCC(ret)) {
    if (OB_ISNULL(text_expr = raw_expr.get_param_expr(0)) ||
        OB_ISNULL(pattern_expr = raw_expr.get_param_expr(1)) ||
        OB_ISNULL(nth_appearance_expr = raw_expr.get_param_expr(2))) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("null pointer", K(text_expr), K(pattern_expr), K(nth_appearance_expr));
    }
  }

  // Step 3: the runtime expression must have an argument array of size three.
  if (OB_SUCC(ret)) {
    if (rt_expr.arg_cnt_ != 3 || OB_ISNULL(rt_expr.args_)) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("substring_index expr should have 3 arguments", K(ret),
               K(rt_expr.arg_cnt_), K(rt_expr.args_));
    }
  }

  // Step 4: every runtime argument must be present.
  if (OB_SUCC(ret)) {
    if (OB_ISNULL(rt_expr.args_[0]) || OB_ISNULL(rt_expr.args_[1]) ||
        OB_ISNULL(rt_expr.args_[2])) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("child is null", K(ret), K(rt_expr.args_[0]), K(rt_expr.args_[1]),
               K(rt_expr.args_[2]));
    }
  }

  // Step 5: all checks passed, hook up the evaluators.
  if (OB_SUCC(ret)) {
    rt_expr.eval_func_ = eval_substring_index;
    rt_expr.eval_batch_func_ = ObExprSubstringIndex::eval_substring_index_batch;
  }

  return ret;
}
|
|
|
|
int ObExprSubstringIndex::eval_substring_index(
|
|
const ObExpr &expr, ObEvalCtx &ctx, ObDatum &expr_datum)
|
|
{
|
|
int ret = OB_SUCCESS;
|
|
ObDatum &str = expr.locate_param_datum(ctx, 0);
|
|
ObDatum &delim = expr.locate_param_datum(ctx, 1);
|
|
ObDatum &count = expr.locate_param_datum(ctx, 2);
|
|
if (OB_FAIL(expr.eval_param_value(ctx))) {
|
|
LOG_WARN("evaluate parameters failed", K(ret));
|
|
} else if (OB_UNLIKELY(str.is_null() || delim.is_null() || count.is_null())) {
|
|
expr_datum.set_null();
|
|
} else if (0 == str.len_ || 0 == delim.len_) {
|
|
// return empty string if %str or %delim is empty.
|
|
//重置null flag 防止丢失空串信息
|
|
expr_datum.null_ = 0;
|
|
expr_datum.len_ = 0;
|
|
} else {
|
|
const ObString m_delim = delim.get_string();
|
|
// mysql 5.6 static cast count to int32,
|
|
// actually this is a bug and fixed in mysql 8.0.
|
|
int64_t count_val = count.get_int();
|
|
ObString res_str;
|
|
ObExprKMPSearchCtx *kmp_ctx = NULL;
|
|
const uint64_t op_id = static_cast<uint64_t>(expr.expr_ctx_id_);
|
|
if (OB_FAIL(ObExprKMPSearchCtx::get_kmp_ctx_from_exec_ctx(ctx.exec_ctx_, op_id, kmp_ctx))) {
|
|
LOG_WARN("get kmp ctx failed", K(ret));
|
|
} else if (OB_FAIL(kmp_ctx->init(m_delim, count_val < 0, ctx.exec_ctx_.get_allocator()))) {
|
|
LOG_WARN("init kmp ctx failed", K(ret), K(m_delim));
|
|
} else if (OB_FAIL(kmp_ctx->substring_index_search(str.get_string(), count_val, res_str))) {
|
|
LOG_WARN("string search failed", K(ret));
|
|
} else {
|
|
expr_datum.set_string(res_str);
|
|
}
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
int ObExprSubstringIndex::eval_substring_index_batch(const ObExpr &expr,
|
|
ObEvalCtx &ctx,
|
|
const ObBitVector &skip,
|
|
const int64_t batch_size)
|
|
{
|
|
int ret = OB_SUCCESS;
|
|
ObExprKMPSearchCtx *kmp_ctx = NULL;
|
|
const uint64_t op_id = static_cast<uint64_t>(expr.expr_ctx_id_);
|
|
if (OB_FAIL(expr.args_[0]->eval_batch(ctx, skip, batch_size))) {
|
|
LOG_WARN("eval args_0 failed", K(ret));
|
|
} else if (OB_FAIL(expr.args_[1]->eval_batch(ctx, skip, batch_size))) {
|
|
LOG_WARN("eval args_1 failed", K(ret));
|
|
} else if (OB_FAIL(expr.args_[2]->eval_batch(ctx, skip, batch_size))) {
|
|
LOG_WARN("eval args_2 failed", K(ret));
|
|
} else if (OB_FAIL(ObExprKMPSearchCtx::get_kmp_ctx_from_exec_ctx(ctx.exec_ctx_,
|
|
op_id, kmp_ctx))) {
|
|
LOG_WARN("get kmp ctx failed", K(ret));
|
|
} else {
|
|
ObDatum *res = expr.locate_batch_datums(ctx);
|
|
ObBitVector &eval_flags = expr.get_evaluated_flags(ctx);
|
|
for (int64_t i = 0; OB_SUCC(ret) && i < batch_size; ++i) {
|
|
if (skip.at(i) || eval_flags.at(i)) {
|
|
continue;;
|
|
}
|
|
ObString res_str;
|
|
int64_t count_val = 0;
|
|
ObDatum &text = expr.args_[0]->locate_expr_datum(ctx, i);
|
|
ObDatum &delim = expr.args_[1]->locate_expr_datum(ctx, i);
|
|
ObDatum &count = expr.args_[2]->locate_expr_datum(ctx, i);
|
|
if (OB_UNLIKELY(text.is_null() || delim.is_null() || count.is_null())) {
|
|
res[i].set_null();
|
|
} else if (0 == text.len_ || 0 == delim.len_ ||
|
|
0 == (count_val = count.get_int())) {
|
|
// return empty string if %str or %delim is empty.
|
|
//重置null flag 防止丢失空串信息
|
|
res[i].null_ = 0;
|
|
res[i].len_ = 0;
|
|
} else if (OB_FAIL(kmp_ctx->init(delim.get_string(),
|
|
count_val < 0, ctx.exec_ctx_.get_allocator()))) {
|
|
LOG_WARN("init kmp ctx failed", K(ret), K(delim));
|
|
} else if (OB_FAIL(kmp_ctx->substring_index_search(text.get_string(), count_val, res_str))) {
|
|
LOG_WARN("string search failed", K(ret));
|
|
} else {
|
|
res[i].set_string(res_str);
|
|
}
|
|
eval_flags.set(i);
|
|
}
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
} /* sql */
|
|
} /* oceanbase */
|
|
|