patch 4.0
This commit is contained in:
@ -14,37 +14,46 @@
|
||||
|
||||
#include "sql/engine/expr/ob_expr_substring_index.h"
|
||||
#include "sql/engine/expr/ob_expr_util.h"
|
||||
#include "sql/engine/ob_exec_context.h"
|
||||
#include "lib/string/ob_string.h"
|
||||
#include "sql/session/ob_sql_session_info.h"
|
||||
#include "sql/engine/ob_exec_context.h"
|
||||
|
||||
namespace oceanbase {
|
||||
namespace oceanbase
|
||||
{
|
||||
using namespace common;
|
||||
namespace sql {
|
||||
namespace sql
|
||||
{
|
||||
|
||||
ObExprSubstringIndex::ObExprSubstringIndex(ObIAllocator& alloc)
|
||||
|
||||
ObExprSubstringIndex::ObExprSubstringIndex(ObIAllocator &alloc)
|
||||
: ObStringExprOperator(alloc, T_FUN_SYS_SUBSTRING_INDEX, N_SUBSTRING_INDEX, 3)
|
||||
{
|
||||
need_charset_convert_ = false;
|
||||
}
|
||||
|
||||
ObExprSubstringIndex::~ObExprSubstringIndex()
|
||||
{}
|
||||
{
|
||||
}
|
||||
|
||||
inline int ObExprSubstringIndex::calc_result_type3(
|
||||
ObExprResType& type, ObExprResType& str, ObExprResType& delim, ObExprResType& count, ObExprTypeCtx& type_ctx) const
|
||||
inline int ObExprSubstringIndex::calc_result_type3(ObExprResType &type,
|
||||
ObExprResType &str,
|
||||
ObExprResType &delim,
|
||||
ObExprResType &count,
|
||||
ObExprTypeCtx &type_ctx) const
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
str.set_calc_type(ObVarcharType);
|
||||
delim.set_calc_type(ObVarcharType);
|
||||
|
||||
// there can't set int type for count, because it is dynamically
|
||||
// count.set_calc_type(ObIntType);
|
||||
//there can't set int type for count, because it is dynamically
|
||||
//count.set_calc_type(ObIntType);
|
||||
|
||||
// substring_index is mysql only expr.
|
||||
CK(true == lib::is_mysql_mode());
|
||||
CK(NULL != type_ctx.get_session());
|
||||
|
||||
if (OB_SUCC(ret) && type_ctx.get_session()->use_static_typing_engine()) {
|
||||
if (OB_SUCC(ret)) {
|
||||
// cast count to int64, see comment in eval_substring_index
|
||||
count.set_calc_type(ObIntType);
|
||||
// Set cast mode for %count:
|
||||
@ -61,7 +70,8 @@ inline int ObExprSubstringIndex::calc_result_type3(
|
||||
ObExprResType types[2] = {alloc, alloc};
|
||||
types[0] = str;
|
||||
types[1] = delim;
|
||||
if (OB_FAIL(aggregate_charsets_for_string_result_with_comparison(type, types, 2, type_ctx.get_coll_type()))) {
|
||||
if (OB_FAIL(aggregate_charsets_for_string_result_with_comparison(
|
||||
type, types, 2, type_ctx.get_coll_type()))) {
|
||||
LOG_WARN("aggregate_charsets_for_string_result_with_comparison failed", K(ret));
|
||||
|
||||
} else {
|
||||
@ -69,165 +79,73 @@ inline int ObExprSubstringIndex::calc_result_type3(
|
||||
str.set_calc_collation_level(type.get_collation_level());
|
||||
delim.set_calc_collation_type(type.get_collation_type());
|
||||
delim.set_calc_collation_level(type.get_collation_level());
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* steps:
|
||||
* 1. check null: if any param is null, return null
|
||||
* 2. type promotion: convert str and delim to string
|
||||
, convert count to int
|
||||
* 3. check empty: if any param is empty string, return empty string
|
||||
* 4. call string_search: if everything is OK, find nth apperance of delim
|
||||
*/
|
||||
int ObExprSubstringIndex::calc_result3(
|
||||
ObObj& result, const ObObj& str, const ObObj& delim, const ObObj& count, common::ObExprCtx& expr_ctx) const
|
||||
int ObExprSubstringIndex::cg_expr(ObExprCGCtx &, const ObRawExpr &raw_expr, ObExpr &rt_expr) const
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
// check null
|
||||
if (OB_UNLIKELY(str.is_null() || delim.is_null() || count.is_null())) {
|
||||
result.set_null();
|
||||
const ObRawExpr *text_expr = NULL;
|
||||
const ObRawExpr *pattern_expr = NULL;
|
||||
const ObRawExpr *nth_appearance_expr = NULL;
|
||||
if (OB_UNLIKELY(3 != raw_expr.get_param_count())) {
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
LOG_WARN("substring_index op should have 3 arguments", K(raw_expr.get_param_count()));
|
||||
} else if (OB_ISNULL(text_expr = raw_expr.get_param_expr(0)) ||
|
||||
OB_ISNULL(pattern_expr = raw_expr.get_param_expr(1)) ||
|
||||
OB_ISNULL(nth_appearance_expr = raw_expr.get_param_expr(2))) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("null pointer", K(text_expr), K(pattern_expr), K(nth_appearance_expr));
|
||||
} else if (rt_expr.arg_cnt_ != 3 || OB_ISNULL(rt_expr.args_)) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("substring_index expr should have 3 arguments", K(ret),
|
||||
K(rt_expr.arg_cnt_), K(rt_expr.args_));
|
||||
} else if (OB_ISNULL(rt_expr.args_[0]) || OB_ISNULL(rt_expr.args_[1]) ||
|
||||
OB_ISNULL(rt_expr.args_[2])) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("child is null", K(ret), K(rt_expr.args_[0]), K(rt_expr.args_[1]),
|
||||
K(rt_expr.args_[2]));
|
||||
} else {
|
||||
/*
|
||||
* int64:
|
||||
* <0 val = left_point - val (abs(left_point - val) <= len)
|
||||
* >0 val = val - right_point(abs(val - right_point) <= len)
|
||||
* uint64:
|
||||
* val = val - right_point(abs(val - right_point)<=len)
|
||||
*/
|
||||
|
||||
ObString str_val = str.get_string();
|
||||
ObString delim_val = delim.get_string();
|
||||
int64_t count_val = 0;
|
||||
EXPR_DEFINE_CAST_CTX(expr_ctx, CM_NONE);
|
||||
EXPR_GET_INT64_V2(count, count_val);
|
||||
|
||||
/* deal with overflow */
|
||||
int32_t str_len = str.get_string().length();
|
||||
if (OB_SUCCESS == cast_ctx.warning_) {
|
||||
if (count_val < 0) {
|
||||
count_val = count_val - INT64_MIN <= str_len ? count_val - INT64_MIN : count_val;
|
||||
} else {
|
||||
count_val = INT64_MAX - count_val < str_len ? count_val - INT64_MAX - 1 : count_val;
|
||||
}
|
||||
} else {
|
||||
uint64_t count_uval = 0;
|
||||
EXPR_GET_UINT64_V2(count, count_uval);
|
||||
if (OB_SUCCESS == cast_ctx.warning_) {
|
||||
#define RIGHT_POINT 9223372036854775808UL
|
||||
count_val = static_cast<int64_t>(count_uval - RIGHT_POINT) <= str_len
|
||||
? static_cast<int64_t>(count_uval - RIGHT_POINT)
|
||||
: 0;
|
||||
#undef RIGHT_POINT
|
||||
} else {
|
||||
count_val = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (OB_SUCC(ret)) {
|
||||
// check empty string
|
||||
ObString empty;
|
||||
if (str_val == empty || delim_val == empty) {
|
||||
result.set_varchar(empty);
|
||||
} else {
|
||||
// everything is OK, find nth apperance of delim, set result
|
||||
ObString res_str;
|
||||
if (OB_FAIL(string_search(res_str, str_val, delim_val, count_val))) {
|
||||
LOG_WARN("string search failed", K(ret));
|
||||
} else {
|
||||
result.set_varchar(res_str);
|
||||
}
|
||||
}
|
||||
if (OB_LIKELY(OB_SUCC(ret) && !result.is_null())) {
|
||||
result.set_collation(result_type_);
|
||||
}
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* steps:
|
||||
* 1. if m_count = 0, return empty string
|
||||
* 2. if m_count != 0
|
||||
* A. if m_count > 0, find position of delim from front to back,
|
||||
* return string from 0 to position in m_str
|
||||
* B. if m_count < 0, find position of delim form back to front,
|
||||
* return string form position to the end
|
||||
* C. if position = -1 in A and B, nth m_delim is not found, return m_str
|
||||
*/
|
||||
int ObExprSubstringIndex::string_search(
|
||||
ObString& varchar, const ObString& m_str, const ObString& m_delim, const int64_t m_count)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
|
||||
if (0 != m_count) {
|
||||
int64_t pos = -1;
|
||||
if (0 < m_count) {
|
||||
ret = ObExprUtil::kmp(const_cast<char*>(m_delim.ptr()),
|
||||
m_delim.length(),
|
||||
const_cast<char*>(m_str.ptr()),
|
||||
m_str.length(),
|
||||
m_count,
|
||||
pos);
|
||||
if (-1 < pos) {
|
||||
// nth delim found from front to back
|
||||
varchar.assign(const_cast<char*>(m_str.ptr()), static_cast<int32_t>(pos));
|
||||
}
|
||||
} else if (0 > m_count) {
|
||||
ret = ObExprUtil::kmp_reverse(const_cast<char*>(m_delim.ptr()),
|
||||
m_delim.length(),
|
||||
const_cast<char*>(m_str.ptr()),
|
||||
m_str.length(),
|
||||
m_count,
|
||||
pos);
|
||||
if (-1 < pos) {
|
||||
// nth delim found from back to front
|
||||
varchar.assign(const_cast<char*>(m_str.ptr() + static_cast<int32_t>(pos + m_delim.length())),
|
||||
static_cast<int32_t>(m_str.length() - m_delim.length() - pos));
|
||||
}
|
||||
}
|
||||
|
||||
if (-1 == pos) {
|
||||
// substring not found, return m_str
|
||||
varchar.assign(const_cast<char*>(m_str.ptr()), static_cast<int32_t>(m_str.length()));
|
||||
}
|
||||
rt_expr.eval_func_ = eval_substring_index;
|
||||
rt_expr.eval_batch_func_ = ObExprSubstringIndex::eval_substring_index_batch;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObExprSubstringIndex::cg_expr(ObExprCGCtx&, const ObRawExpr&, ObExpr& rt_expr) const
|
||||
int ObExprSubstringIndex::eval_substring_index(
|
||||
const ObExpr &expr, ObEvalCtx &ctx, ObDatum &expr_datum)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
CK(3 == rt_expr.arg_cnt_);
|
||||
rt_expr.eval_func_ = eval_substring_index;
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObExprSubstringIndex::eval_substring_index(const ObExpr& expr, ObEvalCtx& ctx, ObDatum& expr_datum)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
ObDatum& str = expr.locate_param_datum(ctx, 0);
|
||||
ObDatum& delim = expr.locate_param_datum(ctx, 1);
|
||||
ObDatum& count = expr.locate_param_datum(ctx, 2);
|
||||
ObDatum &str = expr.locate_param_datum(ctx, 0);
|
||||
ObDatum &delim = expr.locate_param_datum(ctx, 1);
|
||||
ObDatum &count = expr.locate_param_datum(ctx, 2);
|
||||
if (OB_FAIL(expr.eval_param_value(ctx))) {
|
||||
LOG_WARN("evaluate parameters failed", K(ret));
|
||||
} else if (OB_UNLIKELY(str.is_null() || delim.is_null() || count.is_null())) {
|
||||
expr_datum.set_null();
|
||||
} else if (0 == str.len_ || 0 == delim.len_) {
|
||||
// return empty string if %str or %delim is empty.
|
||||
//重置null flag 防止丢失空串信息 https://work.aone.alibaba-inc.com/issue/30873161
|
||||
expr_datum.null_ = 0;
|
||||
expr_datum.len_ = 0;
|
||||
} else {
|
||||
const ObString m_delim = delim.get_string();
|
||||
// Static cast count to int32, compatible with mysql 5.6,
|
||||
// actually this is a bug and fixed in mysql 8.0.
|
||||
int32_t count_val = static_cast<int32_t>(count.get_int());
|
||||
ObString res_str;
|
||||
if (OB_FAIL(string_search(res_str, str.get_string(), delim.get_string(), count_val))) {
|
||||
ObExprKMPSearchCtx *kmp_ctx = NULL;
|
||||
const uint64_t op_id = static_cast<uint64_t>(expr.expr_ctx_id_);
|
||||
if (OB_FAIL(ObExprKMPSearchCtx::get_kmp_ctx_from_exec_ctx(ctx.exec_ctx_, op_id, kmp_ctx))) {
|
||||
LOG_WARN("get kmp ctx failed", K(ret));
|
||||
} else if (OB_FAIL(kmp_ctx->init(m_delim, count_val < 0, ctx.exec_ctx_.get_allocator()))) {
|
||||
LOG_WARN("init kmp ctx failed", K(ret), K(m_delim));
|
||||
} else if (OB_FAIL(kmp_ctx->substring_index_search(str.get_string(), count_val, res_str))) {
|
||||
LOG_WARN("string search failed", K(ret));
|
||||
} else {
|
||||
expr_datum.set_string(res_str);
|
||||
@ -237,5 +155,57 @@ int ObExprSubstringIndex::eval_substring_index(const ObExpr& expr, ObEvalCtx& ct
|
||||
return ret;
|
||||
}
|
||||
|
||||
} // namespace sql
|
||||
} // namespace oceanbase
|
||||
int ObExprSubstringIndex::eval_substring_index_batch(const ObExpr &expr,
|
||||
ObEvalCtx &ctx,
|
||||
const ObBitVector &skip,
|
||||
const int64_t batch_size)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
ObExprKMPSearchCtx *kmp_ctx = NULL;
|
||||
const uint64_t op_id = static_cast<uint64_t>(expr.expr_ctx_id_);
|
||||
if (OB_FAIL(expr.args_[0]->eval_batch(ctx, skip, batch_size))) {
|
||||
LOG_WARN("eval args_0 failed", K(ret));
|
||||
} else if (OB_FAIL(expr.args_[1]->eval_batch(ctx, skip, batch_size))) {
|
||||
LOG_WARN("eval args_1 failed", K(ret));
|
||||
} else if (OB_FAIL(expr.args_[2]->eval_batch(ctx, skip, batch_size))) {
|
||||
LOG_WARN("eval args_2 failed", K(ret));
|
||||
} else if (OB_FAIL(ObExprKMPSearchCtx::get_kmp_ctx_from_exec_ctx(ctx.exec_ctx_,
|
||||
op_id, kmp_ctx))) {
|
||||
LOG_WARN("get kmp ctx failed", K(ret));
|
||||
} else {
|
||||
ObDatum *res = expr.locate_batch_datums(ctx);
|
||||
ObBitVector &eval_flags = expr.get_evaluated_flags(ctx);
|
||||
for (int64_t i = 0; OB_SUCC(ret) && i < batch_size; ++i) {
|
||||
if (skip.at(i) || eval_flags.at(i)) {
|
||||
continue;;
|
||||
}
|
||||
ObString res_str;
|
||||
int32_t count_val = 0;
|
||||
ObDatum &text = expr.args_[0]->locate_expr_datum(ctx, i);
|
||||
ObDatum &delim = expr.args_[1]->locate_expr_datum(ctx, i);
|
||||
ObDatum &count = expr.args_[2]->locate_expr_datum(ctx, i);
|
||||
if (OB_UNLIKELY(text.is_null() || delim.is_null() || count.is_null())) {
|
||||
res[i].set_null();
|
||||
} else if (0 == text.len_ || 0 == delim.len_ ||
|
||||
0 == (count_val = static_cast<int32_t>(count.get_int()))) {
|
||||
// return empty string if %str or %delim is empty.
|
||||
//重置null flag 防止丢失空串信息 https://work.aone.alibaba-inc.com/issue/30873161
|
||||
res[i].null_ = 0;
|
||||
res[i].len_ = 0;
|
||||
} else if (OB_FAIL(kmp_ctx->init(delim.get_string(),
|
||||
count_val < 0, ctx.exec_ctx_.get_allocator()))) {
|
||||
LOG_WARN("init kmp ctx failed", K(ret), K(delim));
|
||||
} else if (OB_FAIL(kmp_ctx->substring_index_search(text.get_string(), count_val, res_str))) {
|
||||
LOG_WARN("string search failed", K(ret));
|
||||
} else {
|
||||
res[i].set_string(res_str);
|
||||
}
|
||||
eval_flags.set(i);
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
} /* sql */
|
||||
} /* oceanbase */
|
||||
|
||||
|
Reference in New Issue
Block a user