[FEAT MERGE] Lob SQL refactoring (Mem-LobLocator, expressions and dbms_lob adaptions)
Co-authored-by: chaser-ch <chaser.ch@antgroup.com>
This commit is contained in:
@ -20,6 +20,7 @@
|
||||
#include "objit/common/ob_item_type.h"
|
||||
#include "sql/session/ob_sql_session_info.h"
|
||||
#include "sql/engine/ob_exec_context.h"
|
||||
#include "sql/engine/expr/ob_expr_lob_utils.h"
|
||||
|
||||
using namespace oceanbase::common;
|
||||
using namespace oceanbase::sql;
|
||||
@ -84,6 +85,128 @@ int ObExprInstrb::calc_result_typeN(ObExprResType &type,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int calc_instrb_text(ObTextStringIter &haystack_iter,
|
||||
ObTextStringIter &needle_iter,
|
||||
ObIAllocator &calc_alloc,
|
||||
ObExprKMPSearchCtx *kmp_ctx,
|
||||
ObEvalCtx &ctx,
|
||||
int64_t &pos_int,
|
||||
int64_t &occ_int,
|
||||
ObDatum &res_datum)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
ObString haystack_data;
|
||||
ObString needle_data;
|
||||
int64_t ret_idx = -1;
|
||||
if (OB_FAIL(haystack_iter.init(0, NULL, &calc_alloc))) {
|
||||
LOG_WARN("init haystack_iter failed ", K(ret), K(haystack_iter));
|
||||
} else if (OB_FAIL(needle_iter.init(0, NULL, &calc_alloc))) {
|
||||
LOG_WARN("init needle_iter failed ", K(ret), K(needle_iter));
|
||||
} else if (OB_FAIL(needle_iter.get_full_data(needle_data))) {
|
||||
LOG_WARN("init lob str iter failed ", K(ret), K(needle_iter));
|
||||
} else if (OB_FAIL(kmp_ctx->init(needle_data, pos_int < 0, ctx.exec_ctx_.get_allocator()))) {
|
||||
LOG_WARN("init kmp ctx failed", K(ret), K(needle_data));
|
||||
} else {
|
||||
ObTextStringIterState state;
|
||||
size_t needle_byte_len = needle_data.length();
|
||||
if (haystack_iter.is_outrow_lob()) {
|
||||
haystack_iter.set_reserved_byte_len(needle_byte_len - 1);
|
||||
if (pos_int > 0) {
|
||||
haystack_iter.set_start_offset(pos_int - 1); // start char len
|
||||
pos_int = 1; // start pos is handled by lob mngr for out row lobs
|
||||
} else {
|
||||
haystack_iter.set_backward(); // search backward if pos < 0
|
||||
}
|
||||
}
|
||||
// search one block each loop
|
||||
bool not_first_search = false;
|
||||
if (pos_int > 0) {
|
||||
int64_t count = 0;
|
||||
while (count < occ_int && (state = haystack_iter.get_next_block(haystack_data)) == TEXTSTRING_ITER_NEXT) {
|
||||
if (not_first_search) {
|
||||
ret_idx = -1;
|
||||
pos_int = 1;
|
||||
}
|
||||
for (; count < occ_int; ++count) {
|
||||
if (OB_FAIL(kmp_ctx->instrb_search(haystack_data, pos_int, 1, ret_idx))) {
|
||||
LOG_WARN("search needle in haystack failed", K(ret), K(haystack_data), K(needle_data));
|
||||
} else {
|
||||
if (ret_idx <= 0) {
|
||||
break;
|
||||
} else {
|
||||
pos_int = ret_idx + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
not_first_search = true;
|
||||
}
|
||||
if (state != TEXTSTRING_ITER_NEXT && state != TEXTSTRING_ITER_END) {
|
||||
ret = OB_INVALID_DATA;
|
||||
LOG_WARN("get_calc_cs_type failed", K(ret), K(state));
|
||||
} else {
|
||||
if (ret_idx != 0) { // add accessed length by get_next_block()
|
||||
ret_idx += haystack_iter.get_last_accessed_byte_len() + haystack_iter.get_start_offset();
|
||||
if (haystack_iter.get_iter_count() > 1) { // minus reserved length
|
||||
OB_ASSERT(ret_idx > haystack_iter.get_reserved_byte_len());
|
||||
ret_idx -= haystack_iter.get_reserved_byte_len();
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
int64_t total_byte_len = 0;
|
||||
int64_t max_access_byte_len = 0;
|
||||
if (OB_FAIL(haystack_iter.get_byte_len(total_byte_len))) {
|
||||
LOG_WARN("get haystack char len failed", K(ret), K(state));
|
||||
} else if (haystack_iter.is_outrow_lob()) {
|
||||
max_access_byte_len = total_byte_len + pos_int + 1;
|
||||
haystack_iter.set_start_offset(-pos_int - 1); // start char len
|
||||
pos_int = -1;
|
||||
} else {
|
||||
max_access_byte_len = total_byte_len;
|
||||
}
|
||||
int64_t count = 0;
|
||||
while (OB_SUCC(ret)
|
||||
&& count < occ_int
|
||||
&& (state = haystack_iter.get_next_block(haystack_data)) == TEXTSTRING_ITER_NEXT) {
|
||||
if (not_first_search) {
|
||||
ret_idx = -1;
|
||||
pos_int = -1;
|
||||
}
|
||||
for (; count < occ_int; ++count) {
|
||||
if (OB_FAIL(kmp_ctx->instrb_search(haystack_data, pos_int, 1, ret_idx))) {
|
||||
LOG_WARN("search needle in haystack failed", K(ret), K(haystack_data), K(needle_data));
|
||||
} else {
|
||||
if (ret_idx <= 0) {
|
||||
break;
|
||||
} else { // Notice: negative pos
|
||||
pos_int = -1 -(static_cast<int64_t>(haystack_iter.get_accessed_len()) - ret_idx + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
not_first_search = true;
|
||||
}
|
||||
if (OB_FAIL(ret)) {
|
||||
} else if (state != TEXTSTRING_ITER_NEXT && state != TEXTSTRING_ITER_END) {
|
||||
ret = OB_INVALID_DATA;
|
||||
LOG_WARN("get_calc_cs_type failed", K(ret), K(state));
|
||||
} else {
|
||||
if (ret_idx != 0) { // add accessed length by get_next_block()
|
||||
ret_idx += (max_access_byte_len - static_cast<int64_t>(haystack_iter.get_accessed_byte_len()));
|
||||
}
|
||||
}
|
||||
}
|
||||
number::ObNumber res_nmb;
|
||||
ObNumStackOnceAlloc tmp_alloc;
|
||||
if (OB_FAIL(ret)) {
|
||||
} else if (OB_FAIL(res_nmb.from(ret_idx, tmp_alloc))) {
|
||||
LOG_WARN("int64_t to ObNumber failed", K(ret), K(ret_idx));
|
||||
} else {
|
||||
res_datum.set_number(res_nmb);
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int calc_instrb_expr(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res_datum)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
@ -94,13 +217,17 @@ int calc_instrb_expr(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res_datum)
|
||||
int64_t occ_int = 0;
|
||||
number::ObNumber res_nmb;
|
||||
ObCollationType calc_cs_type = CS_TYPE_INVALID;
|
||||
const ObObjType haystack_type = expr.args_[0]->datum_meta_.type_;
|
||||
const ObObjType needle_type = expr.args_[1]->datum_meta_.type_;
|
||||
bool haystack_has_lob_header = expr.args_[0]->obj_meta_.has_lob_header();
|
||||
bool needle_has_lob_header = expr.args_[1]->obj_meta_.has_lob_header();
|
||||
if (OB_FAIL(ObExprOracleInstr::calc_oracle_instr_arg(expr, ctx, is_null,
|
||||
haystack, needle,
|
||||
pos_int, occ_int, calc_cs_type))) {
|
||||
LOG_WARN("calc_instrb_arg failed", K(ret));
|
||||
} else if (is_null) {
|
||||
res_datum.set_null();
|
||||
} else if (OB_UNLIKELY(0 == haystack->get_string().length())) {
|
||||
} else if (OB_UNLIKELY(0 == haystack->get_string().length() || ob_is_empty_lob(haystack_type, *haystack, haystack_has_lob_header))) {
|
||||
ObNumStackOnceAlloc tmp_alloc;
|
||||
if (OB_FAIL(res_nmb.from((uint64_t)(0), tmp_alloc))) {
|
||||
LOG_WARN("get number from int failed", K(ret));
|
||||
@ -123,10 +250,10 @@ int calc_instrb_expr(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res_datum)
|
||||
ObNumStackOnceAlloc tmp_alloc;
|
||||
ObExprKMPSearchCtx *kmp_ctx = NULL;
|
||||
const uint64_t op_id = static_cast<uint64_t>(expr.expr_ctx_id_);
|
||||
if (OB_ISNULL(needle_str.ptr())) {
|
||||
if (OB_ISNULL(needle_str.ptr()) || ob_is_empty_lob(needle_type, *needle, needle_has_lob_header)) {
|
||||
ret = OB_CLOB_ONLY_SUPPORT_WITH_MULTIBYTE_FUN;
|
||||
LOG_WARN("ptr is null", K(needle));
|
||||
} else if (OB_UNLIKELY(0 == needle_str.length())) {
|
||||
} else if (OB_UNLIKELY(0 == needle_str.length() || ob_is_empty_lob(needle_type, *needle, needle_has_lob_header))) {
|
||||
if (OB_FAIL(res_nmb.from((uint64_t)(0), tmp_alloc))) {
|
||||
LOG_WARN("get number from int failed", K(ret));
|
||||
} else {
|
||||
@ -134,14 +261,27 @@ int calc_instrb_expr(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res_datum)
|
||||
}
|
||||
} else if (OB_FAIL(ObExprKMPSearchCtx::get_kmp_ctx_from_exec_ctx(ctx.exec_ctx_, op_id, kmp_ctx))) {
|
||||
LOG_WARN("get kmp ctx failed", K(ret));
|
||||
} else if (OB_FAIL(kmp_ctx->init(needle_str, pos_int < 0, ctx.exec_ctx_.get_allocator()))) {
|
||||
LOG_WARN("init kmp ctx failed", K(ret), K(needle_str));
|
||||
} else if (OB_FAIL(kmp_ctx->instrb_search(haystack_str, pos_int, occ_int, ret_idx))) {
|
||||
LOG_WARN("search needle in haystack failed", K(ret), K(haystack_str), K(needle_str));
|
||||
} else if (OB_FAIL(res_nmb.from(ret_idx, tmp_alloc))) {
|
||||
LOG_WARN("int64_t to ObNumber failed", K(ret), K(ret_idx));
|
||||
} else {
|
||||
res_datum.set_number(res_nmb);
|
||||
} else if (!ob_is_text_tc(haystack_type) && !ob_is_text_tc(needle_type)) {
|
||||
if (OB_FAIL(kmp_ctx->init(needle_str, pos_int < 0, ctx.exec_ctx_.get_allocator()))) {
|
||||
LOG_WARN("init kmp ctx failed", K(ret), K(needle_str));
|
||||
} else if (OB_FAIL(kmp_ctx->instrb_search(haystack_str, pos_int, occ_int, ret_idx))) {
|
||||
LOG_WARN("search needle in haystack failed", K(ret), K(haystack_str), K(needle_str));
|
||||
} else if (OB_FAIL(res_nmb.from(ret_idx, tmp_alloc))) {
|
||||
LOG_WARN("int64_t to ObNumber failed", K(ret), K(ret_idx));
|
||||
} else {
|
||||
res_datum.set_number(res_nmb);
|
||||
}
|
||||
} else { // text types
|
||||
// similar to instr, but reserve length is byte len, use binary cs type for byte access
|
||||
ObTextStringIter haystack_iter(haystack_type, CS_TYPE_BINARY, haystack->get_string(), haystack_has_lob_header);
|
||||
ObTextStringIter needle_iter(needle_type, calc_cs_type, needle->get_string(), needle_has_lob_header);
|
||||
ObEvalCtx::TempAllocGuard alloc_guard(ctx);
|
||||
ObIAllocator &calc_alloc = alloc_guard.get_allocator();
|
||||
|
||||
if (OB_FAIL(calc_instrb_text(haystack_iter, needle_iter, calc_alloc,
|
||||
kmp_ctx, ctx, pos_int, occ_int, res_datum))) {
|
||||
LOG_WARN("failed to calc_instrb_text", K(ret));
|
||||
}
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
@ -156,6 +296,10 @@ int ObExprInstrb::calc_instrb_expr_batch(const ObExpr &expr,
|
||||
ObExprKMPSearchCtx *kmp_ctx = NULL;
|
||||
const uint64_t op_id = static_cast<uint64_t>(expr.expr_ctx_id_);
|
||||
ObCollationType calc_cs_type = CS_TYPE_INVALID;
|
||||
const ObObjType haystack_type = expr.args_[0]->datum_meta_.type_;
|
||||
const ObObjType needle_type = expr.args_[1]->datum_meta_.type_;
|
||||
bool haystack_has_lob_header = expr.args_[0]->obj_meta_.has_lob_header();
|
||||
bool needle_has_lob_header = expr.args_[1]->obj_meta_.has_lob_header();
|
||||
if (OB_FAIL(expr.args_[0]->eval_batch(ctx, skip, batch_size))) {
|
||||
LOG_WARN("eval args_0 failed", K(ret));
|
||||
} else if (OB_FAIL(expr.args_[1]->eval_batch(ctx, skip, batch_size))) {
|
||||
@ -212,7 +356,7 @@ int ObExprInstrb::calc_instrb_expr_batch(const ObExpr &expr,
|
||||
if (OB_FAIL(ret)) {
|
||||
} else if (OB_UNLIKELY(is_null)) {
|
||||
res_datum[i].set_null();
|
||||
} else if (OB_UNLIKELY(0 == haystack.get_string().length())) {
|
||||
} else if (OB_UNLIKELY(0 == haystack.get_string().length() || ob_is_empty_lob(haystack_type, haystack, haystack_has_lob_header))) {
|
||||
ObNumStackOnceAlloc tmp_alloc;
|
||||
if (OB_FAIL(res_nmb.from((uint64_t)(0), tmp_alloc))) {
|
||||
LOG_WARN("get number from int failed", K(ret));
|
||||
@ -230,23 +374,38 @@ int ObExprInstrb::calc_instrb_expr_batch(const ObExpr &expr,
|
||||
const ObString& needle_str = needle.get_string();
|
||||
int64_t ret_idx = -1;
|
||||
ObNumStackOnceAlloc tmp_alloc;
|
||||
if (OB_ISNULL(needle_str.ptr())) {
|
||||
if (OB_ISNULL(needle_str.ptr()) || ob_is_empty_lob(needle_type, needle, needle_has_lob_header)) {
|
||||
ret = OB_CLOB_ONLY_SUPPORT_WITH_MULTIBYTE_FUN;
|
||||
LOG_WARN("ptr is null", K(needle_str));
|
||||
} else if (OB_UNLIKELY(0 == needle_str.length())) {
|
||||
} else if (OB_UNLIKELY(0 == needle_str.length() || ob_is_empty_lob(needle_type, needle, needle_has_lob_header))) {
|
||||
if (OB_FAIL(res_nmb.from((uint64_t)(0), tmp_alloc))) {
|
||||
LOG_WARN("get number from int failed", K(ret));
|
||||
} else {
|
||||
res_datum[i].set_number(res_nmb);
|
||||
}
|
||||
} else if (OB_FAIL(kmp_ctx->init(needle_str, pos_int < 0, ctx.exec_ctx_.get_allocator()))) {
|
||||
LOG_WARN("init kmp ctx failed", K(ret), K(needle_str));
|
||||
} else if (OB_FAIL(kmp_ctx->instrb_search(haystack_str, pos_int, occ_int, ret_idx))) {
|
||||
LOG_WARN("search needle in haystack failed", K(ret), K(haystack_str), K(needle_str));
|
||||
} else if (OB_FAIL(res_nmb.from(ret_idx, tmp_alloc))) {
|
||||
LOG_WARN("int64_t to ObNumber failed", K(ret), K(ret_idx));
|
||||
} else {
|
||||
res_datum[i].set_number(res_nmb);
|
||||
} else if (!ob_is_text_tc(expr.args_[0]->datum_meta_.type_)) {
|
||||
if (OB_FAIL(kmp_ctx->init(needle_str, pos_int < 0, ctx.exec_ctx_.get_allocator()))) {
|
||||
LOG_WARN("init kmp ctx failed", K(ret), K(needle_str));
|
||||
} else if (OB_FAIL(kmp_ctx->instrb_search(haystack_str, pos_int, occ_int, ret_idx))) {
|
||||
LOG_WARN("search needle in haystack failed", K(ret), K(haystack_str), K(needle_str));
|
||||
} else if (OB_FAIL(res_nmb.from(ret_idx, tmp_alloc))) {
|
||||
LOG_WARN("int64_t to ObNumber failed", K(ret), K(ret_idx));
|
||||
} else {
|
||||
res_datum[i].set_number(res_nmb);
|
||||
}
|
||||
} else { // text types
|
||||
// similar to instr, but reserve length is byte len, use binary cs type for byte access
|
||||
ObString haystack_data;
|
||||
ObString needle_data;
|
||||
ObEvalCtx::TempAllocGuard alloc_guard(ctx);
|
||||
ObIAllocator &calc_alloc = alloc_guard.get_allocator();
|
||||
ObTextStringIter haystack_iter(haystack_type, CS_TYPE_BINARY, haystack.get_string(), haystack_has_lob_header);
|
||||
ObTextStringIter needle_iter(needle_type, calc_cs_type, needle.get_string(), needle_has_lob_header);
|
||||
|
||||
if (OB_FAIL(calc_instrb_text(haystack_iter, needle_iter, calc_alloc,
|
||||
kmp_ctx, ctx, pos_int, occ_int, res_datum[i]))) {
|
||||
LOG_WARN("failed to calc_instrb_text", K(ret));
|
||||
}
|
||||
}
|
||||
}
|
||||
eval_flags.set(i);
|
||||
@ -256,7 +415,7 @@ int ObExprInstrb::calc_instrb_expr_batch(const ObExpr &expr,
|
||||
}
|
||||
|
||||
int ObExprInstrb::cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr,
|
||||
ObExpr &rt_expr) const
|
||||
ObExpr &rt_expr) const
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
UNUSED(expr_cg_ctx);
|
||||
|
||||
Reference in New Issue
Block a user