[FEAT MERGE] Lob SQL refactoring (Mem-LobLocator, expressions and dbms_lob adaptions)

Co-authored-by: chaser-ch <chaser.ch@antgroup.com>
This commit is contained in:
obdev
2023-01-28 20:40:15 +08:00
committed by ob-robot
parent 4bb1033505
commit 3d4f554258
350 changed files with 19091 additions and 3918 deletions

View File

@ -15,6 +15,7 @@
#include "sql/engine/expr/ob_expr_soundex.h"
#include "sql/engine/ob_exec_context.h"
#include "sql/engine/expr/ob_expr_util.h"
#include "sql/engine/expr/ob_expr_lob_utils.h"
namespace oceanbase
{
@ -137,14 +138,15 @@ int ObExprSoundex::convert_str_to_soundex(const ObString &input,
const ObCollationType input_cs_type,
const bool use_original_algo,
const bool fix_min_len,
char *buf, const int64_t len, int64_t &pos)
char *buf, const int64_t len, int64_t &pos,
bool &is_first, int8_t &last_soundex_code)
{
int ret = OB_SUCCESS;
ObStringScanner scanner(input, input_cs_type);
ObString tmp_str;
int32_t wchar = 0;
int8_t soundex_code = 0;
int8_t pre_code = 0;
int8_t pre_code = last_soundex_code;
if (OB_UNLIKELY(len < MIN_RESULT_LENGTH)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("buf is not enough", K(ret), K(len));
@ -155,7 +157,7 @@ int ObExprSoundex::convert_str_to_soundex(const ObString &input,
LOG_WARN("get next character failed", K(ret), K(input));
}
} else if (FALSE_IT(soundex_code = get_character_code(wchar))) {
} else if (0 == pos) {
} else if (0 == pos && is_first) {
if (soundex_code >= 0) {
// only expect alphabetic character as beginning of result.
if (wchar <= static_cast<int32_t>('z') && wchar >= static_cast<int32_t>('a')) {
@ -167,6 +169,7 @@ int ObExprSoundex::convert_str_to_soundex(const ObString &input,
LOG_WARN("unexpected character", K(ret), K(wchar), K(soundex_code));
}
pre_code = soundex_code;
is_first = false;
} else {
// ignore nonalphabetic character
}
@ -198,6 +201,7 @@ int ObExprSoundex::convert_str_to_soundex(const ObString &input,
while(pos < MIN_RESULT_LENGTH) {
buf[pos++] = '0';
}
last_soundex_code = pre_code;
}
}
return ret;
@ -219,11 +223,14 @@ int ObExprSoundex::calc(const ObString &input, const ObCollationType intput_cs_t
} else {
buf = static_cast<char *>(res_alloc.alloc(buf_len));
}
bool is_first = true;
int8_t last_soundex_code = 0;
if (OB_ISNULL(buf)) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("allocate memory failed", K(ret), K(buf_len));
} else if (OB_FAIL(convert_str_to_soundex(input, intput_cs_type, !is_oracle_mode,
is_oracle_mode, buf, buf_len, pos))) {
is_oracle_mode, buf, buf_len, pos,
is_first, last_soundex_code))) {
LOG_WARN("calc soundex failed", K(ret));
} else if (need_charset_convert) {
if (OB_FAIL(ObExprUtil::convert_string_collation(ObString(pos, buf),
@ -239,6 +246,90 @@ int ObExprSoundex::calc(const ObString &input, const ObCollationType intput_cs_t
return ret;
}
int ObExprSoundex::calc_text(const ObDatum &input_datum,
const ObObjType input_type,
const ObObjType res_type,
const ObCollationType input_cs_type,
const ObCollationType res_cs_type,
const bool input_has_lob_header,
ObIAllocator &tmp_alloc, ObIAllocator &res_alloc,
ObString &out,
bool has_lob_header)
{
int ret = OB_SUCCESS;
const bool need_charset_convert = ObCharset::is_cs_nonascii(res_cs_type);
const bool is_oracle_mode = lib::is_oracle_mode();
char *buf = NULL;
int64_t pos = 0;
ObTextStringIter input_iter(input_type, input_cs_type, input_datum.get_string(), input_has_lob_header);
ObTextStringResult out_result(res_type, has_lob_header, &res_alloc);
int64_t buf_size = 0;
int64_t data_len = 0;
if (OB_FAIL(input_iter.init(0, NULL, &tmp_alloc))) {
LOG_WARN("init input_iter failed ", K(ret), K(input_iter));
} else if (OB_FAIL(input_iter.get_byte_len(data_len))) {
LOG_WARN("get input iter data len failed ", K(ret), K(input_iter));
} else if (FALSE_IT(buf_size = is_oracle_mode ? MIN_RESULT_LENGTH : MAX(MIN_RESULT_LENGTH, data_len))) {
} else if (OB_FAIL(out_result.init(buf_size))) {
LOG_WARN("init lob result failed", K(ret), K(out_result), K(buf_size));
} else if (OB_FAIL(out_result.get_reserved_buffer(buf, buf_size))) {
LOG_WARN("get empty buffer failed", K(ret), K(buf_size));
} else if (OB_ISNULL(buf)) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("allocate memory failed", K(ret), K(buf_size));
} else {
bool is_first = true;
int8_t last_soundex_code = 0;
ObTextStringIterState state;
ObString input_data;
ObString block_out;
char *block_buf = NULL;
while (OB_SUCC(ret)
&& buf_size > 0
&& (state = input_iter.get_next_block(input_data)) == TEXTSTRING_ITER_NEXT) {
ObDataBuffer buf_alloc(buf, buf_size);
int64_t block_buf_len = is_oracle_mode
? MIN_RESULT_LENGTH : MAX(MIN_RESULT_LENGTH, input_data.length());
if (need_charset_convert) {
block_buf = static_cast<char *>(tmp_alloc.alloc(block_buf_len));
} else {
block_buf = static_cast<char *>(buf_alloc.alloc(block_buf_len));
}
if (OB_FAIL(convert_str_to_soundex(input_data, input_cs_type, !is_oracle_mode,
is_oracle_mode, block_buf, block_buf_len, pos,
is_first, last_soundex_code))) {
LOG_WARN("calc soundex failed", K(ret));
} else if (need_charset_convert) {
if (OB_FAIL(ObExprUtil::convert_string_collation(ObString(pos, block_buf),
CS_TYPE_UTF8MB4_GENERAL_CI,
block_out,
res_cs_type,
buf_alloc))) {
LOG_WARN("convert string collation failed", K(ret));
} else if (OB_FAIL(out_result.lseek(block_out.length(), 0))) {
LOG_WARN("result lseek failed", K(ret));
}
} else { // nocharset convert
if (OB_FAIL(out_result.lseek(pos, 0))) {
LOG_WARN("result lseek failed", K(ret));
}
}
if (OB_SUCC(ret)) {
if (need_charset_convert) {
buf = buf + block_out.length();
buf_size = buf_size - block_out.length();
} else {
buf = buf + pos;
buf_size = buf_size - pos;
}
}
}
out_result.get_result_buffer(out);
}
return ret;
}
int ObExprSoundex::cg_expr(ObExprCGCtx &, const ObRawExpr &, ObExpr &rt_expr) const
{
int ret = OB_SUCCESS;
@ -263,9 +354,21 @@ int ObExprSoundex::eval_soundex(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &exp
const ObCollationType res_cs_type = expr.datum_meta_.cs_type_;
const ObCollationType input_cs_type = expr.args_[0]->datum_meta_.cs_type_;
ObString out;
if (OB_FAIL(calc(param->get_string(), input_cs_type, res_cs_type,
tmp_alloc_guard.get_allocator(),
expr_res_alloc, out))) {
const ObObjType input_type = expr.args_[0]->datum_meta_.type_;
const ObObjType res_type = expr.datum_meta_.type_;
if (!ob_is_text_tc(input_type) && !ob_is_text_tc(res_type)) {
ret = calc(param->get_string(), input_cs_type, res_cs_type,
tmp_alloc_guard.get_allocator(),
expr_res_alloc, out);
} else { // text tc
const bool input_has_lob_header = expr.args_[0]->obj_meta_.has_lob_header();
bool has_lob_header = expr.obj_meta_.has_lob_header();
ret = calc_text(*param, input_type, res_type, input_cs_type, res_cs_type,
input_has_lob_header,
tmp_alloc_guard.get_allocator(),
expr_res_alloc, out, has_lob_header);
}
if (OB_FAIL(ret)) {
LOG_WARN("calc soundex failed", K(ret));
} else if (out.empty() && is_oracle_mode()) {
expr_datum.set_null();