Fix charset handling bug in the REPLACE expression

This commit is contained in:
SevenJ-swj
2023-07-11 10:42:33 +00:00
committed by ob-robot
parent c5c92dfe7c
commit fa010efcd4
5 changed files with 51 additions and 57 deletions

View File

@ -3141,10 +3141,10 @@ int ObCharsetUtils::init(ObIAllocator &allocator)
return ret; return ret;
} }
bool ObStringScanner::next_character(ObString &encoding, int32_t &wchar, int &ret) bool ObStringScanner::next_character(ObString &encoding_value, int32_t &unicode_value, int &ret)
{ {
bool has_next = false; bool has_next = false;
ret = next_character(encoding, wchar); ret = next_character(encoding_value, unicode_value);
if (OB_ITER_END == ret) { if (OB_ITER_END == ret) {
has_next = false; has_next = false;
@ -3158,20 +3158,26 @@ bool ObStringScanner::next_character(ObString &encoding, int32_t &wchar, int &re
return has_next; return has_next;
} }
int ObStringScanner::next_character(ObString &encoding, int32_t &wchar) int ObStringScanner::next_character(ObString &encoding_value, int32_t &unicode_value)
{ {
int ret = OB_SUCCESS; int ret = OB_SUCCESS;
int32_t length = 0; int32_t length = 0;
ObString &str = const_cast<ObString &>(str_); ObString &str = str_;
if (str.empty()) { if (str.empty()) {
ret = OB_ITER_END; ret = OB_ITER_END;
} else if (OB_FAIL(ObCharset::mb_wc(collation_type_, str.ptr(), str.length(), length, wchar))) { } else if (OB_FAIL(ObCharset::mb_wc(collation_type_, str.ptr(), str.length(), length, unicode_value))) {
if (!!(IGNORE_INVALID_CHARACTER & flags_)) {
ret = OB_SUCCESS;
length = 1;
} else {
ret = OB_ERR_INCORRECT_STRING_VALUE; ret = OB_ERR_INCORRECT_STRING_VALUE;
LOG_WARN("fail to call mb_wc", K(ret), KPHEX(str.ptr(), str.length())); LOG_WARN("fail to call mb_wc", K(ret), KPHEX(str.ptr(), str.length()));
} else { }
encoding.assign_ptr(str.ptr(), length); }
if (OB_SUCC(ret)) {
encoding_value.assign_ptr(str.ptr(), length);
LOG_DEBUG("next_character", K(ret), KPHEX(str.ptr(), str.length())); LOG_DEBUG("next_character", K(ret), KPHEX(str.ptr(), str.length()));
str += length; str += length;
} }

View File

@ -608,15 +608,22 @@ private:
class ObStringScanner class ObStringScanner
{ {
public: public:
ObStringScanner(const ObString &str, common::ObCollationType collation_type) enum {
: str_(str), collation_type_(collation_type) IGNORE_INVALID_CHARACTER = 1<<0,
};
ObStringScanner(const ObString &str, common::ObCollationType collation_type, uint64_t flags = 0)
: origin_str_(str), str_(str), collation_type_(collation_type), flags_(flags)
{} {}
int next_character(ObString &encoding, int32_t &wchar); int next_character(ObString &encoding_value, int32_t &unicode_value);
bool next_character(ObString &encoding, int32_t &wchar, int &ret); bool next_character(ObString &encoding_value, int32_t &unicode_value, int &ret);
ObString get_remain_str() { return str_; }
void forward_bytes(int64_t n) { str_ += n; }
TO_STRING_KV(K_(str), K_(collation_type)); TO_STRING_KV(K_(str), K_(collation_type));
private: private:
const ObString &str_; const ObString &origin_str_;
ObString str_;
common::ObCollationType collation_type_; common::ObCollationType collation_type_;
uint64_t flags_;
}; };
class ObCharSetString class ObCharSetString

View File

@ -131,9 +131,10 @@ int ObExprReplace::replace(ObString &ret_str,
ObExprStringBuf &string_buf) ObExprStringBuf &string_buf)
{ {
int ret = OB_SUCCESS; int ret = OB_SUCCESS;
ObString dst_str;
bool is_null = false;
if (OB_UNLIKELY(text.length() <= 0)) { if (OB_UNLIKELY(text.length() <= 0)) {
// Return empty string // Return empty string
ret_str.reset();
} else if (OB_UNLIKELY(from.length() <= 0) || OB_UNLIKELY(to.length() < 0)) { } else if (OB_UNLIKELY(from.length() <= 0) || OB_UNLIKELY(to.length() < 0)) {
ret_str = text; ret_str = text;
} else if (OB_ISNULL(from.ptr())) { } else if (OB_ISNULL(from.ptr())) {
@ -142,51 +143,29 @@ int ObExprReplace::replace(ObString &ret_str,
} else if (OB_UNLIKELY(text.length() < from.length()) || } else if (OB_UNLIKELY(text.length() < from.length()) ||
OB_UNLIKELY(from == to)) { OB_UNLIKELY(from == to)) {
ret_str = text; ret_str = text;
} else if (OB_FAIL(ObSQLUtils::check_well_formed_str(text, cs_type, dst_str, is_null, false, false))
|| OB_FAIL(ObSQLUtils::check_well_formed_str(from, cs_type, dst_str, is_null, false, false))
|| OB_FAIL(ObSQLUtils::check_well_formed_str(to, cs_type, dst_str, is_null, false, false))) {
LOG_WARN("check well formed str failed", K(ret));
} else { } else {
ObSEArray<uint32_t, 4> locations(common::ObModIds::OB_SQL_EXPR_REPLACE, ObSEArray<uint32_t, 4> locations(common::ObModIds::OB_SQL_EXPR_REPLACE,
common::OB_MALLOC_NORMAL_BLOCK_SIZE); common::OB_MALLOC_NORMAL_BLOCK_SIZE);
const char *buf_start = text.ptr(); ObString mb;
const char *buf_end = text.ptr() + text.length(); int32_t wc;
const ObCharsetInfo *cs = NULL; ObStringScanner scanner(text, cs_type, ObStringScanner::IGNORE_INVALID_CHARACTER);
int error = 0; while (OB_SUCC(ret) && scanner.get_remain_str().length() >= from.length()) {
if (OB_UNLIKELY(OB_ISNULL(cs = ObCharset::get_charset(cs_type)) || if (0 == MEMCMP(scanner.get_remain_str().ptr(), from.ptr(), from.length())) {
OB_ISNULL(cs->cset))) { ret = locations.push_back(scanner.get_remain_str().ptr() - text.ptr());
ret = OB_ERR_UNEXPECTED; scanner.forward_bytes(from.length());
LOG_ERROR("unexpected error. invalid argument(s)", K(cs_type)); } else if (OB_FAIL(scanner.next_character(mb, wc))) {
} LOG_WARN("get next character failed", K(ret));
int32_t char_len = 0;
int32_t next_char_len = 0;
while (OB_SUCC(ret) && OB_LIKELY(error == 0) && buf_start + char_len < buf_end) {
char_len += static_cast<int32_t>(cs->cset->well_formed_len(cs, buf_start + char_len, buf_end, 1, &error));
if (OB_UNLIKELY(0 != error)) {
bool is_null = false;
//mysql strict mode will return null, otherwise will return something
//so we should get session to acquire if is_strict mode here.
//we now set is_strict=false.
if (OB_FAIL(ObSQLUtils::check_well_formed_str(text, cs_type, ret_str, is_null, false, false))) {
LOG_WARN("check well formed str failed", K(ret));
}
} else if (next_char_len == 0 && FALSE_IT(next_char_len = char_len)) {
} else if (char_len < from.length()) {
//do nothing
} else if (char_len > from.length()) {
buf_start += next_char_len;
char_len = 0;
next_char_len = 0;
} else if (0 == MEMCMP(buf_start, from.ptr(), char_len)) {
ret = locations.push_back(buf_start - text.ptr());
buf_start += char_len;
char_len = 0;
next_char_len = 0;
} else { } else {
buf_start += next_char_len; //do nothing
char_len = 0;
next_char_len = 0;
} }
} }
int64_t tot_length = 0; int64_t tot_length = 0;
if (OB_UNLIKELY(error != 0)) { if (OB_FAIL(ret)) {
} else if (OB_FAIL(ret)) {
ret_str.reset(); ret_str.reset();
} else if (locations.count() == 0) { } else if (locations.count() == 0) {
ret_str = text; ret_str = text;
@ -253,7 +232,7 @@ int ObExprReplace::eval_replace(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &exp
expr_datum.set_datum(*text); expr_datum.set_datum(*text);
} else if (!is_lob_res) { // non text tc inputs } else if (!is_lob_res) { // non text tc inputs
if (OB_FAIL(replace(res, if (OB_FAIL(replace(res,
expr.args_[0]->datum_meta_.cs_type_, expr.datum_meta_.cs_type_,
text->get_string(), text->get_string(),
!from->is_null() ? from->get_string() : ObString(), !from->is_null() ? from->get_string() : ObString(),
(NULL != to && !to->is_null()) ? to->get_string() : ObString(), (NULL != to && !to->is_null()) ? to->get_string() : ObString(),
@ -288,7 +267,7 @@ int ObExprReplace::eval_replace(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &exp
LOG_WARN("failed to get string data", K(ret), K(expr.args_[2]->datum_meta_)); LOG_WARN("failed to get string data", K(ret), K(expr.args_[2]->datum_meta_));
} }
if (OB_SUCC(ret)) { if (OB_SUCC(ret)) {
if (OB_FAIL(replace(res, expr.args_[0]->datum_meta_.cs_type_, text_data, from_data, to_data, temp_allocator))) { if (OB_FAIL(replace(res, expr.datum_meta_.cs_type_, text_data, from_data, to_data, temp_allocator))) {
LOG_WARN("do replace for lob resutl failed", K(ret), K(expr.datum_meta_.type_)); LOG_WARN("do replace for lob resutl failed", K(ret), K(expr.datum_meta_.type_));
} else if (OB_FAIL(ObTextStringHelper::string_to_templob_result(expr, ctx, expr_datum, res))) { } else if (OB_FAIL(ObTextStringHelper::string_to_templob_result(expr, ctx, expr_datum, res))) {
LOG_WARN("set lob result failed", K(ret)); LOG_WARN("set lob result failed", K(ret));

View File

@ -93,7 +93,8 @@ int ObCreateTableStmt::get_first_stmt(ObString &first_stmt)
first_stmt, first_stmt,
get_query_ctx()->get_sql_stmt_coll_type(), get_query_ctx()->get_sql_stmt_coll_type(),
ObCharset::get_system_collation(), ObCharset::get_system_collation(),
first_stmt))) { first_stmt,
ObCharset::REPLACE_UNKNOWN_CHARACTER))) {
LOG_WARN("fail to convert charset", K(ret), K(first_stmt), LOG_WARN("fail to convert charset", K(ret), K(first_stmt),
"stmt collation type", get_query_ctx()->get_sql_stmt_coll_type()); "stmt collation type", get_query_ctx()->get_sql_stmt_coll_type());
} }

View File

@ -34,7 +34,8 @@ int ObDDLStmt::get_first_stmt(ObString &first_stmt)
first_stmt, first_stmt,
get_query_ctx()->get_sql_stmt_coll_type(), get_query_ctx()->get_sql_stmt_coll_type(),
ObCharset::get_system_collation(), ObCharset::get_system_collation(),
first_stmt))) { first_stmt,
ObCharset::REPLACE_UNKNOWN_CHARACTER))) {
LOG_WARN("fail to convert charset", K(ret), K(first_stmt), LOG_WARN("fail to convert charset", K(ret), K(first_stmt),
"stmt collation type", get_query_ctx()->get_sql_stmt_coll_type()); "stmt collation type", get_query_ctx()->get_sql_stmt_coll_type());
} }