Fix charset-handling bug in the REPLACE expression
This commit is contained in:
20
deps/oblib/src/lib/charset/ob_charset.cpp
vendored
20
deps/oblib/src/lib/charset/ob_charset.cpp
vendored
@ -3141,10 +3141,10 @@ int ObCharsetUtils::init(ObIAllocator &allocator)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ObStringScanner::next_character(ObString &encoding, int32_t &wchar, int &ret)
|
bool ObStringScanner::next_character(ObString &encoding_value, int32_t &unicode_value, int &ret)
|
||||||
{
|
{
|
||||||
bool has_next = false;
|
bool has_next = false;
|
||||||
ret = next_character(encoding, wchar);
|
ret = next_character(encoding_value, unicode_value);
|
||||||
|
|
||||||
if (OB_ITER_END == ret) {
|
if (OB_ITER_END == ret) {
|
||||||
has_next = false;
|
has_next = false;
|
||||||
@ -3158,20 +3158,26 @@ bool ObStringScanner::next_character(ObString &encoding, int32_t &wchar, int &re
|
|||||||
return has_next;
|
return has_next;
|
||||||
}
|
}
|
||||||
|
|
||||||
int ObStringScanner::next_character(ObString &encoding, int32_t &wchar)
|
int ObStringScanner::next_character(ObString &encoding_value, int32_t &unicode_value)
|
||||||
{
|
{
|
||||||
int ret = OB_SUCCESS;
|
int ret = OB_SUCCESS;
|
||||||
int32_t length = 0;
|
int32_t length = 0;
|
||||||
|
|
||||||
ObString &str = const_cast<ObString &>(str_);
|
ObString &str = str_;
|
||||||
|
|
||||||
if (str.empty()) {
|
if (str.empty()) {
|
||||||
ret = OB_ITER_END;
|
ret = OB_ITER_END;
|
||||||
} else if (OB_FAIL(ObCharset::mb_wc(collation_type_, str.ptr(), str.length(), length, wchar))) {
|
} else if (OB_FAIL(ObCharset::mb_wc(collation_type_, str.ptr(), str.length(), length, unicode_value))) {
|
||||||
|
if (!!(IGNORE_INVALID_CHARACTER & flags_)) {
|
||||||
|
ret = OB_SUCCESS;
|
||||||
|
length = 1;
|
||||||
|
} else {
|
||||||
ret = OB_ERR_INCORRECT_STRING_VALUE;
|
ret = OB_ERR_INCORRECT_STRING_VALUE;
|
||||||
LOG_WARN("fail to call mb_wc", K(ret), KPHEX(str.ptr(), str.length()));
|
LOG_WARN("fail to call mb_wc", K(ret), KPHEX(str.ptr(), str.length()));
|
||||||
} else {
|
}
|
||||||
encoding.assign_ptr(str.ptr(), length);
|
}
|
||||||
|
if (OB_SUCC(ret)) {
|
||||||
|
encoding_value.assign_ptr(str.ptr(), length);
|
||||||
LOG_DEBUG("next_character", K(ret), KPHEX(str.ptr(), str.length()));
|
LOG_DEBUG("next_character", K(ret), KPHEX(str.ptr(), str.length()));
|
||||||
str += length;
|
str += length;
|
||||||
}
|
}
|
||||||
|
|||||||
17
deps/oblib/src/lib/charset/ob_charset.h
vendored
17
deps/oblib/src/lib/charset/ob_charset.h
vendored
@ -608,15 +608,22 @@ private:
|
|||||||
class ObStringScanner
|
class ObStringScanner
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
ObStringScanner(const ObString &str, common::ObCollationType collation_type)
|
enum {
|
||||||
: str_(str), collation_type_(collation_type)
|
IGNORE_INVALID_CHARACTER = 1<<0,
|
||||||
|
};
|
||||||
|
ObStringScanner(const ObString &str, common::ObCollationType collation_type, uint64_t flags = 0)
|
||||||
|
: origin_str_(str), str_(str), collation_type_(collation_type), flags_(flags)
|
||||||
{}
|
{}
|
||||||
int next_character(ObString &encoding, int32_t &wchar);
|
int next_character(ObString &encoding_value, int32_t &unicode_value);
|
||||||
bool next_character(ObString &encoding, int32_t &wchar, int &ret);
|
bool next_character(ObString &encoding_value, int32_t &unicode_value, int &ret);
|
||||||
|
ObString get_remain_str() { return str_; }
|
||||||
|
void forward_bytes(int64_t n) { str_ += n; }
|
||||||
TO_STRING_KV(K_(str), K_(collation_type));
|
TO_STRING_KV(K_(str), K_(collation_type));
|
||||||
private:
|
private:
|
||||||
const ObString &str_;
|
const ObString &origin_str_;
|
||||||
|
ObString str_;
|
||||||
common::ObCollationType collation_type_;
|
common::ObCollationType collation_type_;
|
||||||
|
uint64_t flags_;
|
||||||
};
|
};
|
||||||
|
|
||||||
class ObCharSetString
|
class ObCharSetString
|
||||||
|
|||||||
@ -131,9 +131,10 @@ int ObExprReplace::replace(ObString &ret_str,
|
|||||||
ObExprStringBuf &string_buf)
|
ObExprStringBuf &string_buf)
|
||||||
{
|
{
|
||||||
int ret = OB_SUCCESS;
|
int ret = OB_SUCCESS;
|
||||||
|
ObString dst_str;
|
||||||
|
bool is_null = false;
|
||||||
if (OB_UNLIKELY(text.length() <= 0)) {
|
if (OB_UNLIKELY(text.length() <= 0)) {
|
||||||
// Return empty string
|
// Return empty string
|
||||||
ret_str.reset();
|
|
||||||
} else if (OB_UNLIKELY(from.length() <= 0) || OB_UNLIKELY(to.length() < 0)) {
|
} else if (OB_UNLIKELY(from.length() <= 0) || OB_UNLIKELY(to.length() < 0)) {
|
||||||
ret_str = text;
|
ret_str = text;
|
||||||
} else if (OB_ISNULL(from.ptr())) {
|
} else if (OB_ISNULL(from.ptr())) {
|
||||||
@ -142,51 +143,29 @@ int ObExprReplace::replace(ObString &ret_str,
|
|||||||
} else if (OB_UNLIKELY(text.length() < from.length()) ||
|
} else if (OB_UNLIKELY(text.length() < from.length()) ||
|
||||||
OB_UNLIKELY(from == to)) {
|
OB_UNLIKELY(from == to)) {
|
||||||
ret_str = text;
|
ret_str = text;
|
||||||
|
} else if (OB_FAIL(ObSQLUtils::check_well_formed_str(text, cs_type, dst_str, is_null, false, false))
|
||||||
|
|| OB_FAIL(ObSQLUtils::check_well_formed_str(from, cs_type, dst_str, is_null, false, false))
|
||||||
|
|| OB_FAIL(ObSQLUtils::check_well_formed_str(to, cs_type, dst_str, is_null, false, false))) {
|
||||||
|
LOG_WARN("check well formed str failed", K(ret));
|
||||||
} else {
|
} else {
|
||||||
ObSEArray<uint32_t, 4> locations(common::ObModIds::OB_SQL_EXPR_REPLACE,
|
ObSEArray<uint32_t, 4> locations(common::ObModIds::OB_SQL_EXPR_REPLACE,
|
||||||
common::OB_MALLOC_NORMAL_BLOCK_SIZE);
|
common::OB_MALLOC_NORMAL_BLOCK_SIZE);
|
||||||
const char *buf_start = text.ptr();
|
ObString mb;
|
||||||
const char *buf_end = text.ptr() + text.length();
|
int32_t wc;
|
||||||
const ObCharsetInfo *cs = NULL;
|
ObStringScanner scanner(text, cs_type, ObStringScanner::IGNORE_INVALID_CHARACTER);
|
||||||
int error = 0;
|
while (OB_SUCC(ret) && scanner.get_remain_str().length() >= from.length()) {
|
||||||
if (OB_UNLIKELY(OB_ISNULL(cs = ObCharset::get_charset(cs_type)) ||
|
if (0 == MEMCMP(scanner.get_remain_str().ptr(), from.ptr(), from.length())) {
|
||||||
OB_ISNULL(cs->cset))) {
|
ret = locations.push_back(scanner.get_remain_str().ptr() - text.ptr());
|
||||||
ret = OB_ERR_UNEXPECTED;
|
scanner.forward_bytes(from.length());
|
||||||
LOG_ERROR("unexpected error. invalid argument(s)", K(cs_type));
|
} else if (OB_FAIL(scanner.next_character(mb, wc))) {
|
||||||
}
|
LOG_WARN("get next character failed", K(ret));
|
||||||
int32_t char_len = 0;
|
|
||||||
int32_t next_char_len = 0;
|
|
||||||
while (OB_SUCC(ret) && OB_LIKELY(error == 0) && buf_start + char_len < buf_end) {
|
|
||||||
char_len += static_cast<int32_t>(cs->cset->well_formed_len(cs, buf_start + char_len, buf_end, 1, &error));
|
|
||||||
if (OB_UNLIKELY(0 != error)) {
|
|
||||||
bool is_null = false;
|
|
||||||
//mysql strict mode will return null, otherwise will return something
|
|
||||||
//so we should get session to acquire if is_strict mode here.
|
|
||||||
//we now set is_strict=false.
|
|
||||||
if (OB_FAIL(ObSQLUtils::check_well_formed_str(text, cs_type, ret_str, is_null, false, false))) {
|
|
||||||
LOG_WARN("check well formed str failed", K(ret));
|
|
||||||
}
|
|
||||||
} else if (next_char_len == 0 && FALSE_IT(next_char_len = char_len)) {
|
|
||||||
} else if (char_len < from.length()) {
|
|
||||||
//do nothing
|
|
||||||
} else if (char_len > from.length()) {
|
|
||||||
buf_start += next_char_len;
|
|
||||||
char_len = 0;
|
|
||||||
next_char_len = 0;
|
|
||||||
} else if (0 == MEMCMP(buf_start, from.ptr(), char_len)) {
|
|
||||||
ret = locations.push_back(buf_start - text.ptr());
|
|
||||||
buf_start += char_len;
|
|
||||||
char_len = 0;
|
|
||||||
next_char_len = 0;
|
|
||||||
} else {
|
} else {
|
||||||
buf_start += next_char_len;
|
//do nothing
|
||||||
char_len = 0;
|
|
||||||
next_char_len = 0;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int64_t tot_length = 0;
|
int64_t tot_length = 0;
|
||||||
if (OB_UNLIKELY(error != 0)) {
|
if (OB_FAIL(ret)) {
|
||||||
} else if (OB_FAIL(ret)) {
|
|
||||||
ret_str.reset();
|
ret_str.reset();
|
||||||
} else if (locations.count() == 0) {
|
} else if (locations.count() == 0) {
|
||||||
ret_str = text;
|
ret_str = text;
|
||||||
@ -253,7 +232,7 @@ int ObExprReplace::eval_replace(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &exp
|
|||||||
expr_datum.set_datum(*text);
|
expr_datum.set_datum(*text);
|
||||||
} else if (!is_lob_res) { // non text tc inputs
|
} else if (!is_lob_res) { // non text tc inputs
|
||||||
if (OB_FAIL(replace(res,
|
if (OB_FAIL(replace(res,
|
||||||
expr.args_[0]->datum_meta_.cs_type_,
|
expr.datum_meta_.cs_type_,
|
||||||
text->get_string(),
|
text->get_string(),
|
||||||
!from->is_null() ? from->get_string() : ObString(),
|
!from->is_null() ? from->get_string() : ObString(),
|
||||||
(NULL != to && !to->is_null()) ? to->get_string() : ObString(),
|
(NULL != to && !to->is_null()) ? to->get_string() : ObString(),
|
||||||
@ -288,7 +267,7 @@ int ObExprReplace::eval_replace(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &exp
|
|||||||
LOG_WARN("failed to get string data", K(ret), K(expr.args_[2]->datum_meta_));
|
LOG_WARN("failed to get string data", K(ret), K(expr.args_[2]->datum_meta_));
|
||||||
}
|
}
|
||||||
if (OB_SUCC(ret)) {
|
if (OB_SUCC(ret)) {
|
||||||
if (OB_FAIL(replace(res, expr.args_[0]->datum_meta_.cs_type_, text_data, from_data, to_data, temp_allocator))) {
|
if (OB_FAIL(replace(res, expr.datum_meta_.cs_type_, text_data, from_data, to_data, temp_allocator))) {
|
||||||
LOG_WARN("do replace for lob resutl failed", K(ret), K(expr.datum_meta_.type_));
|
LOG_WARN("do replace for lob resutl failed", K(ret), K(expr.datum_meta_.type_));
|
||||||
} else if (OB_FAIL(ObTextStringHelper::string_to_templob_result(expr, ctx, expr_datum, res))) {
|
} else if (OB_FAIL(ObTextStringHelper::string_to_templob_result(expr, ctx, expr_datum, res))) {
|
||||||
LOG_WARN("set lob result failed", K(ret));
|
LOG_WARN("set lob result failed", K(ret));
|
||||||
|
|||||||
@ -93,7 +93,8 @@ int ObCreateTableStmt::get_first_stmt(ObString &first_stmt)
|
|||||||
first_stmt,
|
first_stmt,
|
||||||
get_query_ctx()->get_sql_stmt_coll_type(),
|
get_query_ctx()->get_sql_stmt_coll_type(),
|
||||||
ObCharset::get_system_collation(),
|
ObCharset::get_system_collation(),
|
||||||
first_stmt))) {
|
first_stmt,
|
||||||
|
ObCharset::REPLACE_UNKNOWN_CHARACTER))) {
|
||||||
LOG_WARN("fail to convert charset", K(ret), K(first_stmt),
|
LOG_WARN("fail to convert charset", K(ret), K(first_stmt),
|
||||||
"stmt collation type", get_query_ctx()->get_sql_stmt_coll_type());
|
"stmt collation type", get_query_ctx()->get_sql_stmt_coll_type());
|
||||||
}
|
}
|
||||||
|
|||||||
@ -34,7 +34,8 @@ int ObDDLStmt::get_first_stmt(ObString &first_stmt)
|
|||||||
first_stmt,
|
first_stmt,
|
||||||
get_query_ctx()->get_sql_stmt_coll_type(),
|
get_query_ctx()->get_sql_stmt_coll_type(),
|
||||||
ObCharset::get_system_collation(),
|
ObCharset::get_system_collation(),
|
||||||
first_stmt))) {
|
first_stmt,
|
||||||
|
ObCharset::REPLACE_UNKNOWN_CHARACTER))) {
|
||||||
LOG_WARN("fail to convert charset", K(ret), K(first_stmt),
|
LOG_WARN("fail to convert charset", K(ret), K(first_stmt),
|
||||||
"stmt collation type", get_query_ctx()->get_sql_stmt_coll_type());
|
"stmt collation type", get_query_ctx()->get_sql_stmt_coll_type());
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user