statement_digest、statement_digest_text等处理latin1字符有乱码,报错1064

This commit is contained in:
akaError
2023-12-29 03:18:10 +00:00
committed by ob-robot
parent 0483e0f943
commit b6e1b48a33
3 changed files with 25 additions and 6 deletions

View File

@ -449,6 +449,8 @@ inline int64_t ObFastParserBase::is_identifier_flags(const int64_t pos)
// Most of the time, if it is not an identifier character, it maybe a space, // Most of the time, if it is not an identifier character, it maybe a space,
// comma, opening parenthesis, or closing parenthesis. This judgment logic is // comma, opening parenthesis, or closing parenthesis. This judgment logic is
// added here to avoid the next judgment whether it is utf8 char or gbk char // added here to avoid the next judgment whether it is utf8 char or gbk char
} else if (!is_oracle_mode_) {
idf_pos = notascii_gb_char(pos);
} else if (CHARSET_UTF8MB4 == charset_type_ || CHARSET_UTF16 == charset_type_) { } else if (CHARSET_UTF8MB4 == charset_type_ || CHARSET_UTF16 == charset_type_) {
idf_pos = is_utf8_char(pos); idf_pos = is_utf8_char(pos);
} else if (ObCharset::is_gb_charset(charset_type_)) { } else if (ObCharset::is_gb_charset(charset_type_)) {
@ -921,6 +923,17 @@ int ObFastParserBase::get_one_insert_row_str(ObRawSql &raw_sql,
return ret; return ret;
} }
// Probe the byte at `pos` as part of an identifier made of non-ASCII bytes.
//
// @param pos  index into raw_sql_ of the byte to examine.
// @return     pos + 1 when notascii() accepts the byte at `pos` (a single
//             byte is consumed); otherwise whatever is_gbk_char(pos) returns.
inline int64_t ObFastParserBase::notascii_gb_char(const int64_t pos)
{
  const bool is_high_byte = notascii(raw_sql_.char_at(pos));
  // NOTE(review): the lexer's GB_1 lead-byte range is [\x81-\xfe], which lies
  // entirely inside the notascii range, so the is_gbk_char() fallback may never
  // match in practice -- confirm against is_gbk_char().
  return is_high_byte ? pos + 1 : is_gbk_char(pos);
}
inline int64_t ObFastParserBase::is_latin1_char(const int64_t pos) inline int64_t ObFastParserBase::is_latin1_char(const int64_t pos)
{ {
int64_t idf_pos = -1; int64_t idf_pos = -1;
@ -1672,6 +1685,8 @@ inline int64_t ObFastParserBase::is_first_identifier_flags(const int64_t pos)
// Most of the time, if it is not an identifier character, it maybe a space, // Most of the time, if it is not an identifier character, it maybe a space,
// comma, opening parenthesis, or closing parenthesis. This judgment logic is // comma, opening parenthesis, or closing parenthesis. This judgment logic is
// added here to avoid the next judgment whether it is utf8 char or gbk char // added here to avoid the next judgment whether it is utf8 char or gbk char
} else if (!is_oracle_mode_) {
idf_pos = notascii_gb_char(pos);
} else if (CHARSET_UTF8MB4 == charset_type_ || CHARSET_UTF16 == charset_type_) { } else if (CHARSET_UTF8MB4 == charset_type_ || CHARSET_UTF16 == charset_type_) {
idf_pos = is_utf8_char(pos); idf_pos = is_utf8_char(pos);
} else if (ObCharset::is_gb_charset(charset_type_)) { } else if (ObCharset::is_gb_charset(charset_type_)) {

View File

@ -381,6 +381,7 @@ protected:
return is_valid_char(ch) && USER_VAR_CHAR[static_cast<uint8_t>(ch)]; return is_valid_char(ch) && USER_VAR_CHAR[static_cast<uint8_t>(ch)];
} }
void reset_parser_node(ParseNode *node); void reset_parser_node(ParseNode *node);
// Returns pos + 1 when notascii() accepts the byte at pos; otherwise returns
// the result of is_gbk_char(pos).
int64_t notascii_gb_char(const int64_t pos);
//{U} //{U}
int64_t is_latin1_char(const int64_t pos); int64_t is_latin1_char(const int64_t pos);
// ({U_2}{U}|{U_3}{U}{U}|{U_4}{U}{U}{U} // ({U_2}{U}|{U_3}{U}{U}|{U_4}{U}{U}{U}
@ -440,6 +441,11 @@ protected:
return is_valid_char(ch) && return is_valid_char(ch) &&
static_cast<uint8_t>(ch) >= 0x40 && static_cast<uint8_t>(ch) <= 0xfe; static_cast<uint8_t>(ch) >= 0x40 && static_cast<uint8_t>(ch) <= 0xfe;
} }
// True when `ch` passes is_valid_char() and its unsigned value is in
// 0x80..0xFF, i.e. the byte is outside the 7-bit ASCII range.
inline bool notascii(char ch)
{
  // A uint8_t can never exceed 0xFF, so only the lower bound needs checking.
  const uint8_t byte = static_cast<uint8_t>(ch);
  return is_valid_char(ch) && byte >= 0x80;
}
inline bool is_latin1(char ch) inline bool is_latin1(char ch)
{ {
return is_valid_char(ch) && return is_valid_char(ch) &&

View File

@ -38,14 +38,12 @@ extern void obsql_mysql_parser_fatal_error(int32_t errcoyyde, yyscan_t yyscanner
/* the adq is used to process dq in ANSI_QUOTES sql_mode*/ /* the adq is used to process dq in ANSI_QUOTES sql_mode*/
%x adq %x adq
U [\x80-\xbf] NOTASCII [\x80-\xFF]
U_2 [\xc2-\xdf]
U_3 [\xe0-\xef]
U_4 [\xf0-\xf4]
GB_1 [\x81-\xfe] GB_1 [\x81-\xfe]
GB_2 [\x40-\xfe] GB_2 [\x40-\xfe]
GB_3 [\x30-\x39] GB_3 [\x30-\x39]
UTF8_GB_CHAR ({U_2}{U}|{U_3}{U}{U}|{U_4}{U}{U}{U}|{GB_1}{GB_2}|{GB_1}{GB_3}{GB_1}{GB_3}) NOTASCII_GB_CHAR ({NOTASCII}|{GB_1}{GB_2}|{GB_1}{GB_3}{GB_1}{GB_3})
space [ \t\n\r\f] space [ \t\n\r\f]
non_newline [^\n\r] non_newline [^\n\r]
sql_comment ("--"[ \t]+{non_newline}*)|(#{non_newline}*|"--"[\n\r]) sql_comment ("--"[ \t]+{non_newline}*)|(#{non_newline}*|"--"[\n\r])
@ -62,7 +60,7 @@ common_hint_begin (\/\*\+({space}*hint{space}+)?)
c_cmt_begin \/\* c_cmt_begin \/\*
c_cmt_end \*+\/ c_cmt_end \*+\/
comment ({sql_comment}) comment ({sql_comment})
identifier (([A-Za-z0-9$_]|{UTF8_GB_CHAR})+) identifier (([A-Za-z0-9$_]|{NOTASCII_GB_CHAR})+)
system_variable (@@[A-Za-z_][A-Za-z0-9_]*)|(@@[`][`A-Za-z_][`A-Za-z_]*) system_variable (@@[A-Za-z_][A-Za-z0-9_]*)|(@@[`][`A-Za-z_][`A-Za-z_]*)
user_variable (@[A-Za-z0-9_\.$]*)|(@[`'\"][`'\"A-Za-z0-9_\.$/%]*) user_variable (@[A-Za-z0-9_\.$]*)|(@[`'\"][`'\"A-Za-z0-9_\.$/%]*)
version_num ([0-9]+\.+[0-9]*) version_num ([0-9]+\.+[0-9]*)