statement_digest、statement_digest_text等处理latin1字符有乱码,报错1064

This commit is contained in:
akaError
2024-02-08 17:14:19 +00:00
committed by ob-robot
parent 27c4a22c70
commit 30cf1c7598
3 changed files with 25 additions and 6 deletions

View File

@ -449,6 +449,8 @@ inline int64_t ObFastParserBase::is_identifier_flags(const int64_t pos)
// Most of the time, if it is not an identifier character, it may be a space,
// a comma, an opening parenthesis, or a closing parenthesis. This check is
// placed here so that the following utf8 / gbk character checks can be skipped.
} else if (!is_oracle_mode_) {
idf_pos = notascii_gb_char(pos);
} else if (CHARSET_UTF8MB4 == charset_type_ || CHARSET_UTF16 == charset_type_) {
idf_pos = is_utf8_char(pos);
} else if (ObCharset::is_gb_charset(charset_type_)) {
@ -921,6 +923,17 @@ int ObFastParserBase::get_one_insert_row_str(ObRawSql &raw_sql,
return ret;
}
// Probes the character at `pos` for the MySQL-mode identifier scan.
//
// If the byte at `pos` is accepted by notascii(), it is consumed as a single
// byte and `pos + 1` is returned. Otherwise the decision is delegated to
// is_gbk_char(pos) and its result is returned unchanged.
//
// NOTE(review): notascii() accepts every byte >= 0x80, so the is_gbk_char()
// fallback looks reachable only for bytes notascii() rejects -- confirm
// whether is_gbk_char() can ever match in that case.
inline int64_t ObFastParserBase::notascii_gb_char(const int64_t pos)
{
  if (notascii(raw_sql_.char_at(pos))) {
    return pos + 1;
  }
  return is_gbk_char(pos);
}
inline int64_t ObFastParserBase::is_latin1_char(const int64_t pos)
{
int64_t idf_pos = -1;
@ -1672,6 +1685,8 @@ inline int64_t ObFastParserBase::is_first_identifier_flags(const int64_t pos)
// Most of the time, if it is not an identifier character, it may be a space,
// a comma, an opening parenthesis, or a closing parenthesis. This check is
// placed here so that the following utf8 / gbk character checks can be skipped.
} else if (!is_oracle_mode_) {
idf_pos = notascii_gb_char(pos);
} else if (CHARSET_UTF8MB4 == charset_type_ || CHARSET_UTF16 == charset_type_) {
idf_pos = is_utf8_char(pos);
} else if (ObCharset::is_gb_charset(charset_type_)) {

View File

@ -381,6 +381,7 @@ protected:
return is_valid_char(ch) && USER_VAR_CHAR[static_cast<uint8_t>(ch)];
}
void reset_parser_node(ParseNode *node);
// Returns pos + 1 when the byte at pos satisfies notascii(); otherwise
// returns the result of is_gbk_char(pos).
int64_t notascii_gb_char(const int64_t pos);
//{U}
int64_t is_latin1_char(const int64_t pos);
// ({U_2}{U}|{U_3}{U}{U}|{U_4}{U}{U}{U}
@ -440,6 +441,11 @@ protected:
return is_valid_char(ch) &&
static_cast<uint8_t>(ch) >= 0x40 && static_cast<uint8_t>(ch) <= 0xfe;
}
// Returns true when `ch` passes is_valid_char() and its unsigned byte value
// lies in [0x80, 0xFF], i.e. the byte is outside the 7-bit ASCII range.
inline bool notascii(char ch)
{
  const uint8_t byte = static_cast<uint8_t>(ch);
  // A uint8_t can never exceed 0xFF, so only the lower bound needs testing.
  return is_valid_char(ch) && byte >= 0x80;
}
inline bool is_latin1(char ch)
{
return is_valid_char(ch) &&

View File

@ -38,14 +38,12 @@ extern void obsql_mysql_parser_fatal_error(int32_t errcoyyde, yyscan_t yyscanner
/* the adq is used to process dq in ANSI_QUOTES sql_mode*/
%x adq
U [\x80-\xbf]
U_2 [\xc2-\xdf]
U_3 [\xe0-\xef]
U_4 [\xf0-\xf4]
NOTASCII [\x80-\xFF]
GB_1 [\x81-\xfe]
GB_2 [\x40-\xfe]
GB_3 [\x30-\x39]
UTF8_GB_CHAR ({U_2}{U}|{U_3}{U}{U}|{U_4}{U}{U}{U}|{GB_1}{GB_2}|{GB_1}{GB_3}{GB_1}{GB_3})
NOTASCII_GB_CHAR ({NOTASCII}|{GB_1}{GB_2}|{GB_1}{GB_3}{GB_1}{GB_3})
space [ \t\n\r\f]
non_newline [^\n\r]
sql_comment ("--"[ \t]+{non_newline}*)|(#{non_newline}*|"--"[\n\r])
@ -62,7 +60,7 @@ common_hint_begin (\/\*\+({space}*hint{space}+)?)
c_cmt_begin \/\*
c_cmt_end \*+\/
comment ({sql_comment})
identifier (([A-Za-z0-9$_]|{UTF8_GB_CHAR})+)
identifier (([A-Za-z0-9$_]|{NOTASCII_GB_CHAR})+)
system_variable (@@[A-Za-z_][A-Za-z0-9_]*)|(@@[`][`A-Za-z_][`A-Za-z_]*)
user_variable (@[A-Za-z0-9_\.$]*)|(@[`'\"][`'\"A-Za-z0-9_\.$/%]*)
version_num ([0-9]+\.+[0-9]*)